# Examen Corto 1
## Task Frozen Lake
### Integrantes:
- Diego Leiva       21752
- Maria Ramirez     21342
- Gustavo Gonzalez  21438

**Librerias Necesarias**

In [1]:
import gymnasium as gym
from gymnasium.envs.toy_text.frozen_lake import generate_random_map
import numpy as np
import warnings

warnings.simplefilter('ignore')

**Crear el entorno Frozen Lake**

Estados:
- S: starting point, seguro
- F: frozen surface, seguro
- H: hole, atrapado
- G: goal, seguro

Acciones:
- 0: Move left
- 1: Move down
- 2: Move right
- 3: Move up

Recompensas:
- Reach goal: +1
- Reach hole: 0
- Reach frozen: 0

Probabilidades:
- P(intended) = 1/3
- P(perpendicular direction) = 1/3
- P(perpendicular direction) = 1/3

In [2]:
np.random.seed(42)

# Crear un nuevo entorno
env = gym.make("FrozenLake-v1", 
               desc = generate_random_map(size=4),  # Generar un mapa aleatorio
               map_name = "4x4",                    # Mapa 4x4
               is_slippery = True,                  # Entorno resbaladizo
               render_mode="ansi")                  # Modo de renderizado
env.reset()

# Visualizar el entorno
print(env.render())


[41mS[0mFFF
HFFF
FFFF
FFFG



**Q-Learning**

In [3]:
qtable = np.zeros((env.observation_space.n, env.action_space.n))

# Hiperparametros:
episodes = 100000
alpha = 0.9                 # Tasa de aprendizaje
gamma = 0.9                 # Tasa de descuento
epsilon = 1.0               # Aletoriedad en la seleccion de acciones
epsilon_decay = 0.0001      # Tasa de decaiminto de epsilon

# Mostrar la tabla
print('Q-table antes del entrenamiento: ')
print(qtable)

Q-table antes del entrenamiento: 
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [4]:
# Entrenamiento
for i in range(episodes):
    state = env.reset()[0]
    terminated = False
    truncated = False

    # Mientras que el agente no caiga en un hoyo o alcanze la meta, continuar entrenando
    while not truncated and not terminated:
        # Generar un numero aleatorio entre 0 y 1
        rnd = np.random.random()

        # Si el aleatorio es menor a epsilon, tomar una accion aleatoria
        if rnd < epsilon:
            action = env.action_space.sample()
        # de lo contrario, tomar la accion con mayor valor
        else:
            action = np.argmax(qtable[state])

        # Ejecutar la accion y mover el agente
        new_state, reward, terminated, truncated, info = env.step(action)

        # Actualizar Q(s,a)
        qtable[state, action] = qtable[state, action] + alpha * (
            reward + gamma * np.max(qtable[new_state]) - qtable[state, action]
            )
        
        # Actualizar el estado actual
        state = new_state

    # Actualizar el valor de epsilon
    epsilon = max(epsilon - epsilon_decay, 0)

    if epsilon == 0:
        alpha = 0.0001
            
# Visualizar la Q-table actualizada
print('Q-table despues del entrenamiento:')
print(f"{qtable}\n")

Q-table despues del entrenamiento:
[[0.0085728  0.00422405 0.02199462 0.15152207]
 [0.06427551 0.06476317 0.19685767 0.0685083 ]
 [0.09113729 0.22488153 0.09176423 0.08753788]
 [0.09896328 0.26227364 0.09698403 0.09116873]
 [0.         0.         0.         0.        ]
 [0.01536578 0.02036773 0.23067482 0.0036164 ]
 [0.09172661 0.27659024 0.08863807 0.09295948]
 [0.10320385 0.35207497 0.11028755 0.11190699]
 [0.02552849 0.26190339 0.01244113 0.01925618]
 [0.09689198 0.10268948 0.29347005 0.10171718]
 [0.11045631 0.11984497 0.12216131 0.33477529]
 [0.53899967 0.16971948 0.1586009  0.16724307]
 [0.10469197 0.10316571 0.29174127 0.10107233]
 [0.10640664 0.40436963 0.10594869 0.10524147]
 [0.16787045 0.65371116 0.17307192 0.16152586]
 [0.         0.         0.         0.        ]]



**Encontrar la secuencia**

In [5]:
success = 0
# Evaluar la tasa de exito
for i in range(episodes):
    state = env.reset()[0]
    terminated = False
    truncated = False
    done = truncated and terminated

    while not done:
        action = np.argmax(qtable[state])

        new_state, reward, truncated, terminated, info = env.step(action)
        done = truncated and terminated

        state = new_state
        success += reward

# Obtener la tasa de exito
print(f"Tasa de exito: {round(success/episodes*100,2)}%")

Tasa de exito: 100.0%


In [6]:
state = env.reset()[0]
terminated = False
truncated = False

sequence = []
print(env.render())

while not terminated and not truncated:
    if np.max(qtable[state]) > 0:
      action = np.argmax(qtable[state])
    else:
      action = env.action_space.sample()
    
    # Add the action to the sequence
    sequence.append(action)

    # Implement this action and move the agent in the desired direction
    new_state, reward, terminated, truncated, info = env.step(action)

    # Update our current state
    state = new_state

    print(env.render())

print(f"Secuencia = {sequence}")


[41mS[0mFFF
HFFF
FFFF
FFFG

  (Up)
[41mS[0mFFF
HFFF
FFFF
FFFG

  (Up)
S[41mF[0mFF
HFFF
FFFF
FFFG

  (Right)
S[41mF[0mFF
HFFF
FFFF
FFFG

  (Right)
SFFF
H[41mF[0mFF
FFFF
FFFG

  (Right)
SFFF
HF[41mF[0mF
FFFF
FFFG

  (Down)
SFFF
H[41mF[0mFF
FFFF
FFFG

  (Right)
S[41mF[0mFF
HFFF
FFFF
FFFG

  (Right)
SF[41mF[0mF
HFFF
FFFF
FFFG

  (Down)
SFF[41mF[0m
HFFF
FFFF
FFFG

  (Down)
SF[41mF[0mF
HFFF
FFFF
FFFG

  (Down)
SFFF
HF[41mF[0mF
FFFF
FFFG

  (Down)
SFFF
HFFF
FF[41mF[0mF
FFFG

  (Up)
SFFF
HFFF
F[41mF[0mFF
FFFG

  (Right)
SFFF
HFFF
FF[41mF[0mF
FFFG

  (Up)
SFFF
HFFF
FFF[41mF[0m
FFFG

  (Left)
SFFF
HFFF
FFFF
FFF[41mG[0m

Secuencia = [3, 3, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 3, 2, 3, 0]
