# Q-Learning

### Libraries

In [220]:
import gymnasium as gym
import numpy as np
import pygame as pg
import matplotlib.pyplot as plt
import imageio
from collections import defaultdict

### Pick parameters

In [221]:
class QLearningConfig:
    def __init__(self, env_name):
        # Define the parameter table
        self.configs = {
            "FrozenLake-v1 (4x4)": {
                "n_train_episodes": 10000,
                "lr": 0.7,
                "n_eval_episodes": 100,
                "max_steps": 100,
                "gamma": 0.95,
                "min_epsilon": 0.05,
                "max_epsilon": 1.0,
                "decay": 0.0005
            },
            "FrozenLake-v1 (4x4 - slippery)": {
                "n_train_episodes": 10000,
                "lr": 0.1,
                "n_eval_episodes": 100,
                "max_steps": 100,
                "gamma": 0.99,
                "min_epsilon": 0.05,
                "max_epsilon": 1.0,
                "decay": 0.0005
            },
            "FrozenLake-v1 (8x8)": {
                "n_train_episodes": 250000,
                "lr": 0.8,
                "n_eval_episodes": 1000,
                "max_steps": 400,
                "gamma": 0.9,
                "min_epsilon": 0.001,
                "max_epsilon": 1.0,
                "decay": 0.00005
            },
            "FrozenLake-v1 (8x8, slippery)": {
                "n_train_episodes": 250000,
                "lr": 0.1,
                "n_eval_episodes": 1000,
                "max_steps": 400,
                "gamma": 0.99,
                "min_epsilon": 0.05,
                "max_epsilon": 1.0,
                "decay": 0.00005
            },
            "Taxi-v3": {
                "n_train_episodes": 10000,
                "lr": 0.7,
                "n_eval_episodes": 100,
                "max_steps": 100,
                "gamma": 0.95,
                "min_epsilon": 0.05,
                "max_epsilon": 1.0,
                "decay": 0.0005
            },
            "CliffWalking-v0": {
                "n_train_episodes": 10000,
                "lr": 0.7,
                "n_eval_episodes": 100,
                "max_steps": 100,
                "gamma": 0.95,
                "min_epsilon": 0.05,
                "max_epsilon": 1.0,
                "decay": 0.0005
            }
        }
        
        # Load the configuration for the selected environment
        if env_name in self.configs:
            self.params = self.configs[env_name]
        else:
            raise ValueError(f"Environment '{env_name}' not found in configurations.")
    
    def get_params(self):
        return self.params

In [222]:
'''
def list_all_envs():
    # Get all registered environments
    all_envs = gym.envs.registry.values()
    
    # Extract and print environment IDs
    env_ids = sorted(set(env_spec.id for env_spec in all_envs))
    print("Available Gymnasium Environments:")
    for env_id in env_ids:
        print(env_id)

# Call the function
list_all_envs()'
'''

'\ndef list_all_envs():\n    # Get all registered environments\n    all_envs = gym.envs.registry.values()\n\n    # Extract and print environment IDs\n    env_ids = sorted(set(env_spec.id for env_spec in all_envs))\n    print("Available Gymnasium Environments:")\n    for env_id in env_ids:\n        print(env_id)\n\n# Call the function\nlist_all_envs()\'\n'

### Selection of said parameters

In [230]:
env_param_pairs = [
    ("FrozenLake-v1", "FrozenLake-v1 (4x4)"),                   # 0
    ("FrozenLake-v1", "FrozenLake-v1 (4x4 - slippery)"),        # 1
    ("FrozenLake8x8-v1", "FrozenLake-v1 (8x8)"),                # 2
    ("FrozenLake8x8-v1", "FrozenLake-v1 (8x8, slippery)"),      # 3
    ("Taxi-v3", "Taxi-v3"),                                     # 4
    ("CliffWalking-v0", "CliffWalking-v0")                      # 5
]

################################
# Select the environment index #
################################
index = 5
env_name, env_name_long = env_param_pairs[index]

if "FrozenLake" in env_name:
    env = gym.make(env_name, render_mode="rgb_array", is_slippery=("slippery" in env_name_long))
else:
    env = gym.make(env_name, render_mode="rgb_array")

config = QLearningConfig(env_name_long)

params = config.get_params()

n_train_episodes = params["n_train_episodes"]
lr = params["lr"]
n_eval_episodes = params["n_eval_episodes"]
max_steps = params["max_steps"]
gamma = params["gamma"]
min_epsilon = params["min_epsilon"]
max_epsilon = params["max_epsilon"]
decay = params["decay"]

print(f"Selected Environment: {env_name_long}")
print(f"Parameters: {params}")
print(f"Slippery: {'slippery' in env_name_long if 'FrozenLake' in env_name else 'N/A'}")

Selected Environment: CliffWalking-v0
Parameters: {'n_train_episodes': 10000, 'lr': 0.7, 'n_eval_episodes': 100, 'max_steps': 100, 'gamma': 0.95, 'min_epsilon': 0.05, 'max_epsilon': 1.0, 'decay': 0.0005}
Slippery: N/A


### Traing agent

In [231]:
num_states = env.observation_space.n
num_actions = env.action_space.n
done = False
frames = []
Q_table = np.zeros((num_states, num_actions))

for ep in range(n_train_episodes):
    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay * ep)
    obs, _ = env.reset()
    for _ in range(max_steps):
        if np.random.random() < epsilon:
            action = env.action_space.sample()
        else:
            action = int(np.argmax(Q_table[obs]))
        new_obs, reward, terminated, truncated, _ = env.step(action)
        Q_table[obs, action] += lr * (reward + gamma * (not truncated and not terminated) * np.max(Q_table[new_obs]) - Q_table[obs, action])
        if terminated or truncated:
            break
        obs = new_obs

### Export to gif

In [None]:
obs, _ = env.reset()

while True:
    action = np.argmax(Q_table[obs])
    new_obs, reward, terminated, truncated, _ = env.step(action)
    frames.append(env.render())
    if terminated or truncated:
        break
    obs = new_obs

env.close()

imageio.mimsave('results/' + env_name_long + '.gif', frames, fps=1)