In [5]:
import random

import gymnasium as gym
import pandas as pd

In [8]:
# Custom 3-row x 4-column map handed to FrozenLake via `desc`.
# Tile letters follow FrozenLake's convention (S start, F frozen, H hole, G goal).
maze = ["FFFG", "SHFH", "FFFF"]

# render_mode='human' draws every step in a window, which makes long runs slow.
env = gym.make('FrozenLake-v1', desc=maze, render_mode='human')
# Kept as the raw reset() result; the training cell indexes reset()[0]
# to obtain the integer state.
initial_state = env.reset()

# Q-table: one row per discrete state, one zero-initialised entry per action.
num_states = env.observation_space.n
num_actions = env.action_space.n
q_table = [[0] * num_actions for _ in range(num_states)]

# Q-learning hyperparameters.
alpha, gamma = 0.2, 0.9  # learning rate, discount factor
# Exploration rate, its per-episode multiplicative decay, and its floor.
epsilon, epsilon_decay, min_epsilon = 1.0, 0.99, 0.2

# Shaped reward looked up by the letter of the tile the agent lands on;
# the training cell uses this in place of the environment's own reward.
rewards = dict(S=0, F=-1, H=-10, G=10)

In [None]:
# Tabular Q-learning with an epsilon-greedy policy.
# Reads `env`, `maze`, `rewards`, `q_table`, `num_actions`, `alpha`, `gamma`,
# `epsilon_decay` and `min_epsilon` from the setup cell; updates `q_table`
# in place and decays `epsilon` once per episode.
for episode in range(1001):
    # reset() returns (observation, info); the observation is the state index.
    state = env.reset()[0]
    done = False

    while not done:
        # Epsilon-greedy: explore with probability epsilon, otherwise act greedily.
        if random.random() < epsilon:
            action = env.action_space.sample()
        else:
            action = max(range(num_actions), key=lambda a: q_table[state][a])

        # The environment's own reward is discarded in favour of the shaped one below.
        new_state, _env_reward, terminated, truncated, info = env.step(action)

        # Convert the flat state index back to (row, col) to find the tile letter.
        row, col = divmod(new_state, len(maze[0]))
        tile_type = maze[row][col]

        reward = rewards.get(tile_type, 0)

        # Holes and the goal end the episode.
        terminated = terminated or tile_type in ('H', 'G')

        # No bootstrapping from a terminal state; a time-limit truncation is
        # not terminal, so the next state's value is still used in that case.
        if terminated:
            target = reward
        else:
            target = reward + gamma * max(q_table[new_state])

        old_value = q_table[state][action]
        q_table[state][action] = (1 - alpha) * old_value + alpha * target

        state = new_state

        # Stop on truncation as well: the original ignored `truncated` and kept
        # stepping an environment that had already hit its step limit.
        done = terminated or truncated

    epsilon = max(min_epsilon, epsilon * epsilon_decay)

    if episode % 100 == 0:
        print(f"Episode: {episode}, Epsilon: {epsilon:.2f}")