In [2]:
import gymnasium as gym
from gymnasium.envs.toy_text.frozen_lake import generate_random_map
import matplotlib.pyplot as plt
import pandas as pd
import random

# Tabular Q-learning on a randomly generated, slippery FrozenLake map.
#
# The script builds the environment, trains an epsilon-greedy agent, and
# prints the learned Q-table as a DataFrame (one row per state, one column
# per action name).

# Initialize FrozenLake environment
map_size = 10
map_type = generate_random_map

# Make maze: a random map_size x map_size layout with stochastic movement.
env = gym.make('FrozenLake-v1', desc=map_type(size=map_size), is_slippery=True)

# Action mapping (environment action index -> Q-table column name)
actions = {0: "Left", 1: "Down", 2: "Right", 3: "Up"}
# Inverse mapping, built once so the greedy branch does not rebuild and scan
# two lists on every step.
action_ids = {name: idx for idx, name in actions.items()}

# Initialize Q-table: one row per grid cell, derived from map_size so the
# table always matches the generated map.
n_states = map_size * map_size
qtable = {name: [0.0] * n_states for name in ("Up", "Down", "Left", "Right")}

# Hyperparameters
alpha = 0.9  # Learning rate
gamma = 0.9  # Discount factor
epsilon = 0.6  # Probability of taking a random (exploratory) action
episodes = 50000  # Training episodes

# Training loop
for episode in range(episodes):
    state, _ = env.reset()
    done = False

    while not done:
        # Epsilon-greedy strategy
        if random.random() < epsilon:
            action = int(env.action_space.sample())  # Explore (random action)
        else:
            # Exploit: pick the highest-valued action, breaking ties randomly
            # so an all-zero row does not always favour the first column.
            max_q = max(qtable[a][state] for a in qtable)
            best_actions = [a for a in qtable if abs(qtable[a][state] - max_q) < 1e-6]
            action = action_ids[random.choice(best_actions)]

        # Take action and observe results. `terminated` means the agent
        # reached the goal or fell in a hole; `truncated` means the
        # environment's time limit expired.
        next_state, reward, terminated, truncated, _ = env.step(action)

        # Small penalty for movement to prevent getting stuck in loops
        if reward == 0 and not terminated:
            reward = -0.01

        # Q-learning update. A terminal state has no future return, so only
        # bootstrap from next_state when the episode did not terminate
        # (a time-limit truncation still bootstraps).
        action_name = actions[action]
        old_value = qtable[action_name][state]
        next_max = 0.0 if terminated else max(qtable[a][next_state] for a in qtable)

        qtable[action_name][state] = old_value + alpha * (reward + gamma * next_max - old_value)

        state = next_state
        # End the episode on either signal; stepping past a truncation
        # without reset() is outside the environment's API contract.
        done = terminated or truncated

env.close()

# Convert Q-table to DataFrame and Print
df = pd.DataFrame(qtable)
print("\n===== Final Q-Table =====\n")
print(df)



===== Final Q-Table =====

          Up      Down      Left     Right
0  -0.039192 -0.027097 -0.036999 -0.027689
1  -0.002104 -0.022472 -0.025894 -0.019738
2   0.000000  0.000000  0.000000  0.000000
3  -0.009819 -0.000191 -0.006346 -0.009994
4   0.000000  0.000000  0.000000  0.000000
..       ...       ...       ...       ...
95  0.339831  0.050283  0.348401  0.434945
96  0.586170  0.431321  0.393032  0.408765
97  0.533845  0.468668  0.530606  0.614709
98  0.562727  0.987776  0.633597  0.722028
99  0.000000  0.000000  0.000000  0.000000

[100 rows x 4 columns]
