In [1]:
# pip install gym

# pip install gymnasium
# pip install "gymnasium[classic-control]" # This part gives us the visual window to see the game

In [2]:
# CORRECTED: Use gymnasium instead of the old gym
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt

import tqdm                 # Progress bars


In [None]:
# CORRECTED: Use gymnasium instead of the old gym
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
# Import the progress bar library
from tqdm import tqdm

# Create the game environment.
env = gym.make('FrozenLake-v1', render_mode='human', is_slippery=False)
print("Observation Space:", env.observation_space)
print("Action Space:", env.action_space)

# Initialize the Q-table (cheat sheet) with zeros
q_table = np.zeros([env.observation_space.n, env.action_space.n])

# Hyperparameters
alpha = 0.8   # Learning rate
gamma = 0.95  # Discount factor
epsilon = 0.9 # Exploration rate
num_episodes = 1000  # How many games to play

# Main training loop
# CHANGE 1: Wrap range(num_episodes) with tqdm() to create a progress bar
for episode in tqdm(range(num_episodes), desc="Training", unit="episode"):
    # Reset the game for a new episode.
    state, info = env.reset()
    done = False

    while not done:
        # Choose action: Explore or Exploit?
        if np.random.random() < epsilon:
            action = env.action_space.sample() # Random action (explore)
        else:
            action = np.argmax(q_table[state]) # Best known action (exploit)

        # Take the action! See what happens.
        new_state, reward, done, truncated, info = env.step(action)

        # Update the Q-table (the cheat sheet)
        old_value = q_table[state, action]
        next_max = np.max(q_table[new_state])
        new_value = old_value + alpha * (reward + gamma * next_max - old_value)
        q_table[state, action] = new_value

        state = new_state

    # CHANGE 2: You can REMOVE or COMMENT OUT the old print statement.
    # The progress bar replaces this!
    # if episode % 1000 == 0:
    #    print(f"Episode: {episode}")

print("\nTraining finished.\nFinal Q-Table:")
print(q_table)

# Let's watch our smart agent play!
input("Press Enter to watch the trained agent...")
state, info = env.reset()
done = False
while not done:
    action = np.argmax(q_table[state])
    new_state, reward, done, truncated, info = env.step(action)
    env.render() # Show the game on your screen!
    state = new_state

env.close()

Observation Space: Discrete(16)
Action Space: Discrete(4)


Training: 100%|██████████| 1000/1000 [35:33<00:00,  2.13s/episode]



Training finished.
Final Q-Table:
[[0.73509189 0.77378094 0.77378094 0.73509189]
 [0.73509189 0.         0.81450625 0.77378094]
 [0.77378094 0.857375   0.77378094 0.81450625]
 [0.81450625 0.         0.77378094 0.77378094]
 [0.77378094 0.81450625 0.         0.73509189]
 [0.         0.         0.         0.        ]
 [0.         0.9025     0.         0.81450625]
 [0.         0.         0.         0.        ]
 [0.81450625 0.         0.857375   0.77378094]
 [0.81450625 0.9025     0.9025     0.        ]
 [0.857375   0.95       0.         0.857375  ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.9025     0.95       0.857375  ]
 [0.9025     0.95       1.         0.9025    ]
 [0.         0.         0.         0.        ]]


: 