# In [None]:
import gym
import numpy as np
import random
import time

# --- Q-learning hyperparameters ---------------------------------------------
alpha = 0.8    # learning rate: how far each update moves toward the TD target
gamma = 0.95   # discount factor applied to the best next-state value
epsilon = 0.1  # probability of picking a random (exploratory) action
# NOTE(review): two episodes is almost certainly too few for the table to
# learn a useful policy on a slippery lake -- confirm this is a smoke-test
# value and raise it for real training.
episodes = 2
max_steps = 100  # per-episode step budget (play_game reuses this)

env = gym.make("FrozenLake-v1", is_slippery=True)
# One row per discrete state, one column per discrete action, all zeros.
q_table = np.zeros((env.observation_space.n, env.action_space.n))

for episode in range(episodes):
    # reset() returns (observation, info); only the observation is used.
    state, _info = env.reset()

    for _step in range(max_steps):
        # Epsilon-greedy: explore with probability epsilon, otherwise exploit
        # the current estimates (argmax resolves ties to the lowest index).
        if random.random() < epsilon:
            action = env.action_space.sample()
        else:
            action = int(np.argmax(q_table[state]))

        next_state, reward, terminated, truncated, _info = env.step(action)

        # Q-learning update: Q(s,a) += alpha * (r + gamma * max Q(s',.) - Q(s,a)).
        # Rows of terminal states are never written (the loop breaks right
        # after reaching one), so they stay zero and contribute nothing here.
        td_target = reward + gamma * np.max(q_table[next_state])
        q_table[state, action] += alpha * (td_target - q_table[state, action])

        state = next_state

        # An episode ends when the env terminates OR when it is truncated
        # (e.g. by a time limit); in both cases the env must be reset before
        # stepping again, so stop this episode on either flag.
        if terminated or truncated:
            break

print("Training finished!\n")

def play_game(render=True):
    """Play one greedy episode with the learned ``q_table`` and report it.

    Args:
        render: When true, print a text frame of the board before every move
            (and once more when the episode terminates), pausing 0.5 s after
            each pre-move frame.

    Prints the outcome if the episode terminates within ``max_steps`` moves,
    then the total reward collected. Returns nothing.
    """
    # The module-level ``env`` is created without a render_mode; per the Gym
    # docs, render() then draws nothing (it only logs a warning). For a
    # rendered game, use a dedicated env whose render() returns the board as
    # text. Keep the id/kwargs in sync with the training env.
    game_env = env
    if render:
        game_env = gym.make("FrozenLake-v1", is_slippery=True, render_mode="ansi")

    total_reward = 0
    try:
        # reset() returns (observation, info); only the observation is used.
        state, _info = game_env.reset()

        for _step in range(max_steps):
            if render:
                print(game_env.render())
                time.sleep(0.5)

            # Purely greedy play: no exploration at evaluation time.
            action = int(np.argmax(q_table[state]))
            state, reward, terminated, truncated, _info = game_env.step(action)
            total_reward += reward

            if terminated:
                if render:
                    print(game_env.render())
                print("✅ Success!" if reward == 1 else "❌ Fell into a hole.")
                break
            if truncated:
                # Cut off by the env's own limit: the episode is over, but it
                # is neither a success nor a fall, so report no outcome (same
                # as running out of ``max_steps``).
                break
    finally:
        # Only close the env this call created; the shared one is closed by
        # the script itself.
        if game_env is not env:
            game_env.close()

    print(f"Total reward: {total_reward}")

# Demo: play three rendered games with the learned table, then release the
# training environment.
for game_number in range(1, 4):
    print(f"\nEpisode {game_number}")
    play_game(render=True)

env.close()
