In [None]:
import os
import random
import zlib

import ale_py
import gymnasium as gym
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Create the Frogger environment and take an initial reset so that
# `observation` and `info` exist before the training cell runs.
# NOTE(review): render_mode="human" presumably draws every frame in a window,
# which is likely slow for a long training run -- confirm this is intended.
env = gym.make(
    "ALE/Frogger-v5",
    render_mode="human",
)
observation, info = env.reset()

In [3]:
# --- Q-learning hyperparameters -------------------------------------------
num_episodes = 1_000   # Number of training episodes
alpha = 0.7            # Learning rate: weight given to each new TD error
gamma = 0.75           # Discount factor applied to the next state's value

# Exploration schedule: epsilon starts at 1.0 (always explore), is multiplied
# by epsilon_decay after every episode, and is never allowed below epsilon_min.
epsilon = 1.0
epsilon_min = 0.01
epsilon_decay = 0.99

# --- Tabular action-value function ----------------------------------------
# One row per hashed state bucket, one column per discrete action; every
# entry starts at zero.
action_size = env.action_space.n
state_size = 255  # Number of buckets that observations are hashed into
Q_table = np.zeros(shape=(state_size, action_size))

def get_state(observation, num_states=None):
    """Map an observation to a discrete Q-table row index.

    The observation's raw bytes are checksummed with CRC-32 and reduced modulo
    the number of buckets.  Two properties matter here:

    * Every element of the observation contributes to the result.  Hashing
      ``str(observation)`` does not have this property, because numpy
      abbreviates large arrays with ``...`` when printing, so frames that
      differ only in their interior produce the same string.
    * The result is identical across interpreter sessions.  The built-in
      ``hash`` of a ``str`` is salted per process, which would make state
      indices change on every kernel restart.

    Distinct observations can still share a bucket; this is a lossy hash,
    not an exact state encoding.

    Parameters
    ----------
    observation : array-like
        Observation returned by the environment.
    num_states : int, optional
        Number of buckets.  Defaults to the module-level ``state_size``.

    Returns
    -------
    int
        Index in ``[0, num_states)``.
    """
    if num_states is None:
        num_states = state_size
    frame = np.ascontiguousarray(observation)
    return zlib.crc32(frame.tobytes()) % num_states

rewards_per_episode = []  # Undiscounted reward total of each finished episode

# Training loop: tabular Q-learning with an epsilon-greedy behaviour policy.
# The try/finally guarantees the environment is closed even if the run is
# interrupted (e.g. via the notebook's "interrupt kernel" button).
try:
    for episode in range(num_episodes):
        observation, info = env.reset()
        state = get_state(observation)
        done = False
        total_reward = 0

        while not done:
            # Epsilon-greedy action selection: explore with probability
            # epsilon, otherwise take the greedy action for this state.
            if random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()  # Explore
            else:
                action = int(np.argmax(Q_table[state]))  # Exploit

            # step() returns (obs, reward, terminated, truncated, info).
            # Both flags end the episode; ignoring `truncated` would keep the
            # loop stepping after a time-limit cut-off.
            new_observation, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            new_state = get_state(new_observation)
            total_reward += reward

            # Q-learning update. A terminal transition has no future return,
            # so only bootstrap from the next state when the episode did not
            # terminate (a truncated episode still bootstraps).
            future_value = 0.0 if terminated else np.max(Q_table[new_state])
            Q_table[state, action] += alpha * (
                reward + gamma * future_value - Q_table[state, action]
            )

            state = new_state  # Move to next state

        rewards_per_episode.append(total_reward)  # Track progress

        # Decay exploration once per episode, bounded below by epsilon_min.
        epsilon = max(epsilon_min, epsilon * epsilon_decay)
finally:
    env.close()

# Learning curve: total reward collected in each training episode.
ax = plt.gca()
ax.plot(rewards_per_episode)
ax.set_xlabel("Episodes")
ax.set_ylabel("Total Reward")
ax.set_title("Frogger Q-learning Progress with Epsilon Decay")
plt.show()

KeyboardInterrupt: 