In [4]:
import gymnasium as gym
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from keras.optimizers import Adam
import cv2
import random
from collections import deque
import time
import os
import ale_py

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [5]:
env = gym.make('ALE/Frogger-v5')
frame, info = env.reset()

In [6]:
# Building the CNN model for Q-learning
def build_model(action_size):
    """Build a CNN model for Deep Q-Learning"""
    model = Sequential()
    # Input shape: grayscale image of 210x160 (210, 160, 1)
    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=(210, 160, 1)))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(action_size, activation='linear'))  # Output layer with one node per action
    
    model.compile(loss='mse', optimizer=Adam(learning_rate=0.00025))
    return model

In [None]:
# Preprocessing function
def preprocess_frame(frame):
    """Convert RGB frame to grayscale and keep original dimensions (210x160)"""
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    # Keep original dimensions
    normalized_frame = gray_frame / 255.0  # Normalize pixel values
    return normalized_frame


# Experience replay memory
def create_memory(capacity=100000):
    """Create a memory buffer for experience replay"""
    return deque(maxlen=capacity)

def add_to_memory(memory, state, action, reward, next_state, done):
    """Add experience to memory"""
    memory.append((state, action, reward, next_state, done))

def sample_from_memory(memory, batch_size):
    """Sample random batch from memory"""
    return random.sample(memory, batch_size)

# Epsilon-greedy policy
def epsilon_greedy_action(model, state, epsilon, action_size):
    """Choose action using epsilon-greedy policy"""
    if np.random.random() <= epsilon:
        return random.randrange(action_size)  # Explore: choose random action
    else:
        # Exploit: choose best action
        q_values = model.predict(np.expand_dims(state, axis=0), verbose=0)[0]
        return np.argmax(q_values)  # Choose action with highest Q-value

# Training function
def train_dqn(episodes=10000, 
              max_steps=50000, 
              batch_size=32, 
              gamma=0.99, 
              epsilon_start=1.0, 
              epsilon_end=0.1, 
              epsilon_decay=0.995,
              update_target_freq=10000,
              memory_capacity=100000,
              save_freq=100):
    """Train a DQN model on Frogger"""
    # Create environment
    env = gym.make('ALE/Frogger-v5')
    action_size = env.action_space.n
    
    # Create main and target models
    main_model = build_model(action_size)
    target_model = build_model(action_size)
    target_model.set_weights(main_model.get_weights())  # Initialize target with same weights
    
    # Create memory for experience replay
    memory = create_memory(capacity=memory_capacity)
    
    # Training metrics
    total_steps = 0
    epsilon = epsilon_start
    
    # Model saving directory
    save_dir = "frogger_model"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    
    # Training loop
    for episode in range(1, episodes + 1):
        # Reset environment and get initial state
        frame, info = env.reset()
        state = preprocess_frame(frame)
        state = np.expand_dims(state, axis=-1)  # Add channel dimension: (120, 160, 1)
        
        episode_reward = 0
        
        for step in range(max_steps):
            # Choose action
            action = epsilon_greedy_action(main_model, state, epsilon, action_size)
            
            # Take action
            next_frame, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            
            # Prepare next state
            next_state = preprocess_frame(next_frame)
            next_state = np.expand_dims(next_state, axis=-1)  # Add channel dimension
            
            # Store experience in memory
            add_to_memory(memory, state, action, reward, next_state, done)
            
            # Move to next state
            state = next_state
            episode_reward += reward
            total_steps += 1
            
            # Train with experience replay if memory has enough samples
            if len(memory) > batch_size:
                # Sample batch from memory
                minibatch = sample_from_memory(memory, batch_size)
                
                # Prepare batch for training
                states = np.array([experience[0] for experience in minibatch])
                actions = np.array([experience[1] for experience in minibatch])
                rewards = np.array([experience[2] for experience in minibatch])
                next_states = np.array([experience[3] for experience in minibatch])
                dones = np.array([experience[4] for experience in minibatch])
                
                # Calculate target Q values
                target_q_values = main_model.predict(states, verbose=0)
                next_q_values = target_model.predict(next_states, verbose=0)
                
                for i in range(batch_size):
                    if dones[i]:
                        target_q_values[i, actions[i]] = rewards[i]
                    else:
                        target_q_values[i, actions[i]] = rewards[i] + gamma * np.max(next_q_values[i])
                
                # Train the model
                main_model.fit(states, target_q_values, epochs=1, verbose=0)
            
            # Update target network periodically
            if total_steps % update_target_freq == 0:
                target_model.set_weights(main_model.get_weights())
                print(f"Target network updated at step {total_steps}")
            
            if done:
                break
        
        # Decay epsilon
        if epsilon > epsilon_end:
            epsilon *= epsilon_decay
        
        # Print episode stats
        print(f"Episode: {episode}, Reward: {episode_reward}, Epsilon: {epsilon:.4f}, Steps: {step+1}")
        
        # Save model periodically
        if episode % save_freq == 0:
            main_model.save(f"{save_dir}/frogger_dqn_episode_{episode}.h5")
            print(f"Model saved at episode {episode}")
    
    # Save final model
    main_model.save(f"{save_dir}/frogger_dqn_final.h5")
    print("Training completed!")
    
    env.close()
    return main_model

# Main execution
if __name__ == "__main__":
    # You can adjust these parameters as needed
    train_dqn(episodes=1000,
              max_steps=10000, 
              batch_size=8, 
              gamma=0.99, 
              epsilon_start=1.0, 
              epsilon_end=0.01, 
              epsilon_decay=0.995,
              update_target_freq=1000,
              memory_capacity=50000,
              save_freq=50)

Episode: 1, Reward: 8.0, Epsilon: 0.9950, Steps: 328
Episode: 2, Reward: 11.0, Epsilon: 0.9900, Steps: 303
Episode: 3, Reward: 9.0, Epsilon: 0.9851, Steps: 345
Target network updated at step 1000
Episode: 4, Reward: 8.0, Epsilon: 0.9801, Steps: 209
Episode: 5, Reward: 9.0, Epsilon: 0.9752, Steps: 229
Episode: 6, Reward: 10.0, Epsilon: 0.9704, Steps: 279
Episode: 7, Reward: 6.0, Epsilon: 0.9655, Steps: 297
Target network updated at step 2000
Episode: 8, Reward: 8.0, Epsilon: 0.9607, Steps: 229
Episode: 9, Reward: 10.0, Epsilon: 0.9559, Steps: 302
Episode: 10, Reward: 10.0, Epsilon: 0.9511, Steps: 283
Target network updated at step 3000
Episode: 11, Reward: 10.0, Epsilon: 0.9464, Steps: 251
Episode: 12, Reward: 9.0, Epsilon: 0.9416, Steps: 226
Episode: 13, Reward: 10.0, Epsilon: 0.9369, Steps: 299
Episode: 14, Reward: 10.0, Epsilon: 0.9322, Steps: 273
Target network updated at step 4000
Episode: 15, Reward: 11.0, Epsilon: 0.9276, Steps: 247
Episode: 16, Reward: 9.0, Epsilon: 0.9229, Step

Episode: 126, Reward: 18.0, Epsilon: 0.5318, Steps: 390
Episode: 127, Reward: 13.0, Epsilon: 0.5291, Steps: 271
Episode: 128, Reward: 11.0, Epsilon: 0.5264, Steps: 388
Target network updated at step 36000
Episode: 129, Reward: 18.0, Epsilon: 0.5238, Steps: 330
Episode: 130, Reward: 11.0, Epsilon: 0.5212, Steps: 286
Episode: 131, Reward: 11.0, Epsilon: 0.5186, Steps: 270
Target network updated at step 37000
Episode: 132, Reward: 14.0, Epsilon: 0.5160, Steps: 252
Episode: 133, Reward: 17.0, Epsilon: 0.5134, Steps: 280
Episode: 134, Reward: 19.0, Epsilon: 0.5108, Steps: 342
Target network updated at step 38000
Episode: 135, Reward: 18.0, Epsilon: 0.5083, Steps: 411
Episode: 136, Reward: 20.0, Epsilon: 0.5058, Steps: 441
Episode: 137, Reward: 9.0, Epsilon: 0.5032, Steps: 388
Target network updated at step 39000
Episode: 138, Reward: 21.0, Epsilon: 0.5007, Steps: 305
Episode: 139, Reward: 18.0, Epsilon: 0.4982, Steps: 445
Target network updated at step 40000
Episode: 140, Reward: 16.0, Epsi

In [7]:
# Preprocessing function
def preprocess_frame(frame):
    """Convert RGB frame to grayscale and keep original dimensions (210x160)"""
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    # Keep original dimensions
    normalized_frame = gray_frame / 255.0  # Normalize pixel values
    return normalized_frame

In [8]:
def build_model(action_size):
    """Build a CNN model for Deep Q-Learning"""
    model = Sequential()
    # Input shape: grayscale image of 210x160 (210, 160, 1)
    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=(210, 160, 1)))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(action_size, activation='linear'))  # Output layer with one node per action
    
    model.compile(loss='mse', optimizer=Adam(learning_rate=0.00025))
    return model

In [11]:
def run_trained_model(env_name, model_path, episodes=5):
    # Initialize the environment
    env = gym.make(env_name, render_mode='human' )
    model = build_model(env.action_space.n)
    model.load_weights(model_path)

    for episode in range(episodes):
        frame, info = env.reset()
        state = preprocess_frame(frame)
        state = np.expand_dims(state, axis=-1)  # Add channel dimension

        episode_reward = 0
        done = False

        while not done:
            env.render()  # Render the environment to see the agent in action
            time.sleep(0.01)  # Slow down simulation for better visualization

            # Predict action from the model
            q_values = model.predict(np.expand_dims(state, axis=0), verbose=0)
            action = np.argmax(q_values[0])  # Choose action with highest predicted Q-value

            # Take the action
            next_frame, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated

            # Prepare next state
            next_state = preprocess_frame(next_frame)
            next_state = np.expand_dims(next_state, axis=-1)

            # Update the current state
            state = next_state
            episode_reward += reward

        print(f"Episode {episode + 1} - Total Reward: {episode_reward}")

    env.close()

# Assuming you have saved your trained model in 'frogger_model/frogger_dqn_final.h5'
run_trained_model(env_name='ALE/Frogger-v5', model_path='frogger_dqn_final.h5')

FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = 'frogger_dqn_final.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [10]:
# Load trained model
model_path = 'frogger_dqn_episode_200.h5'
model = build_model(action_size=5) 
model.load_weights(model_path)

# Run the game with trained model
def play_frogger():
    env = gym.make('ALE/Frogger-v5', render_mode='human')
    state, _ = env.reset()
    state = preprocess_frame(state)
    state = np.expand_dims(state, axis=-1)  # Add channel dimension
    state = np.expand_dims(state, axis=0)   # Add batch dimension
    
    total_reward = 0
    while True:
        # Get Q-values and choose best action
        q_values = model.predict(state, verbose=0)
        action = np.argmax(q_values[0])
        
        # Take action
        next_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        
        # Prepare next state
        next_state = preprocess_frame(next_state)
        next_state = np.expand_dims(next_state, axis=-1)
        next_state = np.expand_dims(next_state, axis=0)
        
        state = next_state
        total_reward += reward
        
        if done:
            print(f"Game Over! Total reward: {total_reward}")
            break
            
    env.close()

# Run the game
if __name__ == "__main__":
    play_frogger()

FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = 'frogger_dqn_episode_200.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)