In [1]:
pip install "numpy<2"

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install ale_py

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install "gymnasium[atari]"

Note: you may need to restart the kernel to use updated packages.


In [26]:
import gymnasium as gym
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from keras.optimizers import Adam
import cv2
import random
from collections import deque
import time
import os
import ale_py
from tensorflow.keras import layers, models
import time

In [5]:
def preprocess_frame(frame):
    """Turn an RGB frame into a normalized grayscale image.

    The frame is converted to a single grayscale channel, resized with
    ``cv2.resize`` to a target size of (width=160, height=120), and divided
    by 255.0 so the values are scaled to [0, 1].

    Args:
        frame: RGB image array as accepted by ``cv2.cvtColor``.

    Returns:
        The resized grayscale image divided by 255.0 (no channel axis).
    """
    # cv2.resize expects the target size as (width, height).
    target_size = (160, 120)
    grayscale = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    return cv2.resize(grayscale, target_size) / 255.0

In [6]:
def build_model(action_size, input_shape=(120, 160, 1)):
    """Build and compile a CNN with one linear output per action.

    The network stacks three convolutional layers, a 512-unit dense layer and
    a linear output layer, and is compiled with mean-squared-error loss and
    Adam (learning rate 0.00025).

    Args:
        action_size: Number of output units (one per action).
        input_shape: Shape of a single input sample as
            (height, width, channels). Defaults to (120, 160, 1), which is
            the shape produced by ``preprocess_frame`` once a trailing
            channel axis is added. The previous hard-coded (210, 160, 1)
            did not match those preprocessed frames.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = Sequential()
    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=input_shape))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    # Linear activation: outputs are unbounded per-action values, not probabilities.
    model.add(Dense(action_size, activation='linear'))

    model.compile(loss='mse', optimizer=Adam(learning_rate=0.00025))
    return model

In [7]:
# Experience replay helpers: create the buffer, store transitions, and sample minibatches
def create_memory(capacity=100000):
    """Return an empty replay buffer that holds at most ``capacity`` items.

    The buffer is a ``deque`` with ``maxlen=capacity``, so once it is full
    each append discards the oldest entry.
    """
    replay_buffer = deque([], maxlen=capacity)
    return replay_buffer

def add_to_memory(memory, state, action, reward, next_state, done):
    """Append one transition to ``memory``.

    The transition is stored as the tuple
    ``(state, action, reward, next_state, done)``.
    """
    transition = (state, action, reward, next_state, done)
    memory.append(transition)

def sample_from_memory(memory, batch_size):
    """Draw ``batch_size`` distinct entries from ``memory`` at random.

    Delegates to ``random.sample``, so a ``ValueError`` is raised when
    ``batch_size`` exceeds the number of stored entries.
    """
    minibatch = random.sample(memory, k=batch_size)
    return minibatch

In [8]:
def epsilon_greedy_action(model, state, epsilon, action_size):
    """Choose an action with an epsilon-greedy rule.

    A uniform random number is drawn; if it is <= ``epsilon`` a random action
    index in ``[0, action_size)`` is returned. Otherwise ``state`` is given a
    leading batch axis, passed to ``model.predict``, and the index of the
    highest score in the first output row is returned.
    """
    explore = np.random.rand() <= epsilon
    if explore:
        # Explore: uniform random action index.
        return np.random.randint(action_size)

    # Exploit: score a batch of one and take the best-scoring action.
    batch = np.expand_dims(state, axis=0)
    scores = model.predict(batch, verbose=0)
    best_action = np.argmax(scores[0])
    return best_action

In [15]:
# Define CNN architecture for action prediction
def build_cnn(action_size, input_shape=(120, 160, 1)):
    """
    Build and compile a CNN that outputs a probability for each action.

    Args:
        action_size: Number of units in the softmax output layer.
        input_shape: Shape of one input sample, (120, 160, 1) by default.

    Returns:
        A Keras Sequential model compiled with Adam (learning rate 0.0001),
        categorical cross-entropy loss and an accuracy metric.
    """
    feature_extractor = [
        # Three convolutions with shrinking kernels and strides.
        layers.Conv2D(32, (8, 8), strides=4, activation='relu', input_shape=input_shape),
        layers.Conv2D(64, (4, 4), strides=2, activation='relu'),
        layers.Conv2D(64, (3, 3), strides=1, activation='relu'),
    ]
    classifier_head = [
        # Flatten the feature maps, then one hidden dense layer.
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        # Softmax turns the 'action_size' outputs into a probability distribution.
        layers.Dense(action_size, activation='softmax'),
    ]
    model = models.Sequential(feature_extractor + classifier_head)

    # Classification-style setup: one-hot action targets with cross-entropy.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [28]:
def train_cnn(episodes=10000,
              max_steps=50000,
              batch_size=32,
              epsilon_start=1.0,
              epsilon_end=0.1,
              epsilon_decay=0.995,
              memory_capacity=100000,
              save_freq=100):
    """Train a CNN model for action prediction on the Frogger environment.

    Each step picks an action epsilon-greedily, stores the transition in a
    bounded replay buffer, and (once the buffer holds more than ``batch_size``
    transitions) fits the model on a random minibatch of stored states with
    the one-hot encoded actions that were taken as targets.

    NOTE(review): the update only uses the stored states and actions. The
    shaped reward, next state and done flag are stored and the reward is
    reported per episode, but none of them influences the gradient step.
    Confirm this is the intended training signal.

    Args:
        episodes: Number of episodes to run.
        max_steps: Maximum environment steps per episode.
        batch_size: Minibatch size sampled from memory and used for each fit.
        epsilon_start: Initial exploration rate.
        epsilon_end: Floor for the exploration rate; decay never goes below it.
        epsilon_decay: Multiplicative decay applied to epsilon after each episode.
        memory_capacity: Maximum number of transitions kept in the replay buffer.
        save_freq: Save a checkpoint every ``save_freq`` episodes.

    Returns:
        The trained Keras model.
    """
    def to_state(frame):
        # Add the channel axis the CNN expects and store as float32, which
        # halves the buffer footprint compared with the float64 array that
        # preprocess_frame returns.
        return np.expand_dims(preprocess_frame(frame), axis=-1).astype(np.float32)

    # Model saving directory (exist_ok avoids the check-then-create race).
    save_dir = "frogger_model"
    os.makedirs(save_dir, exist_ok=True)

    env = gym.make('ALE/Frogger-v5')
    # try/finally guarantees the emulator is released even when training is
    # interrupted (e.g. KeyboardInterrupt) or a step raises.
    try:
        action_size = env.action_space.n
        model = build_cnn(action_size)

        # Bounded buffer: honors memory_capacity instead of growing forever.
        memory = create_memory(memory_capacity)

        time_penalty = 0.01
        epsilon = epsilon_start
        for episode in range(1, episodes + 1):
            frame, info = env.reset()
            state = to_state(frame)

            episode_reward = 0
            steps_taken = 0

            for step in range(max_steps):
                action = epsilon_greedy_action(model, state, epsilon, action_size)
                next_frame, reward, terminated, truncated, info = env.step(action)
                done = terminated or truncated

                # Reward shaping for completion speed: growing per-step
                # penalty, bonus when the episode ends on a positive reward,
                # then clipped to [-10, 10].
                speed_bonus = 10 if done and reward > 0 else 0
                reward = reward - (step * time_penalty) + speed_bonus
                reward = np.clip(reward, -10, 10)

                next_state = to_state(next_frame)
                add_to_memory(memory, state, action, reward, next_state, done)
                state = next_state
                episode_reward += reward
                steps_taken += 1

                # Train once the memory holds more than one minibatch.
                if len(memory) > batch_size:
                    minibatch = sample_from_memory(memory, batch_size)
                    states = np.array([experience[0] for experience in minibatch])
                    actions = np.array([experience[1] for experience in minibatch])

                    # One-hot encode the actions for categorical crossentropy
                    actions_one_hot = tf.keras.utils.to_categorical(actions, num_classes=action_size)

                    # Pass batch_size explicitly so the whole minibatch is a
                    # single gradient step regardless of Keras' default of 32.
                    model.fit(states, actions_one_hot, batch_size=batch_size, epochs=1, verbose=0)

                if done:
                    break

            # Decay epsilon but never undershoot the configured floor.
            if epsilon > epsilon_end:
                epsilon = max(epsilon_end, epsilon * epsilon_decay)

            # steps_taken (not the loop variable) stays defined when max_steps is 0.
            print(f"Episode: {episode}, Reward: {episode_reward}, Epsilon: {epsilon:.4f}, Steps: {steps_taken}")

            # Save the model periodically
            if episode % save_freq == 0:
                model.save(f"{save_dir}/frogger_cnn_episode_{episode}.h5")
                print(f"Model saved at episode {episode}")

        # Save the final model
        model.save(f"{save_dir}/frogger_cnn_final.h5")
        print("Training completed!")
        return model
    finally:
        env.close()

In [22]:
# Launch training: up to 1000 episodes of at most 50000 steps, minibatches of 32.
trained_model = train_cnn(
    episodes=1000,
    max_steps=50000,
    batch_size=32,
)

Episode: 1, Reward: 9.0, Epsilon: 0.9950, Steps: 313
Episode: 2, Reward: 9.0, Epsilon: 0.9900, Steps: 295
Episode: 3, Reward: 7.0, Epsilon: 0.9851, Steps: 288
Episode: 4, Reward: 13.0, Epsilon: 0.9801, Steps: 261
Episode: 5, Reward: 12.0, Epsilon: 0.9752, Steps: 248


KeyboardInterrupt: 