In [1]:
pip install "numpy<2"

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install ale_py

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install "gymnasium[atari]"

Note: you may need to restart the kernel to use updated packages.


In [26]:
import gymnasium as gym
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from keras.optimizers import Adam
import cv2
import random
from collections import deque
import time
import os
import ale_py
from tensorflow.keras import layers, models
import time

In [32]:
def preprocess_frame(frame):
    # Convert to grayscale and resize to 120x160
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    resized_frame = cv2.resize(gray_frame, (160, 120))
    normalized_frame = resized_frame / 255.0  # Normalize to [0, 1]
    return normalized_frame

In [33]:
def build_model(action_size):
    """CNN architecture"""
    model = Sequential()
    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=(210, 160, 1)))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(action_size, activation='linear'))
    
    model.compile(loss='mse', optimizer=Adam(learning_rate=0.00025))
    return model

In [34]:
# System for progressively saving weights as the CNN trains
def create_memory(capacity=100000):
    """Create a memory buffer for experience replay"""
    return deque(maxlen=capacity)

def add_to_memory(memory, state, action, reward, next_state, done):
    """Add experience to memory"""
    memory.append((state, action, reward, next_state, done))

def sample_from_memory(memory, batch_size):
    """Sample random batch from memory"""
    return random.sample(memory, batch_size)

In [35]:
def epsilon_greedy_action(model, state, epsilon, action_size):
    if np.random.rand() <= epsilon:
        return np.random.randint(action_size)  # Random action (explore)
    q_values = model.predict(np.expand_dims(state, axis=0), verbose=0)  # Predict Q-values
    return np.argmax(q_values[0])  # Action with highest Q-value (exploit)

In [36]:
# Define CNN architecture for action prediction
def build_cnn(action_size, input_shape=(120, 160, 1)):
    """
    Build a Convolutional Neural Network (CNN) for action prediction in an RL environment.
    """
    model = models.Sequential()

    # First convolutional layer
    model.add(layers.Conv2D(32, (8, 8), strides=4, activation='relu', input_shape=input_shape))
    
    # Second convolutional layer
    model.add(layers.Conv2D(64, (4, 4), strides=2, activation='relu'))
    
    # Third convolutional layer
    model.add(layers.Conv2D(64, (3, 3), strides=1, activation='relu'))
    
    # Flatten the output for the fully connected layers
    model.add(layers.Flatten())
    
    # Fully connected layer
    model.add(layers.Dense(512, activation='relu'))
    
    # Output layer with 'action_size' neurons (one for each possible action)
    model.add(layers.Dense(action_size, activation='softmax'))  # Softmax for action probability

    # Compile the model with a categorical crossentropy loss (because it's a classification task)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

In [39]:
def train_cnn(episodes=10000, 
              max_steps=50000, 
              batch_size=32, 
              epsilon_start=1.0, 
              epsilon_end=0.1, 
              epsilon_decay=0.995,
              memory_capacity=100000,
              save_freq=100):
    """Train a CNN model for action prediction on the Frogger environment"""
    # Create environment
    env = gym.make('ALE/Frogger-v5')
    action_size = env.action_space.n
    
    # Create CNN model
    model = build_cnn(action_size)
    
    # Model saving directory
    save_dir = "frogger_model"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Initialize memory
    memory = []
    
    # Training loop
    epsilon = epsilon_start
    for episode in range(1, episodes + 1):
        start_time = time.time()

        frame, info = env.reset()
        state = preprocess_frame(frame)
        state = np.expand_dims(state, axis=-1)

        episode_reward = 0
        steps_taken = 0
        
        for step in range(max_steps):
            action = epsilon_greedy_action(model, state, epsilon, action_size)
            next_frame, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated

            # Reward shaping for completion speed
            time_penalty = 0.01
            speed_bonus = 10 if done and reward > 0 else 0
            reward = reward - time_penalty + speed_bonus
            reward = np.clip(reward, -10, 10)

            next_state = preprocess_frame(next_frame)
            next_state = np.expand_dims(next_state, axis=-1)

            memory.append((state, action, reward, next_state, done))
            state = next_state
            episode_reward += reward
            steps_taken += 1
            
            # If memory has enough samples, train the model (optional)
            if len(memory) > batch_size:
                # Sample a minibatch from memory
                minibatch = random.sample(memory, batch_size)
                
                # Prepare training data from minibatch
                states = np.array([experience[0] for experience in minibatch])
                actions = np.array([experience[1] for experience in minibatch])
                rewards = np.array([experience[2] for experience in minibatch])
                next_states = np.array([experience[3] for experience in minibatch])
                dones = np.array([experience[4] for experience in minibatch])
                
                # One-hot encode the actions for categorical crossentropy
                actions_one_hot = tf.keras.utils.to_categorical(actions, num_classes=action_size)
                
                # Train the model using the states and actions
                model.fit(states, actions_one_hot, epochs=1, verbose=0)

            if done:
                break
        
        # Decay epsilon for exploration-exploitation balance
        if epsilon > epsilon_end:
            epsilon *= epsilon_decay
            
        #  Calculate duration after the episode finishes
        end_time = time.time()
        episode_duration = end_time - start_time
        
        # Print episode stats
        print(f"Episode {episode} | Reward: {episode_reward:.2f} | Steps: {steps_taken} | Time: {episode_duration:.2f}s | Epsilon: {epsilon:.4f}")
        
        # Save the model periodically
        if episode % save_freq == 0:
            model.save(f"{save_dir}/frogger_cnn_episode_{episode}.h5")
            print(f"Model saved at episode {episode}")
    
    # Save the final model
    model.save(f"{save_dir}/frogger_cnn_final.h5")
    print("Training completed!")
    
    env.close()
    return model

In [41]:
trained_model = train_cnn(episodes=2000, max_steps=50000, batch_size=32)
#Pray it works

Episode 1 | Reward: 15.30 | Steps: 271 | Time: 52.39s | Epsilon: 0.9950
Episode 2 | Reward: 9.58 | Steps: 242 | Time: 36.17s | Epsilon: 0.9900
Episode 3 | Reward: 3.95 | Steps: 305 | Time: 45.67s | Epsilon: 0.9851
Episode 4 | Reward: 3.07 | Steps: 293 | Time: 43.82s | Epsilon: 0.9801
Episode 5 | Reward: 6.33 | Steps: 267 | Time: 40.18s | Epsilon: 0.9752
Episode 6 | Reward: 14.89 | Steps: 312 | Time: 47.32s | Epsilon: 0.9704
Episode 7 | Reward: 5.82 | Steps: 318 | Time: 48.25s | Epsilon: 0.9655
Episode 8 | Reward: 5.17 | Steps: 283 | Time: 42.60s | Epsilon: 0.9607
Episode 9 | Reward: 8.11 | Steps: 289 | Time: 44.07s | Epsilon: 0.9559
Episode 10 | Reward: 13.95 | Steps: 306 | Time: 47.36s | Epsilon: 0.9511
Episode 11 | Reward: 4.11 | Steps: 389 | Time: 60.51s | Epsilon: 0.9464
Episode 12 | Reward: 11.92 | Steps: 309 | Time: 48.32s | Epsilon: 0.9416
Episode 13 | Reward: 6.57 | Steps: 243 | Time: 36.63s | Epsilon: 0.9369
Episode 14 | Reward: 6.23 | Steps: 277 | Time: 43.18s | Epsilon: 0.93



Episode 100 | Reward: 5.52 | Steps: 248 | Time: 51.69s | Epsilon: 0.6058
Model saved at episode 100
Episode 101 | Reward: 5.01 | Steps: 299 | Time: 64.74s | Epsilon: 0.6027
Episode 102 | Reward: 6.35 | Steps: 265 | Time: 57.37s | Epsilon: 0.5997
Episode 103 | Reward: 14.22 | Steps: 279 | Time: 59.35s | Epsilon: 0.5967
Episode 104 | Reward: 16.45 | Steps: 256 | Time: 54.45s | Epsilon: 0.5937
Episode 105 | Reward: 13.61 | Steps: 240 | Time: 50.20s | Epsilon: 0.5908
Episode 106 | Reward: 4.84 | Steps: 316 | Time: 66.92s | Epsilon: 0.5878
Episode 107 | Reward: 14.97 | Steps: 304 | Time: 63.91s | Epsilon: 0.5849
Episode 108 | Reward: 2.83 | Steps: 317 | Time: 69.05s | Epsilon: 0.5820
Episode 109 | Reward: 4.99 | Steps: 301 | Time: 64.70s | Epsilon: 0.5790
Episode 110 | Reward: 8.68 | Steps: 332 | Time: 71.31s | Epsilon: 0.5762
Episode 111 | Reward: 15.85 | Steps: 216 | Time: 46.19s | Epsilon: 0.5733
Episode 112 | Reward: 13.35 | Steps: 266 | Time: 56.72s | Epsilon: 0.5704
Episode 113 | Rewa



Episode 200 | Reward: 15.24 | Steps: 277 | Time: 75.94s | Epsilon: 0.3670
Model saved at episode 200
Episode 201 | Reward: 3.22 | Steps: 278 | Time: 73.54s | Epsilon: 0.3651
Episode 202 | Reward: 6.35 | Steps: 365 | Time: 99.03s | Epsilon: 0.3633
Episode 203 | Reward: 8.19 | Steps: 281 | Time: 74.43s | Epsilon: 0.3615
Episode 204 | Reward: 5.27 | Steps: 273 | Time: 71.99s | Epsilon: 0.3597
Episode 205 | Reward: 6.35 | Steps: 265 | Time: 71.21s | Epsilon: 0.3579
Episode 206 | Reward: 14.63 | Steps: 238 | Time: 64.21s | Epsilon: 0.3561
Episode 207 | Reward: 6.80 | Steps: 320 | Time: 85.91s | Epsilon: 0.3543
Episode 208 | Reward: 6.27 | Steps: 273 | Time: 73.88s | Epsilon: 0.3525
Episode 209 | Reward: 7.18 | Steps: 282 | Time: 75.16s | Epsilon: 0.3508
Episode 210 | Reward: 10.89 | Steps: 311 | Time: 83.86s | Epsilon: 0.3490
Episode 211 | Reward: 13.39 | Steps: 262 | Time: 70.70s | Epsilon: 0.3473
Episode 212 | Reward: 8.15 | Steps: 285 | Time: 75.01s | Epsilon: 0.3455
Episode 213 | Reward



Episode 300 | Reward: 5.27 | Steps: 273 | Time: 83.24s | Epsilon: 0.2223
Model saved at episode 300
Episode 301 | Reward: 10.47 | Steps: 353 | Time: 106.97s | Epsilon: 0.2212
Episode 302 | Reward: 2.53 | Steps: 347 | Time: 105.87s | Epsilon: 0.2201
Episode 303 | Reward: 13.09 | Steps: 292 | Time: 88.44s | Epsilon: 0.2190
Episode 304 | Reward: 3.95 | Steps: 305 | Time: 92.41s | Epsilon: 0.2179
Episode 305 | Reward: 3.30 | Steps: 270 | Time: 82.88s | Epsilon: 0.2168
Episode 306 | Reward: 6.06 | Steps: 294 | Time: 90.76s | Epsilon: 0.2157
Episode 307 | Reward: 8.27 | Steps: 273 | Time: 83.79s | Epsilon: 0.2146
Episode 308 | Reward: 14.92 | Steps: 309 | Time: 96.36s | Epsilon: 0.2136
Episode 309 | Reward: 7.26 | Steps: 274 | Time: 83.21s | Epsilon: 0.2125
Episode 310 | Reward: 16.25 | Steps: 276 | Time: 85.86s | Epsilon: 0.2114
Episode 311 | Reward: 14.77 | Steps: 224 | Time: 68.46s | Epsilon: 0.2104
Episode 312 | Reward: 5.83 | Steps: 217 | Time: 68.47s | Epsilon: 0.2093
Episode 313 | Rew



Episode 400 | Reward: 4.80 | Steps: 320 | Time: 115.57s | Epsilon: 0.1347
Model saved at episode 400
Episode 401 | Reward: 7.26 | Steps: 274 | Time: 97.96s | Epsilon: 0.1340
Episode 402 | Reward: 7.26 | Steps: 274 | Time: 96.45s | Epsilon: 0.1333
Episode 403 | Reward: 7.86 | Steps: 314 | Time: 110.77s | Epsilon: 0.1326
Episode 404 | Reward: 19.80 | Steps: 421 | Time: 147.99s | Epsilon: 0.1320
Episode 405 | Reward: 13.92 | Steps: 309 | Time: 106.34s | Epsilon: 0.1313
Episode 406 | Reward: 6.06 | Steps: 294 | Time: 101.98s | Epsilon: 0.1307
Episode 407 | Reward: 8.33 | Steps: 367 | Time: 129.26s | Epsilon: 0.1300
Episode 408 | Reward: 16.09 | Steps: 292 | Time: 103.10s | Epsilon: 0.1294
Episode 409 | Reward: 14.63 | Steps: 338 | Time: 119.40s | Epsilon: 0.1287
Episode 410 | Reward: 14.87 | Steps: 314 | Time: 112.22s | Epsilon: 0.1281
Episode 411 | Reward: 15.35 | Steps: 266 | Time: 95.56s | Epsilon: 0.1274
Episode 412 | Reward: 5.64 | Steps: 336 | Time: 122.33s | Epsilon: 0.1268
Episode 



Episode 500 | Reward: 4.09 | Steps: 291 | Time: 121.39s | Epsilon: 0.0997
Model saved at episode 500
Episode 501 | Reward: 6.00 | Steps: 300 | Time: 126.00s | Epsilon: 0.0997
Episode 502 | Reward: 6.13 | Steps: 387 | Time: 161.06s | Epsilon: 0.0997
Episode 503 | Reward: 9.47 | Steps: 353 | Time: 150.75s | Epsilon: 0.0997
Episode 504 | Reward: 7.80 | Steps: 320 | Time: 134.44s | Epsilon: 0.0997
Episode 505 | Reward: 17.07 | Steps: 294 | Time: 122.13s | Epsilon: 0.0997
Episode 506 | Reward: 16.46 | Steps: 355 | Time: 147.33s | Epsilon: 0.0997
Episode 507 | Reward: 3.50 | Steps: 350 | Time: 148.34s | Epsilon: 0.0997
Episode 508 | Reward: 6.01 | Steps: 299 | Time: 125.56s | Epsilon: 0.0997
Episode 509 | Reward: 14.74 | Steps: 327 | Time: 138.22s | Epsilon: 0.0997
Episode 510 | Reward: 5.06 | Steps: 294 | Time: 120.81s | Epsilon: 0.0997
Episode 511 | Reward: 6.23 | Steps: 277 | Time: 115.53s | Epsilon: 0.0997
Episode 512 | Reward: 9.20 | Steps: 280 | Time: 117.78s | Epsilon: 0.0997
Episode 



Episode 600 | Reward: 5.86 | Steps: 314 | Time: 148.27s | Epsilon: 0.0997
Model saved at episode 600
Episode 601 | Reward: 12.92 | Steps: 309 | Time: 139.00s | Epsilon: 0.0997
Episode 602 | Reward: 6.26 | Steps: 274 | Time: 128.22s | Epsilon: 0.0997
Episode 603 | Reward: 13.85 | Steps: 316 | Time: 150.07s | Epsilon: 0.0997
Episode 604 | Reward: 4.36 | Steps: 264 | Time: 123.80s | Epsilon: 0.0997
Episode 605 | Reward: 17.26 | Steps: 275 | Time: 130.86s | Epsilon: 0.0997
Episode 606 | Reward: 6.86 | Steps: 314 | Time: 152.27s | Epsilon: 0.0997
Episode 607 | Reward: 4.63 | Steps: 337 | Time: 160.39s | Epsilon: 0.0997
Episode 608 | Reward: 7.60 | Steps: 340 | Time: 165.82s | Epsilon: 0.0997
Episode 609 | Reward: 8.27 | Steps: 273 | Time: 125.76s | Epsilon: 0.0997
Episode 610 | Reward: 14.32 | Steps: 269 | Time: 129.34s | Epsilon: 0.0997
Episode 611 | Reward: 11.86 | Steps: 314 | Time: 151.40s | Epsilon: 0.0997
Episode 612 | Reward: 5.46 | Steps: 454 | Time: 221.70s | Epsilon: 0.0997
Episod



Episode 700 | Reward: 14.33 | Steps: 268 | Time: 139.32s | Epsilon: 0.0997
Model saved at episode 700
Episode 701 | Reward: 3.23 | Steps: 277 | Time: 147.61s | Epsilon: 0.0997
Episode 702 | Reward: 5.47 | Steps: 253 | Time: 135.40s | Epsilon: 0.0997
Episode 703 | Reward: 5.51 | Steps: 249 | Time: 131.27s | Epsilon: 0.0997
Episode 704 | Reward: 8.10 | Steps: 390 | Time: 211.15s | Epsilon: 0.0997
Episode 705 | Reward: 5.27 | Steps: 273 | Time: 146.79s | Epsilon: 0.0997
Episode 706 | Reward: 14.22 | Steps: 279 | Time: 152.31s | Epsilon: 0.0997
Episode 707 | Reward: 9.22 | Steps: 278 | Time: 147.91s | Epsilon: 0.0997
Episode 708 | Reward: 2.86 | Steps: 314 | Time: 168.50s | Epsilon: 0.0997
Episode 709 | Reward: 14.83 | Steps: 318 | Time: 167.57s | Epsilon: 0.0997
Episode 710 | Reward: 7.27 | Steps: 273 | Time: 146.43s | Epsilon: 0.0997
Episode 711 | Reward: 6.23 | Steps: 277 | Time: 146.81s | Epsilon: 0.0997
Episode 712 | Reward: 5.23 | Steps: 277 | Time: 147.47s | Epsilon: 0.0997
Episode 



Episode 800 | Reward: 3.23 | Steps: 377 | Time: 215.95s | Epsilon: 0.0997
Model saved at episode 800
Episode 801 | Reward: 7.16 | Steps: 284 | Time: 168.08s | Epsilon: 0.0997
Episode 802 | Reward: 3.86 | Steps: 314 | Time: 187.36s | Epsilon: 0.0997
Episode 803 | Reward: 7.80 | Steps: 320 | Time: 188.48s | Epsilon: 0.0997
Episode 804 | Reward: 8.11 | Steps: 289 | Time: 173.47s | Epsilon: 0.0997
Episode 805 | Reward: 12.85 | Steps: 316 | Time: 185.04s | Epsilon: 0.0997
Episode 806 | Reward: 6.33 | Steps: 267 | Time: 155.87s | Epsilon: 0.0997
Episode 807 | Reward: 3.35 | Steps: 465 | Time: 279.41s | Epsilon: 0.0997
Episode 808 | Reward: 5.11 | Steps: 289 | Time: 167.66s | Epsilon: 0.0997
Episode 809 | Reward: 8.23 | Steps: 377 | Time: 223.98s | Epsilon: 0.0997
Episode 810 | Reward: 16.36 | Steps: 265 | Time: 156.16s | Epsilon: 0.0997
Episode 811 | Reward: 7.34 | Steps: 266 | Time: 155.70s | Epsilon: 0.0997
Episode 812 | Reward: 8.82 | Steps: 318 | Time: 183.56s | Epsilon: 0.0997
Episode 8



Episode 900 | Reward: 3.13 | Steps: 387 | Time: 252.39s | Epsilon: 0.0997
Model saved at episode 900
Episode 901 | Reward: 5.07 | Steps: 293 | Time: 189.62s | Epsilon: 0.0997
Episode 902 | Reward: 7.40 | Steps: 360 | Time: 236.13s | Epsilon: 0.0997
Episode 903 | Reward: 5.82 | Steps: 318 | Time: 203.93s | Epsilon: 0.0997
Episode 904 | Reward: 4.49 | Steps: 451 | Time: 288.05s | Epsilon: 0.0997
Episode 905 | Reward: 3.55 | Steps: 345 | Time: 220.42s | Epsilon: 0.0997
Episode 906 | Reward: 14.74 | Steps: 327 | Time: 212.36s | Epsilon: 0.0997
Episode 907 | Reward: 7.95 | Steps: 305 | Time: 203.82s | Epsilon: 0.0997
Episode 908 | Reward: 15.06 | Steps: 295 | Time: 195.65s | Epsilon: 0.0997
Episode 909 | Reward: 8.12 | Steps: 288 | Time: 182.43s | Epsilon: 0.0997
Episode 910 | Reward: 4.98 | Steps: 402 | Time: 267.11s | Epsilon: 0.0997
Episode 911 | Reward: 13.48 | Steps: 253 | Time: 164.41s | Epsilon: 0.0997
Episode 912 | Reward: 17.39 | Steps: 362 | Time: 239.59s | Epsilon: 0.0997
Episode



Episode 1000 | Reward: 8.07 | Steps: 293 | Time: 211.67s | Epsilon: 0.0997
Model saved at episode 1000
Episode 1001 | Reward: 7.72 | Steps: 328 | Time: 232.40s | Epsilon: 0.0997
Episode 1002 | Reward: 14.75 | Steps: 226 | Time: 161.41s | Epsilon: 0.0997
Episode 1003 | Reward: 16.62 | Steps: 239 | Time: 172.28s | Epsilon: 0.0997
Episode 1004 | Reward: 5.23 | Steps: 277 | Time: 200.02s | Epsilon: 0.0997
Episode 1005 | Reward: 6.14 | Steps: 386 | Time: 275.28s | Epsilon: 0.0997
Episode 1006 | Reward: 13.87 | Steps: 314 | Time: 231.78s | Epsilon: 0.0997
Episode 1007 | Reward: 3.86 | Steps: 314 | Time: 221.84s | Epsilon: 0.0997
Episode 1008 | Reward: 1.95 | Steps: 305 | Time: 225.89s | Epsilon: 0.0997
Episode 1009 | Reward: 16.03 | Steps: 398 | Time: 271.88s | Epsilon: 0.0997
Episode 1010 | Reward: 18.74 | Steps: 327 | Time: 229.13s | Epsilon: 0.0997
Episode 1011 | Reward: 6.27 | Steps: 273 | Time: 196.17s | Epsilon: 0.0997
Episode 1012 | Reward: 6.17 | Steps: 283 | Time: 206.60s | Epsilon:



Episode 1100 | Reward: 4.35 | Steps: 265 | Time: 207.09s | Epsilon: 0.0997
Model saved at episode 1100
Episode 1101 | Reward: 14.85 | Steps: 216 | Time: 167.99s | Epsilon: 0.0997
Episode 1102 | Reward: 7.83 | Steps: 317 | Time: 242.28s | Epsilon: 0.0997
Episode 1103 | Reward: 5.23 | Steps: 277 | Time: 210.10s | Epsilon: 0.0997
Episode 1104 | Reward: 16.25 | Steps: 276 | Time: 207.05s | Epsilon: 0.0997
Episode 1105 | Reward: 14.83 | Steps: 318 | Time: 249.37s | Epsilon: 0.0997
Episode 1106 | Reward: 15.88 | Steps: 313 | Time: 237.58s | Epsilon: 0.0997
Episode 1107 | Reward: 13.64 | Steps: 237 | Time: 179.07s | Epsilon: 0.0997
Episode 1108 | Reward: 5.23 | Steps: 377 | Time: 286.02s | Epsilon: 0.0997
Episode 1109 | Reward: 6.06 | Steps: 294 | Time: 226.25s | Epsilon: 0.0997
Episode 1110 | Reward: 5.97 | Steps: 303 | Time: 234.33s | Epsilon: 0.0997
Episode 1111 | Reward: 14.24 | Steps: 277 | Time: 224.67s | Epsilon: 0.0997
Episode 1112 | Reward: 21.92 | Steps: 309 | Time: 233.56s | Epsilo

MemoryError: Unable to allocate 4.69 MiB for an array with shape (32, 120, 160, 1) and data type float64