In [5]:
import gymnasium as gym
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from keras.optimizers import Adam
import cv2
import random
from collections import deque
import time
import os

In [2]:
# Instantiate the Frogger environment and pull the first observation so the
# raw frame (and the accompanying info dict) can be inspected interactively.
env = gym.make(id='ALE/Frogger-v5')
(frame, info) = env.reset()

In [2]:
# Preprocessing function
def preprocess_frame(frame):
    """Convert an RGB frame to a normalized grayscale image.

    No resizing or cropping is performed, so the result keeps the height and
    width of the input (the model in this notebook is built for 210x160).

    Args:
        frame: RGB image array accepted by ``cv2.cvtColor``.

    Returns:
        2-D ``float32`` array holding the grayscale pixel values divided by
        255 (i.e. in [0, 1] for 8-bit input).
    """
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    # Divide in float32 rather than NumPy's default float64: every frame is
    # kept in the replay buffer, so the narrower dtype halves its memory
    # footprint at no cost to the pixel data.
    return gray_frame.astype(np.float32) / np.float32(255.0)

# Building the CNN model for Q-learning
def build_model(action_size, input_shape=(210, 160, 1), learning_rate=0.00025):
    """Build and compile the convolutional Q-network.

    The network is three ReLU convolutions (32@8x8/4, 64@4x4/2, 64@3x3/1),
    a 512-unit ReLU dense layer and a linear output layer with one unit per
    action. It is compiled with mean-squared-error loss and the Adam optimizer.

    Args:
        action_size: Number of discrete actions, i.e. units in the output layer.
        input_shape: Shape of one input sample as (height, width, channels).
            Defaults to a single-channel 210x160 frame.
        learning_rate: Learning rate passed to Adam.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=input_shape))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    # Linear activation: the outputs are unbounded Q-value estimates, one per action.
    model.add(Dense(action_size, activation='linear'))

    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))
    return model

# Experience replay memory
def create_memory(capacity=100000):
    """Return an empty replay buffer bounded to ``capacity`` entries.

    The buffer is a ``collections.deque`` created with ``maxlen=capacity``, so
    appending to a full buffer drops the oldest entry.
    """
    replay_buffer = deque([], maxlen=capacity)
    return replay_buffer

def add_to_memory(memory, state, action, reward, next_state, done):
    """Append one transition to ``memory``.

    The transition is stored as the 5-tuple
    ``(state, action, reward, next_state, done)``.
    """
    transition = (state, action, reward, next_state, done)
    memory.append(transition)

def sample_from_memory(memory, batch_size):
    """Draw ``batch_size`` distinct transitions from ``memory`` at random.

    Sampling is without replacement and uses the ``random`` module; a
    ``ValueError`` is raised if ``batch_size`` exceeds ``len(memory)``.
    """
    # Choose distinct positions first, then look the transitions up.
    chosen_positions = random.sample(range(len(memory)), batch_size)
    return [memory[position] for position in chosen_positions]

# Epsilon-greedy policy
def epsilon_greedy_action(model, state, epsilon, action_size):
    """Pick an action with an epsilon-greedy rule.

    A uniform draw at or below ``epsilon`` yields a uniformly random action
    index in ``range(action_size)``; otherwise the action with the highest
    Q-value predicted by ``model`` for ``state`` is returned.
    """
    # Explore: return early with a random action.
    if np.random.random() <= epsilon:
        return random.randrange(action_size)

    # Exploit: the model expects a batch, so wrap the single state in one.
    batched_state = np.expand_dims(state, axis=0)
    predictions = model.predict(batched_state, verbose=0)
    return np.argmax(predictions[0])

# Training function
def train_dqn(episodes=10000,
              max_steps=50000,
              batch_size=32,
              gamma=0.99,
              epsilon_start=1.0,
              epsilon_end=0.1,
              epsilon_decay=0.995,
              update_target_freq=10000,
              memory_capacity=100000,
              save_freq=100):
    """Train a DQN agent on 'ALE/Frogger-v5' and return the online network.

    Each environment step stores a transition in a replay buffer and, once the
    buffer holds more than ``batch_size`` transitions, performs one replay
    update of the online network. A separate target network supplies the
    bootstrap values and is synchronised with the online network every
    ``update_target_freq`` environment steps. Checkpoints are written to the
    ``frogger_model`` directory.

    Args:
        episodes: Number of episodes to play.
        max_steps: Maximum environment steps per episode.
        batch_size: Number of transitions sampled per replay update.
        gamma: Discount factor applied to the bootstrap term.
        epsilon_start: Initial exploration rate.
        epsilon_end: Lower bound for the exploration rate.
        epsilon_decay: Multiplicative decay applied to epsilon after each episode.
        update_target_freq: Environment steps between target-network syncs.
        memory_capacity: Maximum number of transitions kept in the replay buffer.
        save_freq: Episodes between checkpoint saves.

    Returns:
        The trained online (main) model.
    """
    env = gym.make('ALE/Frogger-v5')
    # try/finally: release the emulator even if training is interrupted
    # (e.g. KeyboardInterrupt from the notebook) or a step raises.
    try:
        action_size = env.action_space.n

        # Online network plus a target network that starts as an exact copy.
        main_model = build_model(action_size)
        target_model = build_model(action_size)
        target_model.set_weights(main_model.get_weights())

        memory = create_memory(capacity=memory_capacity)

        total_steps = 0
        epsilon = epsilon_start

        save_dir = "frogger_model"
        os.makedirs(save_dir, exist_ok=True)

        for episode in range(1, episodes + 1):
            frame, info = env.reset()
            # Add a trailing channel axis: (height, width) -> (height, width, 1).
            state = np.expand_dims(preprocess_frame(frame), axis=-1)

            episode_reward = 0
            # Tracked separately from the loop variable so the episode summary
            # below is well defined even when max_steps is 0.
            episode_steps = 0

            for _ in range(max_steps):
                action = epsilon_greedy_action(main_model, state, epsilon, action_size)

                next_frame, reward, terminated, truncated, info = env.step(action)
                next_state = np.expand_dims(preprocess_frame(next_frame), axis=-1)

                # Store only `terminated` as the terminal flag. A truncated
                # episode (time limit) did not reach a terminal state, so its
                # last transition must still bootstrap from next_state.
                add_to_memory(memory, state, action, reward, next_state, terminated)

                state = next_state
                episode_reward += reward
                total_steps += 1
                episode_steps += 1

                # Learn once the buffer can supply a full minibatch.
                if len(memory) > batch_size:
                    _replay_update(main_model, target_model, memory, batch_size, gamma)

                if total_steps % update_target_freq == 0:
                    target_model.set_weights(main_model.get_weights())
                    print(f"Target network updated at step {total_steps}")

                # Either signal ends the episode, regardless of how it is
                # recorded in the replay buffer.
                if terminated or truncated:
                    break

            # Decay exploration, never dropping below the configured floor.
            if epsilon > epsilon_end:
                epsilon = max(epsilon_end, epsilon * epsilon_decay)

            print(f"Episode: {episode}, Reward: {episode_reward}, Epsilon: {epsilon:.4f}, Steps: {episode_steps}")

            if episode % save_freq == 0:
                main_model.save(f"{save_dir}/frogger_dqn_episode_{episode}.h5")
                print(f"Model saved at episode {episode}")

        main_model.save(f"{save_dir}/frogger_dqn_final.h5")
        print("Training completed!")
        return main_model
    finally:
        env.close()


def _replay_update(main_model, target_model, memory, batch_size, gamma):
    """Fit the online network on one random minibatch of stored transitions."""
    minibatch = sample_from_memory(memory, batch_size)
    states, actions, rewards, next_states, terminals = (
        np.array(column) for column in zip(*minibatch)
    )

    # Start from the online network's own predictions so only the entry for
    # the action actually taken contributes to the loss.
    target_q_values = main_model.predict(states, verbose=0)
    next_q_values = target_model.predict(next_states, verbose=0)

    # Q-learning target: r for terminal transitions, r + gamma * max_a' Q_target(s', a') otherwise.
    bootstrap = gamma * np.max(next_q_values, axis=1)
    targets = rewards + np.where(terminals.astype(bool), 0.0, bootstrap)
    target_q_values[np.arange(len(minibatch)), actions] = targets

    # Pass batch_size explicitly so the whole minibatch is one gradient step;
    # fit() would otherwise re-split it using its own default batch size.
    main_model.fit(states, target_q_values, batch_size=batch_size, epochs=1, verbose=0)

# Main execution
if __name__ == "__main__":
    # Hyperparameters for this training run; adjust as needed.
    train_dqn(
        **{
            # Run length
            "episodes": 1000,
            "max_steps": 10000,
            # Optimisation
            "batch_size": 32,
            "gamma": 0.99,
            # Exploration schedule
            "epsilon_start": 1.0,
            "epsilon_end": 0.01,
            "epsilon_decay": 0.995,
            # Target-network sync, replay capacity and checkpoint cadence
            "update_target_freq": 1000,
            "memory_capacity": 50000,
            "save_freq": 50,
        }
    )

Episode: 1, Reward: 10.0, Epsilon: 0.9950, Steps: 271
Episode: 2, Reward: 7.0, Epsilon: 0.9900, Steps: 232
Episode: 3, Reward: 8.0, Epsilon: 0.9851, Steps: 277
Target network updated at step 1000
Episode: 4, Reward: 9.0, Epsilon: 0.9801, Steps: 290
Episode: 5, Reward: 8.0, Epsilon: 0.9752, Steps: 258
Episode: 6, Reward: 10.0, Epsilon: 0.9704, Steps: 307
Episode: 7, Reward: 8.0, Epsilon: 0.9655, Steps: 248
Target network updated at step 2000
Episode: 8, Reward: 9.0, Epsilon: 0.9607, Steps: 306
Episode: 9, Reward: 7.0, Epsilon: 0.9559, Steps: 262
Episode: 10, Reward: 13.0, Epsilon: 0.9511, Steps: 281
Episode: 11, Reward: 11.0, Epsilon: 0.9464, Steps: 248
Target network updated at step 3000
Episode: 12, Reward: 9.0, Epsilon: 0.9416, Steps: 217
Episode: 13, Reward: 7.0, Epsilon: 0.9369, Steps: 273
Episode: 14, Reward: 9.0, Epsilon: 0.9322, Steps: 325
Target network updated at step 4000
Episode: 15, Reward: 8.0, Epsilon: 0.9276, Steps: 226
Episode: 16, Reward: 12.0, Epsilon: 0.9229, Steps: 



Episode: 50, Reward: 12.0, Epsilon: 0.7783, Steps: 279
Model saved at episode 50
Target network updated at step 14000
Episode: 51, Reward: 13.0, Epsilon: 0.7744, Steps: 270
Episode: 52, Reward: 8.0, Epsilon: 0.7705, Steps: 236
Episode: 53, Reward: 11.0, Epsilon: 0.7667, Steps: 285
Episode: 54, Reward: 10.0, Epsilon: 0.7629, Steps: 259
Target network updated at step 15000
Episode: 55, Reward: 8.0, Epsilon: 0.7590, Steps: 229
Episode: 56, Reward: 7.0, Epsilon: 0.7553, Steps: 249
Episode: 57, Reward: 14.0, Epsilon: 0.7515, Steps: 275
Episode: 58, Reward: 11.0, Epsilon: 0.7477, Steps: 245
Target network updated at step 16000
Episode: 59, Reward: 10.0, Epsilon: 0.7440, Steps: 320
Episode: 60, Reward: 9.0, Epsilon: 0.7403, Steps: 279
Episode: 61, Reward: 8.0, Epsilon: 0.7366, Steps: 210
Target network updated at step 17000
Episode: 62, Reward: 10.0, Epsilon: 0.7329, Steps: 308
Episode: 63, Reward: 11.0, Epsilon: 0.7292, Steps: 260
Episode: 64, Reward: 8.0, Epsilon: 0.7256, Steps: 252
Episode



Episode: 100, Reward: 8.0, Epsilon: 0.6058, Steps: 233
Model saved at episode 100
Episode: 101, Reward: 16.0, Epsilon: 0.6027, Steps: 287
Target network updated at step 28000
Episode: 102, Reward: 11.0, Epsilon: 0.5997, Steps: 318
Episode: 103, Reward: 12.0, Epsilon: 0.5967, Steps: 336
Episode: 104, Reward: 12.0, Epsilon: 0.5937, Steps: 257
Target network updated at step 29000
Episode: 105, Reward: 11.0, Epsilon: 0.5908, Steps: 305
Episode: 106, Reward: 15.0, Epsilon: 0.5878, Steps: 279
Episode: 107, Reward: 11.0, Epsilon: 0.5849, Steps: 306
Episode: 108, Reward: 14.0, Epsilon: 0.5820, Steps: 311
Target network updated at step 30000
Episode: 109, Reward: 14.0, Epsilon: 0.5790, Steps: 286
Episode: 110, Reward: 10.0, Epsilon: 0.5762, Steps: 229
Episode: 111, Reward: 13.0, Epsilon: 0.5733, Steps: 273
Target network updated at step 31000
Episode: 112, Reward: 18.0, Epsilon: 0.5704, Steps: 310
Episode: 113, Reward: 16.0, Epsilon: 0.5676, Steps: 345
Episode: 114, Reward: 15.0, Epsilon: 0.564



Episode: 150, Reward: 18.0, Epsilon: 0.4715, Steps: 321
Model saved at episode 150
Episode: 151, Reward: 19.0, Epsilon: 0.4691, Steps: 327
Target network updated at step 43000
Episode: 152, Reward: 14.0, Epsilon: 0.4668, Steps: 335
Episode: 153, Reward: 12.0, Epsilon: 0.4644, Steps: 327
Episode: 154, Reward: 16.0, Epsilon: 0.4621, Steps: 313
Target network updated at step 44000
Episode: 155, Reward: 18.0, Epsilon: 0.4598, Steps: 352
Episode: 156, Reward: 14.0, Epsilon: 0.4575, Steps: 309
Episode: 157, Reward: 9.0, Epsilon: 0.4552, Steps: 302
Target network updated at step 45000
Episode: 158, Reward: 11.0, Epsilon: 0.4529, Steps: 307
Episode: 159, Reward: 11.0, Epsilon: 0.4507, Steps: 236
Episode: 160, Reward: 11.0, Epsilon: 0.4484, Steps: 300
Target network updated at step 46000
Episode: 161, Reward: 18.0, Epsilon: 0.4462, Steps: 389
Episode: 162, Reward: 11.0, Epsilon: 0.4440, Steps: 265
Episode: 163, Reward: 12.0, Epsilon: 0.4417, Steps: 319
Episode: 164, Reward: 10.0, Epsilon: 0.439



Episode: 200, Reward: 15.0, Epsilon: 0.3670, Steps: 282
Model saved at episode 200
Episode: 201, Reward: 16.0, Epsilon: 0.3651, Steps: 303
Target network updated at step 58000
Episode: 202, Reward: 11.0, Epsilon: 0.3633, Steps: 321
Episode: 203, Reward: 17.0, Epsilon: 0.3615, Steps: 265
Episode: 204, Reward: 20.0, Epsilon: 0.3597, Steps: 319
Target network updated at step 59000
Episode: 205, Reward: 19.0, Epsilon: 0.3579, Steps: 305
Episode: 206, Reward: 19.0, Epsilon: 0.3561, Steps: 330
Episode: 207, Reward: 12.0, Epsilon: 0.3543, Steps: 232
Episode: 208, Reward: 12.0, Epsilon: 0.3525, Steps: 302
Target network updated at step 60000
Episode: 209, Reward: 17.0, Epsilon: 0.3508, Steps: 323
Episode: 210, Reward: 18.0, Epsilon: 0.3490, Steps: 297
Episode: 211, Reward: 10.0, Epsilon: 0.3473, Steps: 251
Target network updated at step 61000
Episode: 212, Reward: 9.0, Epsilon: 0.3455, Steps: 315
Episode: 213, Reward: 12.0, Epsilon: 0.3438, Steps: 270
Episode: 214, Reward: 18.0, Epsilon: 0.342



Episode: 250, Reward: 15.0, Epsilon: 0.2856, Steps: 337
Model saved at episode 250
Episode: 251, Reward: 19.0, Epsilon: 0.2842, Steps: 269
Episode: 252, Reward: 14.0, Epsilon: 0.2828, Steps: 271
Target network updated at step 73000
Episode: 253, Reward: 19.0, Epsilon: 0.2813, Steps: 385
Episode: 254, Reward: 12.0, Epsilon: 0.2799, Steps: 302
Episode: 255, Reward: 14.0, Epsilon: 0.2785, Steps: 334
Target network updated at step 74000
Episode: 256, Reward: 16.0, Epsilon: 0.2771, Steps: 305
Episode: 257, Reward: 18.0, Epsilon: 0.2758, Steps: 434
Episode: 258, Reward: 17.0, Epsilon: 0.2744, Steps: 379
Target network updated at step 75000
Episode: 259, Reward: 17.0, Epsilon: 0.2730, Steps: 319
Episode: 260, Reward: 17.0, Epsilon: 0.2716, Steps: 281
Episode: 261, Reward: 18.0, Epsilon: 0.2703, Steps: 312
Target network updated at step 76000
Episode: 262, Reward: 21.0, Epsilon: 0.2689, Steps: 377
Episode: 263, Reward: 13.0, Epsilon: 0.2676, Steps: 325
Episode: 264, Reward: 16.0, Epsilon: 0.26



Episode: 300, Reward: 19.0, Epsilon: 0.2223, Steps: 380
Model saved at episode 300
Episode: 301, Reward: 12.0, Epsilon: 0.2212, Steps: 393
Episode: 302, Reward: 15.0, Epsilon: 0.2201, Steps: 384
Target network updated at step 89000
Episode: 303, Reward: 16.0, Epsilon: 0.2190, Steps: 299
Episode: 304, Reward: 15.0, Epsilon: 0.2179, Steps: 281
Episode: 305, Reward: 19.0, Epsilon: 0.2168, Steps: 396
Target network updated at step 90000
Episode: 306, Reward: 13.0, Epsilon: 0.2157, Steps: 260
Episode: 307, Reward: 13.0, Epsilon: 0.2146, Steps: 273
Episode: 308, Reward: 20.0, Epsilon: 0.2136, Steps: 420
Target network updated at step 91000
Episode: 309, Reward: 16.0, Epsilon: 0.2125, Steps: 344
Episode: 310, Reward: 14.0, Epsilon: 0.2114, Steps: 319
Episode: 311, Reward: 18.0, Epsilon: 0.2104, Steps: 361
Target network updated at step 92000
Episode: 312, Reward: 17.0, Epsilon: 0.2093, Steps: 387
Episode: 313, Reward: 12.0, Epsilon: 0.2083, Steps: 334
Episode: 314, Reward: 18.0, Epsilon: 0.20



Episode: 350, Reward: 17.0, Epsilon: 0.1730, Steps: 390
Model saved at episode 350
Episode: 351, Reward: 20.0, Epsilon: 0.1721, Steps: 341
Target network updated at step 105000
Episode: 352, Reward: 12.0, Epsilon: 0.1713, Steps: 399
Episode: 353, Reward: 11.0, Epsilon: 0.1704, Steps: 278
Episode: 354, Reward: 18.0, Epsilon: 0.1696, Steps: 396
Episode: 355, Reward: 16.0, Epsilon: 0.1687, Steps: 286
Target network updated at step 106000
Episode: 356, Reward: 11.0, Epsilon: 0.1679, Steps: 317
Episode: 357, Reward: 14.0, Epsilon: 0.1670, Steps: 313
Episode: 358, Reward: 19.0, Epsilon: 0.1662, Steps: 333
Target network updated at step 107000
Episode: 359, Reward: 19.0, Epsilon: 0.1654, Steps: 285
Episode: 360, Reward: 19.0, Epsilon: 0.1646, Steps: 258
Episode: 361, Reward: 17.0, Epsilon: 0.1637, Steps: 299
Target network updated at step 108000
Episode: 362, Reward: 17.0, Epsilon: 0.1629, Steps: 228
Episode: 363, Reward: 17.0, Epsilon: 0.1621, Steps: 344
Episode: 364, Reward: 13.0, Epsilon: 



Episode: 400, Reward: 13.0, Epsilon: 0.1347, Steps: 284
Model saved at episode 400
Episode: 401, Reward: 19.0, Epsilon: 0.1340, Steps: 281
Episode: 402, Reward: 19.0, Epsilon: 0.1333, Steps: 279
Target network updated at step 120000
Episode: 403, Reward: 10.0, Epsilon: 0.1326, Steps: 233
Episode: 404, Reward: 16.0, Epsilon: 0.1320, Steps: 295
Episode: 405, Reward: 16.0, Epsilon: 0.1313, Steps: 379
Target network updated at step 121000
Episode: 406, Reward: 10.0, Epsilon: 0.1307, Steps: 233
Episode: 407, Reward: 13.0, Epsilon: 0.1300, Steps: 394
Episode: 408, Reward: 22.0, Epsilon: 0.1294, Steps: 362
Target network updated at step 122000
Episode: 409, Reward: 13.0, Epsilon: 0.1287, Steps: 281
Episode: 410, Reward: 14.0, Epsilon: 0.1281, Steps: 424
Episode: 411, Reward: 19.0, Epsilon: 0.1274, Steps: 289
Target network updated at step 123000
Episode: 412, Reward: 23.0, Epsilon: 0.1268, Steps: 415
Episode: 413, Reward: 10.0, Epsilon: 0.1262, Steps: 272
Episode: 414, Reward: 15.0, Epsilon: 



Episode: 450, Reward: 14.0, Epsilon: 0.1048, Steps: 281
Model saved at episode 450
Target network updated at step 137000
Episode: 451, Reward: 17.0, Epsilon: 0.1043, Steps: 486
Episode: 452, Reward: 18.0, Epsilon: 0.1038, Steps: 252
Episode: 453, Reward: 20.0, Epsilon: 0.1032, Steps: 318
Target network updated at step 138000
Episode: 454, Reward: 21.0, Epsilon: 0.1027, Steps: 329
Episode: 455, Reward: 19.0, Epsilon: 0.1022, Steps: 447
Episode: 456, Reward: 14.0, Epsilon: 0.1017, Steps: 377
Target network updated at step 139000
Episode: 457, Reward: 20.0, Epsilon: 0.1012, Steps: 305
Episode: 458, Reward: 16.0, Epsilon: 0.1007, Steps: 496
Target network updated at step 140000
Episode: 459, Reward: 16.0, Epsilon: 0.1002, Steps: 394
Episode: 460, Reward: 18.0, Epsilon: 0.0997, Steps: 384
Episode: 461, Reward: 16.0, Epsilon: 0.0992, Steps: 391
Target network updated at step 141000
Episode: 462, Reward: 15.0, Epsilon: 0.0987, Steps: 414
Episode: 463, Reward: 16.0, Epsilon: 0.0982, Steps: 439



Episode: 500, Reward: 18.0, Epsilon: 0.0816, Steps: 305
Model saved at episode 500
Episode: 501, Reward: 16.0, Epsilon: 0.0812, Steps: 369
Episode: 502, Reward: 20.0, Epsilon: 0.0808, Steps: 302
Target network updated at step 157000
Episode: 503, Reward: 15.0, Epsilon: 0.0804, Steps: 735
Episode: 504, Reward: 18.0, Epsilon: 0.0800, Steps: 276
Target network updated at step 158000
Episode: 505, Reward: 17.0, Epsilon: 0.0796, Steps: 713
Target network updated at step 159000
Episode: 506, Reward: 23.0, Epsilon: 0.0792, Steps: 793
Episode: 507, Reward: 13.0, Epsilon: 0.0788, Steps: 395
Episode: 508, Reward: 15.0, Epsilon: 0.0784, Steps: 318
Target network updated at step 160000
Episode: 509, Reward: 12.0, Epsilon: 0.0780, Steps: 312
Episode: 510, Reward: 18.0, Epsilon: 0.0776, Steps: 320
Episode: 511, Reward: 8.0, Epsilon: 0.0772, Steps: 306
Target network updated at step 161000
Episode: 512, Reward: 22.0, Epsilon: 0.0768, Steps: 705
Target network updated at step 162000
Episode: 513, Rewa



Episode: 550, Reward: 13.0, Epsilon: 0.0635, Steps: 291
Model saved at episode 550
Episode: 551, Reward: 17.0, Epsilon: 0.0632, Steps: 275
Target network updated at step 177000
Episode: 552, Reward: 16.0, Epsilon: 0.0629, Steps: 342
Episode: 553, Reward: 9.0, Epsilon: 0.0625, Steps: 409
Episode: 554, Reward: 16.0, Epsilon: 0.0622, Steps: 258
Target network updated at step 178000
Episode: 555, Reward: 26.0, Epsilon: 0.0619, Steps: 495
Episode: 556, Reward: 16.0, Epsilon: 0.0616, Steps: 329
Episode: 557, Reward: 13.0, Epsilon: 0.0613, Steps: 283
Target network updated at step 179000
Episode: 558, Reward: 12.0, Epsilon: 0.0610, Steps: 393
Episode: 559, Reward: 10.0, Epsilon: 0.0607, Steps: 230
Episode: 560, Reward: 14.0, Epsilon: 0.0604, Steps: 458
Target network updated at step 180000
Episode: 561, Reward: 18.0, Epsilon: 0.0601, Steps: 570
Episode: 562, Reward: 20.0, Epsilon: 0.0598, Steps: 440
Target network updated at step 181000
Episode: 563, Reward: 14.0, Epsilon: 0.0595, Steps: 284




Episode: 600, Reward: 14.0, Epsilon: 0.0494, Steps: 337
Model saved at episode 600
Episode: 601, Reward: 13.0, Epsilon: 0.0492, Steps: 250
Episode: 602, Reward: 12.0, Epsilon: 0.0489, Steps: 232
Episode: 603, Reward: 13.0, Epsilon: 0.0487, Steps: 336
Target network updated at step 199000
Episode: 604, Reward: 19.0, Epsilon: 0.0484, Steps: 301
Episode: 605, Reward: 15.0, Epsilon: 0.0482, Steps: 292
Episode: 606, Reward: 16.0, Epsilon: 0.0479, Steps: 265
Target network updated at step 200000
Episode: 607, Reward: 8.0, Epsilon: 0.0477, Steps: 326
Episode: 608, Reward: 17.0, Epsilon: 0.0475, Steps: 297
Episode: 609, Reward: 16.0, Epsilon: 0.0472, Steps: 319
Target network updated at step 201000
Episode: 610, Reward: 22.0, Epsilon: 0.0470, Steps: 363
Episode: 611, Reward: 18.0, Epsilon: 0.0468, Steps: 293
Episode: 612, Reward: 15.0, Epsilon: 0.0465, Steps: 281
Target network updated at step 202000
Episode: 613, Reward: 22.0, Epsilon: 0.0463, Steps: 359
Episode: 614, Reward: 15.0, Epsilon: 0



Episode: 650, Reward: 15.0, Epsilon: 0.0385, Steps: 317
Model saved at episode 650
Target network updated at step 216000
Episode: 651, Reward: 14.0, Epsilon: 0.0383, Steps: 290
Episode: 652, Reward: 14.0, Epsilon: 0.0381, Steps: 385
Episode: 653, Reward: 14.0, Epsilon: 0.0379, Steps: 386
Target network updated at step 217000
Episode: 654, Reward: 14.0, Epsilon: 0.0377, Steps: 281
Episode: 655, Reward: 15.0, Epsilon: 0.0375, Steps: 365
Episode: 656, Reward: 17.0, Epsilon: 0.0373, Steps: 383
Target network updated at step 218000
Episode: 657, Reward: 15.0, Epsilon: 0.0371, Steps: 703
Episode: 658, Reward: 10.0, Epsilon: 0.0369, Steps: 237
Target network updated at step 219000
Episode: 659, Reward: 12.0, Epsilon: 0.0368, Steps: 516
Episode: 660, Reward: 9.0, Epsilon: 0.0366, Steps: 307
Target network updated at step 220000
Episode: 661, Reward: 15.0, Epsilon: 0.0364, Steps: 719
Episode: 662, Reward: 9.0, Epsilon: 0.0362, Steps: 221
Episode: 663, Reward: 15.0, Epsilon: 0.0360, Steps: 283
T



Episode: 700, Reward: 15.0, Epsilon: 0.0299, Steps: 767
Model saved at episode 700
Target network updated at step 239000
Episode: 701, Reward: 15.0, Epsilon: 0.0298, Steps: 550
Episode: 702, Reward: 19.0, Epsilon: 0.0296, Steps: 388
Episode: 703, Reward: 17.0, Epsilon: 0.0295, Steps: 237
Target network updated at step 240000
Episode: 704, Reward: 12.0, Epsilon: 0.0293, Steps: 392
Episode: 705, Reward: 17.0, Epsilon: 0.0292, Steps: 297
Target network updated at step 241000
Episode: 706, Reward: 16.0, Epsilon: 0.0290, Steps: 547
Episode: 707, Reward: 15.0, Epsilon: 0.0289, Steps: 386
Episode: 708, Reward: 18.0, Epsilon: 0.0288, Steps: 321
Target network updated at step 242000
Episode: 709, Reward: 12.0, Epsilon: 0.0286, Steps: 719
Target network updated at step 243000
Episode: 710, Reward: 22.0, Epsilon: 0.0285, Steps: 815
Episode: 711, Reward: 14.0, Epsilon: 0.0283, Steps: 289
Target network updated at step 244000
Episode: 712, Reward: 14.0, Epsilon: 0.0282, Steps: 555
Episode: 713, Rew



Episode: 750, Reward: 20.0, Epsilon: 0.0233, Steps: 333
Model saved at episode 750
Target network updated at step 263000
Episode: 751, Reward: 19.0, Epsilon: 0.0232, Steps: 363
Episode: 752, Reward: 22.0, Epsilon: 0.0231, Steps: 479
Target network updated at step 264000
Episode: 753, Reward: 23.0, Epsilon: 0.0229, Steps: 535
Episode: 754, Reward: 27.0, Epsilon: 0.0228, Steps: 441
Target network updated at step 265000
Episode: 755, Reward: 18.0, Epsilon: 0.0227, Steps: 703
Episode: 756, Reward: 16.0, Epsilon: 0.0226, Steps: 550
Target network updated at step 266000
Episode: 757, Reward: 21.0, Epsilon: 0.0225, Steps: 545
Target network updated at step 267000
Episode: 758, Reward: 20.0, Epsilon: 0.0224, Steps: 719
Episode: 759, Reward: 18.0, Epsilon: 0.0223, Steps: 395
Episode: 760, Reward: 18.0, Epsilon: 0.0222, Steps: 237
Target network updated at step 268000
Episode: 761, Reward: 19.0, Epsilon: 0.0220, Steps: 735
Target network updated at step 269000
Episode: 762, Reward: 14.0, Epsilon



Episode: 800, Reward: 22.0, Epsilon: 0.0181, Steps: 381
Model saved at episode 800
Episode: 801, Reward: 28.0, Epsilon: 0.0180, Steps: 750
Target network updated at step 287000
Episode: 802, Reward: 23.0, Epsilon: 0.0180, Steps: 703
Target network updated at step 288000
Episode: 803, Reward: 20.0, Epsilon: 0.0179, Steps: 719
Episode: 804, Reward: 22.0, Epsilon: 0.0178, Steps: 343
Episode: 805, Reward: 14.0, Epsilon: 0.0177, Steps: 317
Target network updated at step 289000
Episode: 806, Reward: 17.0, Epsilon: 0.0176, Steps: 226
Episode: 807, Reward: 22.0, Epsilon: 0.0175, Steps: 596
Episode: 808, Reward: 17.0, Epsilon: 0.0174, Steps: 297
Target network updated at step 290000
Episode: 809, Reward: 24.0, Epsilon: 0.0173, Steps: 831
Target network updated at step 291000
Episode: 810, Reward: 21.0, Epsilon: 0.0172, Steps: 703
Target network updated at step 292000
Episode: 811, Reward: 25.0, Epsilon: 0.0172, Steps: 783
Episode: 812, Reward: 19.0, Epsilon: 0.0171, Steps: 280
Target network up



Episode: 850, Reward: 25.0, Epsilon: 0.0141, Steps: 751
Model saved at episode 850
Target network updated at step 312000
Episode: 851, Reward: 19.0, Epsilon: 0.0140, Steps: 719
Episode: 852, Reward: 21.0, Epsilon: 0.0140, Steps: 488
Target network updated at step 313000
Episode: 853, Reward: 24.0, Epsilon: 0.0139, Steps: 327
Episode: 854, Reward: 16.0, Epsilon: 0.0138, Steps: 473
Target network updated at step 314000
Episode: 855, Reward: 20.0, Epsilon: 0.0138, Steps: 695
Target network updated at step 315000
Episode: 856, Reward: 16.0, Epsilon: 0.0137, Steps: 703
Episode: 857, Reward: 24.0, Epsilon: 0.0136, Steps: 767
Target network updated at step 316000
Episode: 858, Reward: 15.0, Epsilon: 0.0136, Steps: 225
Episode: 859, Reward: 11.0, Epsilon: 0.0135, Steps: 377
Target network updated at step 317000
Episode: 860, Reward: 20.0, Epsilon: 0.0134, Steps: 633
Episode: 861, Reward: 18.0, Epsilon: 0.0134, Steps: 282
Episode: 862, Reward: 17.0, Epsilon: 0.0133, Steps: 465
Target network up



Episode: 900, Reward: 13.0, Epsilon: 0.0110, Steps: 577
Model saved at episode 900
Episode: 901, Reward: 29.0, Epsilon: 0.0109, Steps: 416
Target network updated at step 337000
Episode: 902, Reward: 14.0, Epsilon: 0.0109, Steps: 265
Episode: 903, Reward: 12.0, Epsilon: 0.0108, Steps: 599
Target network updated at step 338000
Episode: 904, Reward: 11.0, Epsilon: 0.0108, Steps: 719
Episode: 905, Reward: 21.0, Epsilon: 0.0107, Steps: 390
Target network updated at step 339000
Episode: 906, Reward: 22.0, Epsilon: 0.0107, Steps: 440
Episode: 907, Reward: 18.0, Epsilon: 0.0106, Steps: 253
Episode: 908, Reward: 14.0, Epsilon: 0.0106, Steps: 285
Target network updated at step 340000
Episode: 909, Reward: 26.0, Epsilon: 0.0105, Steps: 653
Target network updated at step 341000
Episode: 910, Reward: 24.0, Epsilon: 0.0104, Steps: 847
Episode: 911, Reward: 19.0, Epsilon: 0.0104, Steps: 281
Target network updated at step 342000
Episode: 912, Reward: 25.0, Epsilon: 0.0103, Steps: 496
Episode: 913, Rew



Episode: 950, Reward: 14.0, Epsilon: 0.0100, Steps: 286
Model saved at episode 950
Target network updated at step 361000
Episode: 951, Reward: 16.0, Epsilon: 0.0100, Steps: 646
Target network updated at step 362000
Episode: 952, Reward: 23.0, Epsilon: 0.0100, Steps: 635
Episode: 953, Reward: 19.0, Epsilon: 0.0100, Steps: 475
Target network updated at step 363000
Episode: 954, Reward: 20.0, Epsilon: 0.0100, Steps: 539
Episode: 955, Reward: 17.0, Epsilon: 0.0100, Steps: 719
Target network updated at step 364000
Episode: 956, Reward: 21.0, Epsilon: 0.0100, Steps: 456
Episode: 957, Reward: 17.0, Epsilon: 0.0100, Steps: 280
Episode: 958, Reward: 24.0, Epsilon: 0.0100, Steps: 347
Target network updated at step 365000
Episode: 959, Reward: 15.0, Epsilon: 0.0100, Steps: 448
Episode: 960, Reward: 19.0, Epsilon: 0.0100, Steps: 288
Target network updated at step 366000
Episode: 961, Reward: 20.0, Epsilon: 0.0100, Steps: 473
Episode: 962, Reward: 20.0, Epsilon: 0.0100, Steps: 301
Episode: 963, Rew



Episode: 1000, Reward: 20.0, Epsilon: 0.0100, Steps: 751
Model saved at episode 1000
Training completed!


In [7]:
# Preprocessing function
def preprocess_frame(frame):
    """Convert an RGB frame to a normalized grayscale image.

    No resizing or cropping is performed, so the result keeps the height and
    width of the input (the model in this notebook is built for 210x160).

    Args:
        frame: RGB image array accepted by ``cv2.cvtColor``.

    Returns:
        2-D ``float32`` array holding the grayscale pixel values divided by
        255 (i.e. in [0, 1] for 8-bit input).
    """
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    # Divide in float32 rather than NumPy's default float64: the network
    # consumes float32 anyway, and the narrower dtype halves the memory used
    # by every frame that is kept around.
    return gray_frame.astype(np.float32) / np.float32(255.0)

In [8]:
def build_model(action_size, input_shape=(210, 160, 1), learning_rate=0.00025):
    """Build and compile the convolutional Q-network.

    The network is three ReLU convolutions (32@8x8/4, 64@4x4/2, 64@3x3/1),
    a 512-unit ReLU dense layer and a linear output layer with one unit per
    action. It is compiled with mean-squared-error loss and the Adam optimizer.

    Args:
        action_size: Number of discrete actions, i.e. units in the output layer.
        input_shape: Shape of one input sample as (height, width, channels).
            Defaults to a single-channel 210x160 frame.
        learning_rate: Learning rate passed to Adam.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=input_shape))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    # Linear activation: the outputs are unbounded Q-value estimates, one per action.
    model.add(Dense(action_size, activation='linear'))

    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))
    return model

In [9]:
def run_trained_model(env_name, model_path, episodes=5):
    """Play episodes greedily with saved weights while rendering the game.

    Args:
        env_name: Gymnasium environment id; it is created with
            ``render_mode='human'``.
        model_path: Path of the file passed to ``model.load_weights``.
        episodes: Number of episodes to play.

    Returns:
        None. The total reward of each episode is printed.
    """
    env = gym.make(env_name, render_mode='human')
    # try/finally: close the environment (and its render window) even when
    # playback is interrupted from the notebook or loading the weights fails.
    try:
        model = build_model(env.action_space.n)
        model.load_weights(model_path)

        for episode in range(episodes):
            frame, info = env.reset()
            state = preprocess_frame(frame)
            state = np.expand_dims(state, axis=-1)  # Add channel dimension

            episode_reward = 0
            done = False

            while not done:
                env.render()  # Render the environment to see the agent in action
                time.sleep(0.01)  # Slow down simulation for better visualization

                # Always act greedily: pick the action with the highest predicted Q-value.
                q_values = model.predict(np.expand_dims(state, axis=0), verbose=0)
                action = np.argmax(q_values[0])

                next_frame, reward, terminated, truncated, info = env.step(action)
                done = terminated or truncated

                # Same preprocessing as for the initial frame.
                next_state = preprocess_frame(next_frame)
                next_state = np.expand_dims(next_state, axis=-1)

                state = next_state
                episode_reward += reward

            print(f"Episode {episode + 1} - Total Reward: {episode_reward}")
    finally:
        env.close()

# Replay the agent using the weights stored in
# 'frogger_model/frogger_dqn_final.h5' (assumes that file has already been saved).
run_trained_model('ALE/Frogger-v5', 'frogger_model/frogger_dqn_final.h5')

  logger.warn(


Episode 1 - Total Reward: 23.0


KeyboardInterrupt: 