In [9]:
%pip install numpy==1.25.0

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [10]:
import gymnasium as gym
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers

In [11]:
def create_cnn(input_shape, num_actions):
    model = models.Sequential([
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (8, 8), strides=(4, 4), activation='relu'),
        layers.Conv2D(64, (4, 4), strides=(2, 2), activation='relu'),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dense(num_actions, activation='linear')  # Output layer for Q-values
    ])
    return model

In [14]:
env = gym.make('ALE/Frogger-v5', render_mode='rgb_array')
input_shape = env.observation_space.shape  # This should match the frame size
num_actions = env.action_space.n  # Number of possible actions

model = create_cnn(input_shape, num_actions)
model.compile(optimizer=optimizers.Adam(learning_rate=0.00025),
              loss='mse')  # Mean Squared Error loss for Q-value difference

In [15]:
def train_model(episodes):
    for e in range(episodes):
        state = env.reset()[0]
        state = np.array(state)
        done = False
        total_reward = 0

        while not done:
            # Randomly choose an action or the best predicted action
            if np.random.rand() <= 0.1:  # Exploration rate
                action = env.action_space.sample()
            else:
                q_values = model.predict(state[None, ...])
                action = np.argmax(q_values[0])

            next_state, reward, terminated, truncated, info = env.step(action)
            next_state = np.array(next_state)
            total_reward += reward
            print("Reward:", reward)
            print(info)
            if(terminated or truncated):
                done = True

        print(f"Episode: {e+1}, Total Reward: {total_reward}")
train_model(1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Reward: 0.0
{'lives': 4, 'episode_frame_number': 4, 'frame_number': 4}
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Reward: 0.0
{'lives': 4, 'episode_frame_number': 8, 'frame_number': 8}
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Reward: 0.0
{'lives': 4, 'episode_frame_number': 12, 'frame_number': 12}
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Reward: 0.0
{'lives': 4, 'episode_frame_number': 16, 'frame_number': 16}
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Reward: 0.0
{'lives': 4, 'episode_frame_number': 20, 'frame_number': 20}
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Reward: 0.0
{'lives': 4, 'episode_frame_number': 24, 'frame_number': 24}
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Reward: 0.0
{'lives': 4, 'episode_frame_number': 28, 'frame_nu