In [1]:
import gymnasium as gym
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tqdm import tqdm
import os



In [2]:
# Hyperparameters
learning_rate = 0.00025
epsilon = 0.1  # Exploration rate
episodes = 1  # Number of training episodes
optimizer = optimizers.Adam(learning_rate=learning_rate)
loss_function = 'mse'  # Mean Squared Error loss for Q-value difference

In [3]:
def create_cnn(input_shape, num_actions):
    model = models.Sequential([
        layers.Input(shape=input_shape),
        #layers.Conv2D(64, (8, 8), strides=(4, 4), activation='relu'),
        layers.Conv2D(128, (5, 5), strides=(2, 2), activation='relu'),
        layers.Flatten(),
        #layers.Dense(512, activation='relu'),
        layers.Dense(num_actions, activation='linear')
    ])
    return model

In [4]:
env = gym.make('ALE/Frogger-v5', render_mode='rgb_array')
#env = gym.make('ALE/Frogger-v5', )
input_shape = env.observation_space.shape  # This should match the frame size
num_actions = env.action_space.n  # Number of possible actions

model = create_cnn(input_shape, num_actions)
model.compile(optimizer=optimizers.Adam(learning_rate=0.00025), loss='mse')  # Mean Squared Error loss for Q-value difference

In [5]:
def train_model(model, episodes, save_interval=10):
    # Initialize list to keep track of total rewards for each episode
    episode_rewards = []

    # Create a directory to save model weights
    save_dir = "model_weights"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Set up tqdm progress bar
    with tqdm(total=episodes, desc="Episode", unit='episode') as pbar:
        for e in range(episodes):
            state = env.reset()[0]
            state = np.array(state)
            done = False
            total_reward = 0

            while not done:
                # Randomly choose an action or the best predicted action
                if np.random.rand() <= epsilon:  # Use the global epsilon value
                    action = env.action_space.sample()
                else:
                    q_values = model.predict(np.expand_dims(state, axis=0), verbose=0)
                    action = np.argmax(q_values[0])

                next_state, reward, terminated, truncated, info = env.step(action)
                next_state = np.array(next_state)
                total_reward += reward
                
                if terminated or truncated:
                    done = True

            # Update progress bar
            pbar.update(1)
            pbar.set_description(f"Episode: {e+1}, Reward: {total_reward}")

            # Append the total reward to the rewards list
            episode_rewards.append(total_reward)

            # Save the model every 'save_interval' episodes
            if (e + 1) % save_interval == 0:
                model_path = os.path.join(save_dir, f'model_episode_{e + 1}.h5')
                model.save(model_path)
                print(f"Saved model at episode {e + 1} to {model_path}")

    # Print overall training results
    print(f"Average Reward: {np.mean(episode_rewards)}")
    print(f"Best Reward: {max(episode_rewards)}")

# Example usage
train_model(model, 100)  # Adjust as needed for your setup
env.close()



Saved model at episode 10 to model_weights\model_episode_10.h5




Saved model at episode 20 to model_weights\model_episode_20.h5




Saved model at episode 30 to model_weights\model_episode_30.h5




Saved model at episode 40 to model_weights\model_episode_40.h5




Saved model at episode 50 to model_weights\model_episode_50.h5




Saved model at episode 60 to model_weights\model_episode_60.h5




Saved model at episode 70 to model_weights\model_episode_70.h5




Saved model at episode 80 to model_weights\model_episode_80.h5




Saved model at episode 90 to model_weights\model_episode_90.h5


Episode: 100, Reward: 9.0: 100%|██████████| 100/100 [21:30<00:00, 12.90s/episode]

Saved model at episode 100 to model_weights\model_episode_100.h5
Average Reward: 10.65
Best Reward: 21.0



