In [1]:
import gym
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tqdm import tqdm
import os

In [2]:
# Training configuration
# ----------------------
# Scalar knobs first, then the objects derived from them.
learning_rate = 2.5e-4  # Step size handed to the Adam optimizer below
epsilon = 0.1  # Exploration rate: chance of taking a random action
episodes = 1  # Number of training episodes
loss_function = 'mse'  # Mean Squared Error loss for Q-value difference

# Adam optimizer configured with the learning rate above
optimizer = optimizers.Adam(learning_rate=learning_rate)

In [3]:
def create_cnn(input_shape, num_actions):
    """Build the convolutional Q-network.

    Args:
        input_shape: Shape of a single observation, passed to ``layers.Input``.
        num_actions: Width of the final linear layer (one output per action).

    Returns:
        An uncompiled ``Sequential`` model: two strided ReLU convolutions,
        a flatten, a 512-unit ReLU dense layer and a linear output layer.
    """
    # Layers are instantiated in network order, input to output.
    layer_stack = [
        layers.Input(shape=input_shape),
        # Convolutional feature extractor
        layers.Conv2D(64, (8, 8), strides=(4, 4), activation='relu'),
        layers.Conv2D(128, (5, 5), strides=(2, 2), activation='relu'),
        # Fully connected head
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dense(num_actions, activation='linear'),
    ]
    network = models.Sequential(layer_stack)
    return network

In [4]:
# Create the Frogger environment with array-based rendering.
env = gym.make('ALE/Frogger-v5', render_mode='rgb_array')
input_shape = env.observation_space.shape  # This should match the frame size
num_actions = env.action_space.n  # Number of possible actions

# Build the Q-network and compile it with the hyperparameters from the
# configuration cell, so that changing `learning_rate` or `loss_function`
# there actually takes effect here. A fresh optimizer is created for this
# model rather than sharing an optimizer instance between models.
model = create_cnn(input_shape, num_actions)
model.compile(
    optimizer=optimizers.Adam(learning_rate=learning_rate),
    loss=loss_function,  # Mean Squared Error loss for Q-value difference
)

In [5]:
def train_model(model, episodes, save_interval=10):
    """Roll out an epsilon-greedy policy in the global ``env`` and checkpoint the model.

    For each episode the environment is reset and stepped until it reports
    ``terminated`` or ``truncated``. At every step a random action is taken
    with probability given by the module-level ``epsilon``; otherwise the
    action with the highest predicted Q-value for the current frame is used.

    NOTE(review): this loop never calls ``fit``/``train_on_batch`` or any
    other weight update, so the saved checkpoints contain the same weights
    the model started with. Confirm whether a learning step is still to be
    added.

    Args:
        model: Keras model mapping a batch of observations to Q-values.
        episodes: Number of episodes to run.
        save_interval: Save the model every this many episodes. A falsy
            value (``0`` or ``None``) disables checkpointing.

    Returns:
        None. Per-episode progress and the final average/best reward are
        printed; checkpoints are written under ``model_weights/``.
    """
    # Total reward collected in each episode
    episode_rewards = []

    # Directory for checkpoints; exist_ok avoids the check-then-create race
    save_dir = "model_weights"
    os.makedirs(save_dir, exist_ok=True)

    # Set up tqdm progress bar
    with tqdm(total=episodes, desc="Episode", unit='episode') as pbar:
        for e in range(episodes):
            state = np.array(env.reset()[0])
            done = False
            total_reward = 0

            while not done:
                # Randomly choose an action or the best predicted action
                if np.random.rand() <= epsilon:  # Use the global epsilon value
                    action = env.action_space.sample()
                else:
                    q_values = model.predict(np.expand_dims(state, axis=0), verbose=0)
                    action = np.argmax(q_values[0])

                next_state, reward, terminated, truncated, info = env.step(action)
                total_reward += reward

                # Advance to the new observation. Without this every greedy
                # action in the episode is predicted from the reset frame.
                state = np.array(next_state)

                done = bool(terminated or truncated)

            # Update progress bar
            pbar.update(1)
            pbar.set_description(f"Episode: {e+1}, Reward: {total_reward}")

            # Append the total reward to the rewards list
            episode_rewards.append(total_reward)

            # Save the model every 'save_interval' episodes (skipped when
            # save_interval is falsy, which also avoids a modulo-by-zero)
            if save_interval and (e + 1) % save_interval == 0:
                model_path = os.path.join(save_dir, f'model_episode_{e + 1}.h5')
                model.save(model_path)
                print(f"Saved model at episode {e + 1} to {model_path}")

    # Print overall training results; max() would raise on an empty list
    if episode_rewards:
        print(f"Average Reward: {np.mean(episode_rewards)}")
        print(f"Best Reward: {max(episode_rewards)}")
    else:
        print("No episodes were run; no reward statistics to report.")

# Example usage
try:
    train_model(model, 500)  # Adjust as needed for your setup
finally:
    # Always release the environment, even if training fails or is interrupted
    env.close()

Episode:   0%|          | 0/500 [00:00<?, ?episode/s]

  if not isinstance(terminated, (bool, np.bool8)):


Saved model at episode 10 to model_weights\model_episode_10.h5




Saved model at episode 20 to model_weights\model_episode_20.h5




Saved model at episode 30 to model_weights\model_episode_30.h5




Saved model at episode 40 to model_weights\model_episode_40.h5




Saved model at episode 50 to model_weights\model_episode_50.h5




Saved model at episode 60 to model_weights\model_episode_60.h5




Saved model at episode 70 to model_weights\model_episode_70.h5




Saved model at episode 80 to model_weights\model_episode_80.h5




Saved model at episode 90 to model_weights\model_episode_90.h5




Saved model at episode 100 to model_weights\model_episode_100.h5




Saved model at episode 110 to model_weights\model_episode_110.h5




Saved model at episode 120 to model_weights\model_episode_120.h5




Saved model at episode 130 to model_weights\model_episode_130.h5




Saved model at episode 140 to model_weights\model_episode_140.h5




Saved model at episode 150 to model_weights\model_episode_150.h5




Saved model at episode 160 to model_weights\model_episode_160.h5




Saved model at episode 170 to model_weights\model_episode_170.h5




Saved model at episode 180 to model_weights\model_episode_180.h5




Saved model at episode 190 to model_weights\model_episode_190.h5




Saved model at episode 200 to model_weights\model_episode_200.h5




Saved model at episode 210 to model_weights\model_episode_210.h5




Saved model at episode 220 to model_weights\model_episode_220.h5




Saved model at episode 230 to model_weights\model_episode_230.h5




Saved model at episode 240 to model_weights\model_episode_240.h5




Saved model at episode 250 to model_weights\model_episode_250.h5




Saved model at episode 260 to model_weights\model_episode_260.h5




Saved model at episode 270 to model_weights\model_episode_270.h5




Saved model at episode 280 to model_weights\model_episode_280.h5




Saved model at episode 290 to model_weights\model_episode_290.h5




Saved model at episode 300 to model_weights\model_episode_300.h5




Saved model at episode 310 to model_weights\model_episode_310.h5




Saved model at episode 320 to model_weights\model_episode_320.h5




Saved model at episode 330 to model_weights\model_episode_330.h5




Saved model at episode 340 to model_weights\model_episode_340.h5




Saved model at episode 350 to model_weights\model_episode_350.h5




Saved model at episode 360 to model_weights\model_episode_360.h5




Saved model at episode 370 to model_weights\model_episode_370.h5




Saved model at episode 380 to model_weights\model_episode_380.h5




Saved model at episode 390 to model_weights\model_episode_390.h5




Saved model at episode 400 to model_weights\model_episode_400.h5




Saved model at episode 410 to model_weights\model_episode_410.h5




Saved model at episode 420 to model_weights\model_episode_420.h5




Saved model at episode 430 to model_weights\model_episode_430.h5




Saved model at episode 440 to model_weights\model_episode_440.h5




Saved model at episode 450 to model_weights\model_episode_450.h5




Saved model at episode 460 to model_weights\model_episode_460.h5




Saved model at episode 470 to model_weights\model_episode_470.h5


Episode: 471, Reward: 9.0:  94%|█████████▍| 471/500 [2:16:26<06:09, 12.74s/episode] 