<a href="https://colab.research.google.com/github/SammyGbabs/Deep-Q-learning-With-Atari/blob/main/HyperParameters_Tuning_Atari_RL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Pinned to the versions pip resolved in the recorded output of this cell; ale-py is left
# unpinned because its resolved version is not visible in that output.
%pip install stable-baselines3==2.5.0 "gymnasium[atari]==1.0.0" ale-py pyvirtualdisplay==3.0

Collecting stable-baselines3
  Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl.metadata (943 bytes)
Collecting gymnasium<1.1.0,>=0.29.1 (from stable-baselines3)
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
INFO: pip is looking at multiple versions of gymnasium[atari] to determine which version is compatible with other requirements. This could take a while.
Collecting gymnasium[atari]
  Downloading gymnasium-1.1.0-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from tor

# **Import Required Libraries**



In [33]:
import os
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
import ale_py
import tensorflow as tf
import torch
import numpy as np
from gymnasium.wrappers import RecordVideo

# **Define the Breakout Agent Class and Create the Environment and Model**


In [30]:
class BreakoutAgent:
    """Set up, train, and evaluate a DQN agent on ``ALE/Breakout-v5``.

    The agent owns two single-environment ``DummyVecEnv`` instances (one used
    for training, one for evaluation) and a ``DQN`` model built with the
    ``"CnnPolicy"`` policy.
    """

    def __init__(self, model_directory="models", log_dir="logs", total_timesteps=50000):
        """Create the output directories, both environments, and the model.

        Args:
            model_directory: Directory receiving checkpoints, the best
                evaluation model, and the final ``policy.zip``.
            log_dir: Directory passed to the model as ``tensorboard_log`` and
                to the evaluation callback as ``log_path``.
            total_timesteps: Number of timesteps ``train`` asks the model to
                learn for.
        """
        self.model_directory = model_directory
        self.log_dir = log_dir
        self.total_timesteps = total_timesteps

        # Make sure both output locations exist before anything writes to them.
        os.makedirs(self.model_directory, exist_ok=True)
        os.makedirs(self.log_dir, exist_ok=True)

        # Separate environments so evaluation never shares state with training.
        self.env = self._create_wrapped_env()
        self.eval_env = self._create_wrapped_env()

        # DQN model bound to the training environment.
        self.model = self._initialize_model()

    @staticmethod
    def _create_env(render_mode=None):
        """Build one ``Monitor``-wrapped Breakout environment.

        Args:
            render_mode: Forwarded unchanged to ``gym.make``.

        Returns:
            The ``Monitor``-wrapped environment.
        """
        # NOTE(review): the raw environment is only wrapped in Monitor; no
        # observation preprocessing or frame-stacking wrapper is applied here.
        # Confirm this is intended before comparing against published results.
        env = gym.make("ALE/Breakout-v5", render_mode=render_mode)
        env = Monitor(env)
        return env

    def _create_wrapped_env(self):
        """Return a ``DummyVecEnv`` holding a single Breakout environment."""
        return DummyVecEnv([self._create_env])

    def _initialize_model(self):
        """Construct the ``DQN`` model for the training environment.

        Returns:
            A ``DQN`` instance configured with the hyperparameters below and
            logging to ``self.log_dir``.
        """
        return DQN(
            "CnnPolicy",
            self.env,
            learning_rate=1e-4,
            buffer_size=10000,
            learning_starts=1000,
            batch_size=32,
            gamma=0.99,
            exploration_fraction=0.1,
            exploration_initial_eps=1.0,
            exploration_final_eps=0.05,
            train_freq=4,
            gradient_steps=1,
            target_update_interval=1000,
            verbose=1,
            tensorboard_log=self.log_dir,
        )

    def train(self):
        """Train the model, checkpointing and evaluating along the way.

        Two callbacks are attached: a ``CheckpointCallback`` with
        ``save_freq=10000`` writing into ``self.model_directory`` and an
        ``EvalCallback`` with ``eval_freq=10000`` running on ``self.eval_env``.
        After learning finishes, the model is saved as ``policy.zip`` inside
        ``self.model_directory``.
        """
        checkpoint_callback = CheckpointCallback(
            save_freq=10000, save_path=self.model_directory, name_prefix="dqn_breakout"
        )
        eval_callback = EvalCallback(
            self.eval_env,
            best_model_save_path=f"{self.model_directory}/best_model",
            log_path=self.log_dir,
            eval_freq=10000,
            deterministic=True,
            render=False,
        )

        # Training
        self.model.learn(
            total_timesteps=self.total_timesteps,
            callback=[checkpoint_callback, eval_callback],
            progress_bar=True,
        )

        # Saving the trained model
        policy_path = f"{self.model_directory}/policy.zip"
        self.model.save(policy_path)
        print(f"Training completed! Model saved as '{policy_path}'")

    def execute(self, episodes=5):
        """Let the model play ``episodes`` episodes on the evaluation environment.

        Both environments are closed when this method finishes, including when
        an episode raises, so the agent should not be used afterwards.

        Args:
            episodes: Number of episodes to play.

        Returns:
            A list with one float per episode: the summed reward of that episode.
        """
        scores = []
        try:
            for ep in range(episodes):
                observation = self.eval_env.reset()
                total_points = 0.0
                done = False

                while not done:
                    # Selecting an action using the trained model
                    action, _ = self.model.predict(observation, deterministic=True)
                    observation, rewards, dones, _ = self.eval_env.step(action)
                    # The vectorized env returns one entry per sub-environment;
                    # there is exactly one here, so take element 0 to keep the
                    # score and the loop condition scalar.
                    total_points += float(rewards[0])
                    done = bool(dones[0])

                print(f"Episode {ep + 1}: Total Score: {total_points}")
                scores.append(total_points)
        finally:
            self.close()

        return scores

    def close(self):
        """Close both the training and the evaluation environment."""
        self.env.close()
        self.eval_env.close()

# **Training the Agent**

In [32]:
def run_agent():
    """Build a Breakout agent, run its training, then play five evaluation episodes."""
    agent = BreakoutAgent(total_timesteps=50000)
    agent.train()  # learning phase
    agent.execute(episodes=5)  # evaluation phase


if __name__ == "__main__":
    run_agent()


Using cuda device
Wrapping the env in a VecTransposeImage.
Logging to logs/DQN_1


Output()

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 194      |
|    ep_rew_mean      | 1.25     |
|    exploration_rate | 0.853    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 735      |
|    time_elapsed     | 1        |
|    total_timesteps  | 775      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | 1.62     |
|    exploration_rate | 0.695    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 279      |
|    time_elapsed     | 5        |
|    total_timesteps  | 1603     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000316 |
|    n_updates        | 150      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean    

----------------------------------
| eval/               |          |
|    mean_ep_length   | 2.7e+04  |
|    mean_reward      | 0        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 10000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000246 |
|    n_updates        | 2249     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 287      |
|    ep_rew_mean      | 1.92     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 36       |
|    fps              | 28       |
|    time_elapsed     | 359      |
|    total_timesteps  | 10336    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00152  |
|    n_updates        | 2333     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 297      |
|    ep_rew_mean      | 1.85     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 40       |
|    fps              | 32       |
|    time_elapsed     | 367      |
|    total_timesteps  | 11873    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000296 |
|    n_updates      

----------------------------------
| eval/               |          |
|    mean_ep_length   | 2.7e+04  |
|    mean_reward      | 0        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 20000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000657 |
|    n_updates        | 4749     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 317      |
|    ep_rew_mean      | 1.5      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 64       |
|    fps              | 28       |
|    time_elapsed     | 715      |
|    total_timesteps  | 20303    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000328 |
|    n_updates        | 4825     |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.09e+04 |
|    mean_reward      | 2.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 30000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00265  |
|    n_updates        | 7249     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 276      |
|    ep_rew_mean      | 1.41     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 112      |
|    fps              | 33       |
|    time_elapsed     | 894      |
|    total_timesteps  | 30113    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000835 |
|    n_updates        | 7278     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 276      |
|    ep_rew_mean      | 1.42     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 116      |
|    fps              | 34       |
|    time_elapsed     | 898      |
|    total_timesteps  | 30837    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00227  |
|    n_updates      

----------------------------------
| eval/               |          |
|    mean_ep_length   | 2.24e+04 |
|    mean_reward      | 1        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 40000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0058   |
|    n_updates        | 9749     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 218      |
|    ep_rew_mean      | 1.69     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 156      |
|    fps              | 33       |
|    time_elapsed     | 1202     |
|    total_timesteps  | 40435    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00207  |
|    n_updates        | 9858     |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 1.1e+04  |
|    mean_reward      | 4.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 50000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0126   |
|    n_updates        | 12249    |
----------------------------------


Training completed! Model saved as 'models/policy.zip'
Episode 1: Total Score: [3.]
Episode 2: Total Score: [4.]
Episode 3: Total Score: [4.]
Episode 4: Total Score: [2.]
Episode 5: Total Score: [5.]


# **Playing the Game with the Trained Agent and Recording Its Actions**

In [None]:
class BreakoutAgent:
    """Load a saved DQN policy and record it playing ``ALE/Breakout-v5``.

    Note: this class reuses the name of the training-oriented ``BreakoutAgent``
    defined earlier in the notebook, so running this cell rebinds that name.
    """

    def __init__(self, model_file="models/policy.zip", video_folder="videos/"):
        """Load the model, then create the video-recording environment.

        The model is loaded first so that a missing or unreadable model file
        fails before any environment is created.

        Args:
            model_file: Path of the saved model to load.
            video_folder: Directory handed to ``RecordVideo``; created if absent.

        Raises:
            FileNotFoundError: If ``model_file`` does not exist.
            RuntimeError: If the model file exists but cannot be loaded.
        """
        self.model_file = model_file
        self.video_folder = video_folder
        self.model = self.load_model()

        os.makedirs(video_folder, exist_ok=True)
        gym.register_envs(ale_py)
        # NOTE(review): no episode_trigger/step_trigger is passed, so RecordVideo
        # falls back to its default recording schedule, which may not capture
        # every episode - confirm against the gymnasium documentation.
        self.env = RecordVideo(gym.make("ALE/Breakout-v5", render_mode="rgb_array"), video_folder)

    def load_model(self):
        """Load the DQN model stored at ``self.model_file``.

        Returns:
            The loaded ``DQN`` model.

        Raises:
            FileNotFoundError: If ``self.model_file`` does not exist.
            RuntimeError: If loading fails; the original error is chained as
                the cause.
        """
        if not os.path.exists(self.model_file):
            raise FileNotFoundError(f"Model file not found at: {self.model_file}")
        try:
            print(f"Attempting to load model from: {self.model_file}")
            model = DQN.load(self.model_file)
        except Exception as e:
            print(f"Failed to load model: {e}")
            raise RuntimeError("Check the model file") from e
        print(f"Model loaded successfully from: {self.model_file}")
        return model

    def execute(self, episodes=5):
        """Play ``episodes`` episodes with the loaded model while recording.

        The environment is closed when this method finishes, including when an
        episode raises, so the recorder is always shut down.

        Args:
            episodes: Number of episodes to play.

        Returns:
            A list with the summed reward of each completed episode.
        """
        scores = []
        try:
            for ep in range(episodes):
                observation, _ = self.env.reset()
                total_reward = 0
                is_done = False
                steps = 0

                while not is_done:
                    action, _ = self.model.predict(observation, deterministic=True)
                    observation, reward, terminated, truncated, info = self.env.step(action)
                    total_reward += reward
                    steps += 1
                    # An episode ends on either termination or truncation.
                    is_done = terminated or truncated

                print(f"Episode {ep + 1}: Total Score: {total_reward} | Total Steps: {steps}")
                scores.append(total_reward)
        finally:
            self.env.close()

        # Report the folder that was actually configured, not a fixed name.
        print(f"Gameplay recorded successfully in the '{self.video_folder}' folder.")
        return scores


def run_agent():
    """Create the playback agent from the saved policy and run five recorded episodes."""
    agent = BreakoutAgent(model_file="models/policy.zip")
    agent.execute(episodes=5)


if __name__ == "__main__":
    run_agent()