# GHOST - RL Project

## Imports and Installs

In [1]:
!pip install gymnasium
!pip install gymnasium[atari]
!pip install gymnasium[accept-rom-license]
!pip install pyvirtualdisplay > /dev/null 2>&1
!pip install stable_baselines3

Collecting gymnasium
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/953.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m276.5/953.9 kB[0m [31m8.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1
Collecting shimmy[atari]<1.0,>=0.1.0 (from gymnasium[atari])
  Downloading Shimmy-0.2.1-py3-none-any.whl (25 kB)
Collecting ale-py~=0.8.1 (from shimmy[atari]<1.0,>=0.1.0->gymnasium[atari])
  Downloading ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   

In [2]:
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecFrameStack
import gymnasium as gym
import numpy as np
from IPython import display as ipythondisplay
import os
import pyvirtualdisplay
import base64
import io
import imageio
from datetime import datetime
from IPython.display import HTML
import cv2
import warnings
import matplotlib.pyplot as plt
from stable_baselines3 import A2C
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

In [3]:
# Suppress all Python warnings from this point on to keep cell outputs readable.
# NOTE(review): this also hides warnings that may matter (e.g. deprecations) - confirm this is intended.
warnings.filterwarnings("ignore")

  and should_run_async(code)


In [4]:
def render_as_image(env):
    '''
    Shows the frame currently rendered by an environment as a Matplotlib image.

    Arguments:
    - env: Object whose render() result is handed to plt.imshow.

    Returns:
    None
    '''
    frame = env.render()
    plt.imshow(frame)
    # Axes and ticks carry no information for a game frame, so hide them.
    plt.axis('off')
    plt.show()

def embed_video(file_path):
    '''
    Embeds a video file into HTML for display.

    The file is read completely into memory and inlined as a base64 data URL,
    so the returned HTML does not reference the file on disk.

    Arguments:
    - file_path: The path to the video file.

    Returns:
    - HTML: HTML object containing a <video> element with the file embedded.
    '''
    # Use a context manager so the file handle is released even if reading fails.
    with open(file_path, "rb") as video_file:
        video_bytes = video_file.read()
    video_url = f"data:video/mp4;base64,{base64.b64encode(video_bytes).decode()}"
    return HTML(f"""<video width="640" height="480" controls><source src="{video_url}" type="video/mp4"></video>""")

def random_filename():
    '''
    Builds an MP4 filename from the current local time, in the format "YYYY_MM_DD_HH_MM_SS.mp4".

    Despite the name, no randomness is involved: the result depends only on the
    clock, so two calls within the same second return the same name.

    Returns:
    - str: Timestamp-based filename ending in ".mp4".
    '''
    name_format = '%Y_%m_%d_%H_%M_%S.mp4'
    current_time = datetime.now()
    return current_time.strftime(name_format)

class VideoRecorder:
    '''
    Utility class for recording video of an environment.

    Frames are taken from env.render(), resized with OpenCV and appended to an
    imageio writer. The recorder can be used as a context manager: leaving the
    `with` block closes the writer and displays the recorded video.

    Methods:
    - __init__: Initializes the video recorder.
    - record_frame: Records a frame from the environment.
    - close: Closes the video writer.
    - play: Closes the writer and displays the recorded video.
    - __enter__: Enters the context manager.
    - __exit__: Exits the context manager.
    '''
    def __init__(self, filename=None, fps=30):
        '''
        Initializes the VideoRecorder.

        Arguments:
        - filename: The filename to save the recorded video. When None (the
          default), a new name is generated with random_filename() for this
          instance.
        - fps: Frames per second of the recorded video.
        '''
        # Resolve the default at call time. A `filename=random_filename()`
        # default is evaluated only once, when the class is defined, so every
        # recorder created without a name would write to the same file.
        if filename is None:
            filename = random_filename()
        self.filename = filename
        self.writer = imageio.get_writer(filename, fps=fps)

    def record_frame(self, env, target_width = 608, target_height=400, slowed=True):
        '''
        Records a frame from the environment.

        Arguments:
        - env: The environment object to record.
        - target_width: Width of the target frame.
        - target_height: Height of the target frame.
        - slowed: When True, the frame is written twice, so the video plays at
          half speed for the same fps.

        Returns:
        None

        Raises:
        - ValueError: If env.render() returns None (no frame to record).
        '''
        frame = env.render()
        if frame is None:
            # Fail with an actionable message instead of an opaque OpenCV error.
            raise ValueError(
                "env.render() returned None; the environment must produce image "
                "frames (e.g. render_mode='rgb_array') to be recorded."
            )
        resized_frame = cv2.resize(frame, (target_width, target_height))
        repeats = 2 if slowed else 1
        for _ in range(repeats):
            self.writer.append_data(resized_frame)

    def close(self, *args, **kwargs):
        '''
        Closes the video writer.

        Arguments:
        - *args, **kwargs: Forwarded unchanged to the writer's close().

        Returns:
        None
        '''
        self.writer.close(*args, **kwargs)

    def play(self):
        '''
        Closes the writer and displays the recorded video.

        Arguments:
        None

        Returns:
        None
        '''
        self.close()
        # embed_video() only builds the HTML object. It must be passed to
        # IPython's display explicitly, otherwise nothing is shown when play()
        # is called from __exit__ or from the middle of a cell.
        ipythondisplay.display(embed_video(self.filename))

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.play()
        # Falsy return value: exceptions raised inside the `with` block propagate.
        return False

## Assault

In [None]:
# Number of environment copies, passed as n_envs to make_atari_env.
NUM_ENVS = 1
# Training budget; an int because learn(total_timesteps=...) is typed as int.
NUM_STEPS = 200_000

In [None]:
# rgb_array rendering is requested because the recording cell calls render() on this env.
vec_env = make_atari_env("ALE/Assault-v5", n_envs=NUM_ENVS, seed=42, env_kwargs={"render_mode": "rgb_array"})
# n_stack is the number of consecutive frames stacked into one observation, not
# the number of environments. 4 is the usual Atari setting and lets the CNN see
# motion, which a single frame cannot convey.
vec_env = VecFrameStack(vec_env, n_stack=4)

model = PPO("CnnPolicy", vec_env, verbose=1)
# Cast so the step budget is an int even if NUM_STEPS was written as a float literal.
model.learn(total_timesteps=int(NUM_STEPS))

Using cuda device
Wrapping the env in a VecTransposeImage.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 971      |
|    ep_rew_mean     | 173      |
| time/              |          |
|    fps             | 202      |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.01e+03   |
|    ep_rew_mean          | 191        |
| time/                   |            |
|    fps                  | 190        |
|    iterations           | 2          |
|    time_elapsed         | 21         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.01783194 |
|    clip_fraction        | 0.105      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.93      |
|    explained_variance   |

<stable_baselines3.ppo.ppo.PPO at 0x7a7c132a9ba0>

In [None]:
obs = vec_env.reset()
# Give this recording its own timestamped file name explicitly, so it can never
# overwrite a video recorded earlier in the session.
rec = VideoRecorder(filename=random_filename())
num_dones = 0
# Keep stepping until a `done` flag has been seen on 11 steps (count exceeds 10).
while num_dones <= 10:
    action, _states = model.predict(obs, deterministic=False)
    obs, rewards, dones, info = vec_env.step(action)
    rec.record_frame(vec_env)
    if any(dones):
        num_dones += 1
print("Done !")

Done !


In [None]:
# Close the writer first, then embed the file that was written.
# embed_video() is the last expression of the cell, so its HTML result is rendered as the cell output.
rec.close()
embed_video(rec.filename)

### AirRaid

In [None]:
# Number of environment copies, passed as n_envs to make_atari_env.
NUM_ENVS = 1
# Training budget; an int because learn(total_timesteps=...) is typed as int.
NUM_STEPS = 200_000

In [None]:
# rgb_array rendering is requested because the recording cell calls render() on this env.
vec_env = make_atari_env("ALE/AirRaid-v5", n_envs=NUM_ENVS, seed=42, env_kwargs={"render_mode": "rgb_array"})
# n_stack is the number of consecutive frames stacked into one observation, not
# the number of environments. 4 is the usual Atari setting and lets the CNN see
# motion, which a single frame cannot convey.
vec_env = VecFrameStack(vec_env, n_stack=4)

model = PPO("CnnPolicy", vec_env, verbose=1)
# Cast so the step budget is an int even if NUM_STEPS was written as a float literal.
model.learn(total_timesteps=int(NUM_STEPS))

Using cuda device
Wrapping the env in a VecTransposeImage.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 738      |
|    ep_rew_mean     | 475      |
| time/              |          |
|    fps             | 213      |
|    iterations      | 1        |
|    time_elapsed    | 9        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 602         |
|    ep_rew_mean          | 436         |
| time/                   |             |
|    fps                  | 195         |
|    iterations           | 2           |
|    time_elapsed         | 20          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.010724972 |
|    clip_fraction        | 0.121       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.78       |
|    explaine

<stable_baselines3.ppo.ppo.PPO at 0x7a7bdfda3f40>

In [None]:
obs = vec_env.reset()
# Give this recording its own timestamped file name explicitly, so it can never
# overwrite a video recorded earlier in the session.
rec = VideoRecorder(filename=random_filename())
num_dones = 0
# Keep stepping until a `done` flag has been seen on 11 steps (count exceeds 10).
while num_dones <= 10:
    action, _states = model.predict(obs, deterministic=False)
    obs, rewards, dones, info = vec_env.step(action)
    rec.record_frame(vec_env)
    if any(dones):
        num_dones += 1
print("Done !")

Done !


In [None]:
# Close the writer first, then embed the file that was written.
# embed_video() is the last expression of the cell, so its HTML result is rendered as the cell output.
rec.close()
embed_video(rec.filename)

### Alien

In [None]:
# Number of environment copies, passed as n_envs to make_atari_env.
NUM_ENVS = 1
# Training budget; an int because learn(total_timesteps=...) is typed as int.
NUM_STEPS = 200_000

In [None]:
# rgb_array rendering is requested because the recording cell calls render() on this env.
vec_env = make_atari_env("ALE/Alien-v5", n_envs=NUM_ENVS, seed=42, env_kwargs={"render_mode": "rgb_array"})
# n_stack is the number of consecutive frames stacked into one observation, not
# the number of environments. 4 is the usual Atari setting and lets the CNN see
# motion, which a single frame cannot convey.
vec_env = VecFrameStack(vec_env, n_stack=4)

model = PPO("CnnPolicy", vec_env, verbose=1)
# Cast so the step budget is an int even if NUM_STEPS was written as a float literal.
model.learn(total_timesteps=int(NUM_STEPS))

Using cuda device
Wrapping the env in a VecTransposeImage.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 669      |
|    ep_rew_mean     | 353      |
| time/              |          |
|    fps             | 154      |
|    iterations      | 1        |
|    time_elapsed    | 13       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 631         |
|    ep_rew_mean          | 353         |
| time/                   |             |
|    fps                  | 145         |
|    iterations           | 2           |
|    time_elapsed         | 28          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.013120052 |
|    clip_fraction        | 0.146       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.88       |
|    explaine

<stable_baselines3.ppo.ppo.PPO at 0x7a7c091f6da0>

In [None]:
obs = vec_env.reset()
# Give this recording its own timestamped file name explicitly, so it can never
# overwrite a video recorded earlier in the session.
rec = VideoRecorder(filename=random_filename())
num_dones = 0
# Keep stepping until a `done` flag has been seen on 11 steps (count exceeds 10).
while num_dones <= 10:
    action, _states = model.predict(obs, deterministic=False)
    obs, rewards, dones, info = vec_env.step(action)
    rec.record_frame(vec_env)
    if any(dones):
        num_dones += 1
print("Done !")

Done !


In [None]:
# Close the writer first, then embed the file that was written.
# embed_video() is the last expression of the cell, so its HTML result is rendered as the cell output.
rec.close()
embed_video(rec.filename)

### Asteroids

In [None]:
# Number of environment copies, passed as n_envs to make_atari_env.
NUM_ENVS = 1
# Training budget; an int because learn(total_timesteps=...) is typed as int.
NUM_STEPS = 200_000

In [None]:
# rgb_array rendering is requested because the recording cell calls render() on this env.
vec_env = make_atari_env("ALE/Asteroids-v5", n_envs=NUM_ENVS, seed=42, env_kwargs={"render_mode": "rgb_array"})
# n_stack is the number of consecutive frames stacked into one observation, not
# the number of environments. 4 is the usual Atari setting and lets the CNN see
# motion, which a single frame cannot convey.
vec_env = VecFrameStack(vec_env, n_stack=4)

model = PPO("CnnPolicy", vec_env, verbose=1)
# Cast so the step budget is an int even if NUM_STEPS was written as a float literal.
model.learn(total_timesteps=int(NUM_STEPS))

Using cuda device
Wrapping the env in a VecTransposeImage.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 647      |
|    ep_rew_mean     | 424      |
| time/              |          |
|    fps             | 212      |
|    iterations      | 1        |
|    time_elapsed    | 9        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 680         |
|    ep_rew_mean          | 455         |
| time/                   |             |
|    fps                  | 196         |
|    iterations           | 2           |
|    time_elapsed         | 20          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008747125 |
|    clip_fraction        | 0.0681      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.63       |
|    explaine

<stable_baselines3.ppo.ppo.PPO at 0x7a7c091f5480>

In [None]:
obs = vec_env.reset()
# Give this recording its own timestamped file name explicitly, so it can never
# overwrite a video recorded earlier in the session.
rec = VideoRecorder(filename=random_filename())
num_dones = 0
# Keep stepping until a `done` flag has been seen on 11 steps (count exceeds 10).
while num_dones <= 10:
    action, _states = model.predict(obs, deterministic=False)
    obs, rewards, dones, info = vec_env.step(action)
    rec.record_frame(vec_env)
    if any(dones):
        num_dones += 1
print("Done !")

Done !


In [None]:
# Close the writer first, then embed the file that was written.
# embed_video() is the last expression of the cell, so its HTML result is rendered as the cell output.
rec.close()
embed_video(rec.filename)

### Tetris - Vectorized

In [5]:
# Number of environment copies, passed as n_envs to make_atari_env.
NUM_ENVS = 8
# Training budget; an int because learn(total_timesteps=...) is typed as int.
NUM_STEPS = 200_000

In [6]:
# rgb_array rendering is requested because the recording cell calls render() on this env.
vec_env = make_atari_env("ALE/Tetris-v5", n_envs=NUM_ENVS, seed=42, env_kwargs={"render_mode": "rgb_array"})
# n_stack is the number of consecutive frames stacked into one observation, not
# the number of environments. Tying it to NUM_ENVS stacked 8 frames here only
# because 8 environments were requested; 4 is the usual Atari setting.
vec_env = VecFrameStack(vec_env, n_stack=4)

model = A2C("CnnPolicy", vec_env, verbose=1)
# Cast so the step budget is an int even if NUM_STEPS was written as a float literal.
model.learn(total_timesteps=int(NUM_STEPS))

Using cuda device
Wrapping the env in a VecTransposeImage.
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 586      |
|    ep_rew_mean        | 0        |
| time/                 |          |
|    fps                | 181      |
|    iterations         | 100      |
|    time_elapsed       | 22       |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -1.61    |
|    explained_variance | 0.651    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.00116 |
|    value_loss         | 3.72e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 581       |
|    ep_rew_mean        | 0.0192    |
| time/                 |           |
|    fps                | 197       |
|    iterations         | 200       |
|    time_elapsed       | 40        |
|    tot

<stable_baselines3.a2c.a2c.A2C at 0x7b99f3fdb490>

In [7]:
obs = vec_env.reset()
# Give this recording its own timestamped file name explicitly, so it can never
# overwrite a video recorded earlier in the session.
rec = VideoRecorder(filename=random_filename())
num_dones = 0
# Keep stepping until a `done` flag has been seen on 11 steps (count exceeds 10).
# With several environments, any(dones) counts steps on which at least one
# environment finished, not the number of finished episodes.
while num_dones <= 10:
    action, _states = model.predict(obs, deterministic=False)
    obs, rewards, dones, info = vec_env.step(action)
    rec.record_frame(vec_env)
    if any(dones):
        num_dones += 1
print("Done !")

Done !


In [8]:
# Close the writer first, then embed the file that was written.
# embed_video() is the last expression of the cell, so its HTML result is rendered as the cell output.
rec.close()
embed_video(rec.filename)

### Tetris - Single Environment

In [9]:
# Number of environment copies, passed as n_envs to make_atari_env.
NUM_ENVS = 1
# Training budget; an int because learn(total_timesteps=...) is typed as int.
NUM_STEPS = 50_000

In [10]:
# rgb_array rendering is requested because the recording cell calls render() on this env.
vec_env = make_atari_env("ALE/Tetris-v5", n_envs=NUM_ENVS, seed=42, env_kwargs={"render_mode": "rgb_array"})
# n_stack is the number of consecutive frames stacked into one observation, not
# the number of environments. 4 is the usual Atari setting and lets the CNN see
# motion, which a single frame cannot convey.
vec_env = VecFrameStack(vec_env, n_stack=4)

model = PPO("CnnPolicy", vec_env, verbose=1)
# Cast so the step budget is an int even if NUM_STEPS was written as a float literal.
model.learn(total_timesteps=int(NUM_STEPS))

Using cuda device
Wrapping the env in a VecTransposeImage.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 641      |
|    ep_rew_mean     | 0        |
| time/              |          |
|    fps             | 163      |
|    iterations      | 1        |
|    time_elapsed    | 12       |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 613        |
|    ep_rew_mean          | 0          |
| time/                   |            |
|    fps                  | 151        |
|    iterations           | 2          |
|    time_elapsed         | 26         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.00841292 |
|    clip_fraction        | 0.0536     |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.6       |
|    explained_variance   |

<stable_baselines3.ppo.ppo.PPO at 0x7b99f3fd9480>

In [11]:
# Give this recording its own timestamped file name explicitly, so it can never
# overwrite a video recorded earlier in the session.
rec = VideoRecorder(filename=random_filename())
obs = vec_env.reset()
num_dones = 0
# Keep stepping until a `done` flag has been seen on 11 steps (count exceeds 10).
while num_dones <= 10:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = vec_env.step(action)
    rec.record_frame(vec_env)
    if any(dones):
        num_dones += 1
print("Done !")

Done !


In [12]:
# Close the writer first, then embed the file that was written.
# embed_video() is the last expression of the cell, so its HTML result is rendered as the cell output.
rec.close()
embed_video(rec.filename)