# Play Atari games using Stable-Baselines3

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1n0kyQJMHU2TTb-cZS0BgHAJsBfFg_AYh?usp=sharing) [![Open In Github](https://badgen.net/badge/Open%20Source%20%3F/Yes%21/blue?icon=github)](https://github.com/Alexbeast-CN/Uob_Robotics/tree/main/DRL/PWG/PWG2)

## 1. Preparations



### All parameters for this notebook

In [None]:
# --- Environment and storage -------------------------------------------------
ENVIRONMENT = "SpaceInvaders-v0"
PATH = "/content/drive/MyDrive/Reinforcement_Learning/SpaceInvaders2"

# --- DQN hyperparameters -----------------------------------------------------
GAMMA = 0.99             # discount factor
# Original author's note: a buffer of this size requires about 25G of RAM.
BUFFER_SIZE = 100_000    # replay buffer size
BATCH_SIZE = 32          # update batch size
LR = 1e-4                # learning rate
TAU = 0.1                # soft-update coefficient for the target parameters
UPDATE_EVERY = 100       # passed to DQN as target_update_interval
UPDATE_TARGET = 10_000   # passed to DQN as learning_starts
EPS_START = 0.99         # starting value of epsilon
EPS_END = 0.01           # ending value of epsilon
EPS_DECAY = 0.01         # passed to DQN as exploration_fraction

# --- Checkpoint loading ------------------------------------------------------
LOAD_MODEL = False
MODEL_NUMBER = 9
MODEL_PATH = f"{PATH}/model_{MODEL_NUMBER}"

# --- Notebook switches -------------------------------------------------------
TRAINING = True
DISPLAY = True
COLAB = True

### [For Colab] Install ROM for Atari games

In [None]:
if COLAB:
  # use %%capture to hide the output
  %%capture
  ! wget http://www.atarimania.com/roms/Roms.rar
  ! mkdir /content/ROM/
  ! unrar e /content/Roms.rar /content/ROM/ -y
  ! python -m atari_py.import_roms /content/ROM/

### [For Colab] Mount Google Drive to this notebook for saving model and logs

In [None]:
# Mount Google Drive at /content/drive; PATH (models and logs) lives under it.
if COLAB:
  from google.colab import drive
  drive.mount("/content/drive")

Mounted at /content/drive


### Save videos

In Colab the gameplay cannot be rendered live, so we record the rendered gameplay to video files and embed them in the notebook to check the result.

In [None]:
if COLAB:
  %%capture
  !pip install gym pyvirtualdisplay > /dev/null 2>&1
  !apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
else:
  %%capture
  !pip install gym pyvirtualdisplay 
  !apt-get install -y xvfb python-opengl ffmpeg 

In [None]:
import gym
from gym.wrappers import Monitor
import glob
import io
import base64
from IPython.display import HTML
from pyvirtualdisplay import Display
from IPython import display as ipythondisplay
# Start a non-visible 1400x900 pyvirtualdisplay Display for rendering.
# NOTE(review): the global name `display` is also commonly used for IPython's
# display helper; confirm nothing later in the notebook expects that one.
display = Display(size=(1400, 900), visible=0)
display.start()

"""
Utility functions to enable video recording of gym environment 
and displaying it.
To enable video, just do "env = wrap_env(env)"
"""

def show_video():
  """Embed a recorded episode from the local ``video/`` directory in the notebook.

  Looks for ``video/*.mp4`` (the directory ``wrap_env`` records into), base64
  encodes the first match in sorted order and displays it as an inline HTML
  ``<video>`` element. Prints "Could not find video" when no file is found.
  """
  # Sort so the choice is deterministic; glob's own ordering is unspecified.
  mp4list = sorted(glob.glob('video/*.mp4'))
  if len(mp4list) > 0:
    mp4 = mp4list[0]
    # Read-only binary mode inside a context manager so the file handle is
    # always closed and no write permission is required.
    with open(mp4, 'rb') as video_file:
      video = video_file.read()
    encoded = base64.b64encode(video)
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
  else:
    print("Could not find video")

def wrap_env(env):
  """Return `env` wrapped in a gym `Monitor` that records into ``./video``.

  The wrapper is created with ``force=True``.
  """
  return Monitor(env, './video', force=True)

## 2. Create a model and start training

In [None]:
%%capture
!pip install stable-baselines3[extra]
# Import the DQN model from Stable-Baselines3
from stable_baselines3 import DQN
# DummyVecEnv wraps one or more environments behind Stable-Baselines3's vectorized-environment API
from stable_baselines3.common.vec_env import DummyVecEnv


In [None]:
# Build the training environment: a single gym environment, created by the
# factory handed to DummyVecEnv.
env = DummyVecEnv([lambda: gym.make(ENVIRONMENT)])


In [None]:
# Either build a fresh DQN agent from the notebook parameters, or resume from
# the checkpoint at MODEL_PATH.
if not LOAD_MODEL:
  model = DQN(
      'CnnPolicy',
      env,
      learning_rate=LR,
      buffer_size=BUFFER_SIZE,
      learning_starts=UPDATE_TARGET,
      batch_size=BATCH_SIZE,
      tau=TAU,
      gamma=GAMMA,
      target_update_interval=UPDATE_EVERY,
      exploration_fraction=EPS_DECAY,
      exploration_initial_eps=EPS_START,
      exploration_final_eps=EPS_END,
      tensorboard_log=PATH,
  )
else:
  model = DQN.load(MODEL_PATH, env=env)

Train the model

In [None]:
if TRAINING:
  # Train in chunks of 100000 timesteps and write a checkpoint after each one.
  for chunk, i in enumerate(range(30, 40)):
      # Report progress before the chunk starts, so the message is accurate.
      print(f"Model {i} is under training...")
      # Only the first learn() call may reset the timestep counter. Later
      # chunks continue from it, so the exploration schedule and the
      # learning_starts warm-up are not restarted for every checkpoint.
      model.learn(total_timesteps=100000, reset_num_timesteps=(chunk == 0))
      model.save(f"{PATH}/model_{i}")
  env.close()

Model 30 is under training...
Model 31 is under training...
Model 32 is under training...
Model 33 is under training...
Model 34 is under training...
Model 35 is under training...
Model 36 is under training...
Model 37 is under training...
Model 38 is under training...
Model 39 is under training...


## 3. Display the result

In [None]:
if DISPLAY:
  episodes = 5
  # Play `episodes` episodes with the current model; each episode gets its own
  # recording environment and its video is shown once the episode is over.
  for episode in range(episodes):
    env = wrap_env(gym.make(ENVIRONMENT))
    score = 0
    try:
      # Initialize the environment and get first state
      obs = env.reset()
      done = False

      while not done:
          env.render(mode='rgb_array')
          # Bind the second return value to a named variable instead of `_`,
          # which IPython uses for the last cell output.
          action, _state = model.predict(obs)
          obs, reward, done, info = env.step(action)
          score += reward
    finally:
      # Always close the wrapped env, even if the episode fails part-way.
      env.close()
    print("Episode {}  score: {}".format(episode, score))
    show_video()