In [None]:
# Select TensorFlow 1 through the %tensorflow_version magic, then report the
# version that actually got imported.
# NOTE(review): presumably a Colab-only magic that has to run before the
# import below - confirm it is still available on the target runtime.
%tensorflow_version 1
import tensorflow
print(tensorflow.__version__)
# Check which GPU was allocated by Colab
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
# Check CUDA Version
!nvcc --version

In [None]:
# Requirements for displaying the video after training.
# Both commands send stdout and stderr to /dev/null, so a failed install
# produces no visible output in this cell.
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1

In [None]:
# Requirements to train the RL model and create the environments.
# gym_super_mario_bros and stable_baselines are pinned to exact versions;
# nes_py is installed without a version pin.
!pip install gym_super_mario_bros==7.3.0 nes_py
!pip install stable_baselines==2.10.2

In [None]:
# Displaying the video
from gym.wrappers import Monitor
import glob
import io
import base64
from IPython.display import HTML
from pyvirtualdisplay import Display
from IPython import display as ipythondisplay

# Start an invisible 1400x900 virtual display
# (presumably so frames can be rendered without a physical screen).
screen_size = (1400, 900)
display = Display(visible=0, size=screen_size)
display.start()

def show_video():
  """Embed a recorded episode from ``video/`` into the notebook output.

  Looks for ``video/*.mp4`` relative to the current working directory. When
  at least one file exists, the lexicographically first one is read,
  base64-encoded and rendered as an inline HTML ``<video>`` element.
  Otherwise the message "Could not find video" is printed.
  """
  # Sort so the chosen file does not depend on the filesystem's listing order.
  mp4list = sorted(glob.glob('video/*.mp4'))
  if not mp4list:
    print("Could not find video")
    return

  # Read-only binary mode is enough, and the context manager closes the handle.
  with open(mp4list[0], 'rb') as video_file:
    encoded = base64.b64encode(video_file.read())

  ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))

def wrap_env(env):
  """Return ``env`` wrapped in a gym ``Monitor`` that writes to ``./video``.

  ``force=True`` is passed through to ``Monitor``.
  """
  return Monitor(env, './video', force=True)

In [None]:
#Importing Dependencies
import os
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from stable_baselines.bench import Monitor as Monitor1
from stable_baselines.common.atari_wrappers import FrameStack, WarpFrame, MaxAndSkipEnv, EpisodicLifeEnv
from stable_baselines.common.callbacks import CallbackList, EvalCallback, CheckpointCallback
from stable_baselines.deepq.policies import LnCnnPolicy
from stable_baselines import DQN
from matplotlib import pyplot as plt

In [None]:
# Preprocessing: build the wrapped training environment.
log_dir = "./monitor_logs/"
# Monitor1 opens its CSV log inside log_dir as soon as it is constructed, so the
# directory has to exist first; on a fresh runtime it does not.
os.makedirs(log_dir, exist_ok=True)
env = gym_super_mario_bros.make('SuperMarioBros-v0') # Creating Mario Environment
env = JoypadSpace(env, SIMPLE_MOVEMENT) # Restricting controls to Simple Movement
env = WarpFrame(env) # Changing frame to grayscale
env = FrameStack(env, n_frames=4) # Stacking the last 4 frames into one observation
env = MaxAndSkipEnv(env, skip=4) # Frame skipping with skip=4
env = Monitor1(env, log_dir) # Logging episode statistics to log_dir

In [None]:
# Callbacks
run_name = "DQN_Model"
# Save a checkpoint of the model into ./models/ every 10000 callback steps.
checkpoint_callback = CheckpointCallback(save_freq=10000, save_path='./models/', name_prefix=run_name)

# Evaluate on a dedicated environment instead of the training one. Sharing the
# training env would reset it in the middle of a rollout on every evaluation and
# would write the evaluation episodes into the training Monitor log.
# The wrappers mirror the training environment so the observation space matches.
eval_env = gym_super_mario_bros.make('SuperMarioBros-v0')
eval_env = JoypadSpace(eval_env, SIMPLE_MOVEMENT)
eval_env = WarpFrame(eval_env)
eval_env = FrameStack(eval_env, n_frames=4)
eval_env = MaxAndSkipEnv(eval_env, skip=4)
eval_callback = EvalCallback(eval_env, best_model_save_path='./models/', log_path='./models/', eval_freq=10000, deterministic=True, render=False)

In [None]:
# Create the model: DQN with a layer-normalised CNN policy on the wrapped env.
# Hyperparameters are collected in one place and forwarded as keyword arguments.
dqn_hyperparams = dict(
    batch_size=512,
    learning_starts=10000,
    learning_rate=0.0001,
    exploration_fraction=0.1,
    exploration_initial_eps=1.0,
    exploration_final_eps=0.1,
    prioritized_replay=True,
    prioritized_replay_alpha=0.6,
    double_q=True,
    tensorboard_log="./mario_tensorboard/",
)
model = DQN(LnCnnPolicy, env, **dqn_hyperparams)

In [None]:
# Train the model for 50000 timesteps with both callbacks attached.
training_callbacks = CallbackList([checkpoint_callback, eval_callback])
model.learn(total_timesteps=50000, callback=training_callbacks)

In [None]:
# Save the model as it is at the end of training.
saved_model_name = 'DQN_BestModel'
model.save(saved_model_name)

In [None]:
# Test the model: reload the saved agent, play one episode and show the recording.
model = DQN.load('./DQN_BestModel')
env = wrap_env(gym_super_mario_bros.make('SuperMarioBros-v0')) # Records video to ./video
env = JoypadSpace(env, SIMPLE_MOVEMENT)
env = WarpFrame(env) # Changing frame to grayscale
env = FrameStack(env, n_frames=4)
env = MaxAndSkipEnv(env, skip=4)
# Deliberately not wrapped in Monitor1(env, log_dir): that would reopen the
# training monitor log in write mode and wipe the data plotted at the end.

try:
  state = env.reset()
  for step in range(100000):
    action, _ = model.predict(state)
    state, reward, done, info = env.step(action)
    #env.render()
    if done:
      # Stepping a finished episode raises inside the wrappers, so stop here.
      break
finally:
  # Always close the env so the video recorder finalises its file.
  env.close()
show_video()

In [None]:
# Load the TensorBoard notebook extension (needed before using the %tensorboard magic).
%load_ext tensorboard

In [None]:
# Open TensorBoard on ./mario_tensorboard/, the directory passed to DQN as tensorboard_log.
%tensorboard --logdir ./mario_tensorboard/

In [None]:
# Plot the rewards recorded in the Monitor logs under log_dir, up to 50000 timesteps.
# The notebook installs stable_baselines (v2), not stable_baselines3, so the
# plotter has to come from that package.
from stable_baselines import results_plotter
results_plotter.plot_results([log_dir], 50000, results_plotter.X_TIMESTEPS, "Rewards over episodes")