### Import Required Libraries

In [1]:
!pip install gym_super_mario_bros==7.3.0 nes_py
# Install pytorch
!pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
# Install Stable-Baselines3 for reinforcement learning
!pip install stable-baselines3[extra]

Collecting gym_super_mario_bros==7.3.0
  Downloading gym_super_mario_bros-7.3.0-py2.py3-none-any.whl (198 kB)
Collecting nes_py
  Downloading nes_py-8.1.8.tar.gz (76 kB)
Collecting gym>=0.17.2
  Downloading gym-0.23.1.tar.gz (626 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
    Preparing wheel metadata: started
    Preparing wheel metadata: finished with status 'done'
Collecting pyglet<=1.5.11,>=1.4.0
  Downloading pyglet-1.5.11-py3-none-any.whl (1.1 MB)
Collecting importlib-metadata>=4.10.0
  Downloading importlib_metadata-4.11.4-py3-none-any.whl (18 kB)
Collecting gym-notices>=0.0.4
  Downloading gym_notices-0.0.6-py3-none-any.whl (2.7 kB)
Building wheels for collected packages: nes-py, gym
  Building wheel for nes-py (setup.py): started
  Building wheel for nes-py (setup.py): finished with status 'done'
  Creat

In [2]:
# importing the game from gym
import gym_super_mario_bros
# Import the JoypadSpace wrapper
from nes_py.wrappers import JoypadSpace
# Import the simplified control set (SIMPLE_MOVEMENT)
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
# Importing os for management of file path
import os 
# Import DQN and A2C as RL agents
from stable_baselines3 import DQN
from stable_baselines3 import A2C
# Import Base Callback to save models
from stable_baselines3.common.callbacks import BaseCallback

### Setup Mario

In [3]:
# Create the 'SuperMarioBros-v0' game and wrap it in JoypadSpace with the
# SIMPLE_MOVEMENT action list, in a single expression.
env = JoypadSpace(
    gym_super_mario_bros.make('SuperMarioBros-v0'),
    SIMPLE_MOVEMENT,
)

### Train the RL Model

In [4]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves the model being trained at a fixed call interval.

    Args:
        freq_check: Save a checkpoint whenever the callback's call counter
            (``n_calls``) is a multiple of this value.
        store_path: Directory that receives the checkpoints. ``None`` disables
            both the directory creation and the checkpoint saving.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, freq_check, store_path, verbose=1):
        super().__init__(verbose)
        self.freq_check = freq_check
        self.store_path = store_path

    def _init_callback(self):
        """Create the checkpoint directory if a ``store_path`` was given."""
        if self.store_path is not None:
            os.makedirs(self.store_path, exist_ok=True)

    def _on_step(self):
        """Save a checkpoint on every ``freq_check``-th call.

        Returns:
            Always ``True``; this callback never asks for training to stop.
        """
        # Mirror the None guard used in _init_callback: without it,
        # os.path.join(None, ...) raises TypeError at the first checkpoint.
        if self.store_path is not None and self.n_calls % self.freq_check == 0:
            # Checkpoints are named 'best_model_<n_calls>' inside store_path.
            model_path = os.path.join(self.store_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [5]:
# Passed to DQN as `tensorboard_log`.
LOG_DIR = './logs/'
# Passed to TrainAndLoggingCallback as `store_path` (checkpoint directory).
CHECKPOINT_DIR = './train/'

In [6]:
# Checkpoint callback: writes the model into CHECKPOINT_DIR each time the
# callback's call counter reaches a multiple of 10000.
callback = TrainAndLoggingCallback(
    store_path=CHECKPOINT_DIR,
    freq_check=10000,
)

In [8]:
# Instantiate the DQN agent with a CNN policy on the wrapped Mario env.
# NOTE(review): buffer_size=100 and learning_rate=1e-6 are both very small
# for DQN; presumably the tiny replay buffer limits memory use with
# image observations. Confirm these values are intentional.
model = DQN(
    policy='CnnPolicy',
    env=env,
    learning_rate=0.000001,
    buffer_size=100,
    tensorboard_log=LOG_DIR,
    verbose=1,
    seed=10,
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [9]:
# Run training for two million timesteps; `callback` is invoked along the
# way to write periodic checkpoints.
model.learn(
    callback=callback,
    total_timesteps=2_000_000,
)

Logging to ./logs/DQN_1


  return (self.ram[0x86] - self.ram[0x071c]) % 256


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.93e+04 |
|    ep_rew_mean      | 749      |
|    exploration_rate | 0.634    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 301      |
|    time_elapsed     | 255      |
|    total_timesteps  | 77060    |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.0454   |
|    n_updates        | 6764     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.7e+04  |
|    ep_rew_mean      | 913      |
|    exploration_rate | 0.353    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 203      |
|    time_elapsed     | 670      |
|    total_timesteps  | 136243   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.00207  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.32e+03 |
|    ep_rew_mean      | 1.24e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 68       |
|    fps              | 162      |
|    time_elapsed     | 1385     |
|    total_timesteps  | 225825   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.481    |
|    n_updates        | 43956    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.18e+03 |
|    ep_rew_mean      | 1.24e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 72       |
|    fps              | 162      |
|    time_elapsed     | 1412     |
|    total_timesteps  | 229217   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.0962   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 826      |
|    ep_rew_mean      | 1.14e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 132      |
|    fps              | 153      |
|    time_elapsed     | 1830     |
|    total_timesteps  | 280436   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.0852   |
|    n_updates        | 57608    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 828      |
|    ep_rew_mean      | 1.14e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 136      |
|    fps              | 152      |
|    time_elapsed     | 1854     |
|    total_timesteps  | 283359   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.371    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 745      |
|    ep_rew_mean      | 1.1e+03  |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 196      |
|    fps              | 148      |
|    time_elapsed     | 2159     |
|    total_timesteps  | 320734   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.219    |
|    n_updates        | 67683    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 755      |
|    ep_rew_mean      | 1.11e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 200      |
|    fps              | 148      |
|    time_elapsed     | 2186     |
|    total_timesteps  | 324070   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.133    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 954      |
|    ep_rew_mean      | 1.35e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 260      |
|    fps              | 142      |
|    time_elapsed     | 2754     |
|    total_timesteps  | 393779   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.174    |
|    n_updates        | 85944    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 978      |
|    ep_rew_mean      | 1.38e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 264      |
|    fps              | 142      |
|    time_elapsed     | 2792     |
|    total_timesteps  | 398481   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.17     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 1.72e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 324      |
|    fps              | 138      |
|    time_elapsed     | 3456     |
|    total_timesteps  | 479409   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.51     |
|    n_updates        | 107352   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.29e+03 |
|    ep_rew_mean      | 1.73e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 328      |
|    fps              | 138      |
|    time_elapsed     | 3496     |
|    total_timesteps  | 484098   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.792    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.38e+03 |
|    ep_rew_mean      | 1.83e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 388      |
|    fps              | 135      |
|    time_elapsed     | 4189     |
|    total_timesteps  | 568560   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.2      |
|    n_updates        | 129639   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.38e+03 |
|    ep_rew_mean      | 1.84e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 392      |
|    fps              | 135      |
|    time_elapsed     | 4226     |
|    total_timesteps  | 572993   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.108    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.65e+03 |
|    ep_rew_mean      | 1.91e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 452      |
|    fps              | 133      |
|    time_elapsed     | 5114     |
|    total_timesteps  | 682808   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.772    |
|    n_updates        | 158201   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.61e+03 |
|    ep_rew_mean      | 1.92e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 456      |
|    fps              | 133      |
|    time_elapsed     | 5187     |
|    total_timesteps  | 691889   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.18     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.88e+03 |
|    ep_rew_mean      | 2.12e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 516      |
|    fps              | 131      |
|    time_elapsed     | 6093     |
|    total_timesteps  | 803587   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.92     |
|    n_updates        | 188396   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.87e+03 |
|    ep_rew_mean      | 2.11e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 520      |
|    fps              | 131      |
|    time_elapsed     | 6144     |
|    total_timesteps  | 809999   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.04     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.86e+03 |
|    ep_rew_mean      | 1.99e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 580      |
|    fps              | 130      |
|    time_elapsed     | 7008     |
|    total_timesteps  | 914636   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.753    |
|    n_updates        | 216158   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.8e+03  |
|    ep_rew_mean      | 1.98e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 584      |
|    fps              | 130      |
|    time_elapsed     | 7044     |
|    total_timesteps  | 918991   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.539    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.57e+03 |
|    ep_rew_mean      | 1.68e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 644      |
|    fps              | 129      |
|    time_elapsed     | 7842     |
|    total_timesteps  | 1015501  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.15     |
|    n_updates        | 241375   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.54e+03 |
|    ep_rew_mean      | 1.64e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 648      |
|    fps              | 129      |
|    time_elapsed     | 7873     |
|    total_timesteps  | 1019342  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.379    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 1.36e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 708      |
|    fps              | 128      |
|    time_elapsed     | 8461     |
|    total_timesteps  | 1090570  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.22     |
|    n_updates        | 260142   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.23e+03 |
|    ep_rew_mean      | 1.33e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 712      |
|    fps              | 128      |
|    time_elapsed     | 8472     |
|    total_timesteps  | 1091962  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.2      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 880      |
|    ep_rew_mean      | 1.22e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 772      |
|    fps              | 128      |
|    time_elapsed     | 8939     |
|    total_timesteps  | 1148515  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.293    |
|    n_updates        | 274628   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 882      |
|    ep_rew_mean      | 1.22e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 776      |
|    fps              | 128      |
|    time_elapsed     | 8955     |
|    total_timesteps  | 1150423  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.4      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 866      |
|    ep_rew_mean      | 1.26e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 836      |
|    fps              | 128      |
|    time_elapsed     | 9407     |
|    total_timesteps  | 1205520  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.929    |
|    n_updates        | 288879   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 856      |
|    ep_rew_mean      | 1.27e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 840      |
|    fps              | 128      |
|    time_elapsed     | 9438     |
|    total_timesteps  | 1209363  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.538    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.04e+03 |
|    ep_rew_mean      | 1.54e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 900      |
|    fps              | 127      |
|    time_elapsed     | 9945     |
|    total_timesteps  | 1270549  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.67     |
|    n_updates        | 305137   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.05e+03 |
|    ep_rew_mean      | 1.55e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 904      |
|    fps              | 127      |
|    time_elapsed     | 9975     |
|    total_timesteps  | 1274210  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 2.6      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 1.68e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 964      |
|    fps              | 127      |
|    time_elapsed     | 10716    |
|    total_timesteps  | 1365125  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 2.67     |
|    n_updates        | 328781   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 1.68e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 968      |
|    fps              | 127      |
|    time_elapsed     | 10748    |
|    total_timesteps  | 1369116  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.47     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 1.57e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1028     |
|    fps              | 127      |
|    time_elapsed     | 11314    |
|    total_timesteps  | 1437541  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.88     |
|    n_updates        | 346885   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 1.54e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1032     |
|    fps              | 127      |
|    time_elapsed     | 11348    |
|    total_timesteps  | 1441511  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.813    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | 1.5e+03  |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1092     |
|    fps              | 126      |
|    time_elapsed     | 12008    |
|    total_timesteps  | 1521898  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.581    |
|    n_updates        | 367974   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.26e+03 |
|    ep_rew_mean      | 1.51e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1096     |
|    fps              | 126      |
|    time_elapsed     | 12045    |
|    total_timesteps  | 1526519  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 2.54     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.59e+03 |
|    ep_rew_mean      | 1.52e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1156     |
|    fps              | 126      |
|    time_elapsed     | 12929    |
|    total_timesteps  | 1633754  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 2.8      |
|    n_updates        | 395938   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.62e+03 |
|    ep_rew_mean      | 1.54e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1160     |
|    fps              | 126      |
|    time_elapsed     | 12988    |
|    total_timesteps  | 1640936  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.28     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.99e+03 |
|    ep_rew_mean      | 1.89e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1220     |
|    fps              | 125      |
|    time_elapsed     | 13994    |
|    total_timesteps  | 1762621  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.06     |
|    n_updates        | 428155   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.02e+03 |
|    ep_rew_mean      | 1.91e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1224     |
|    fps              | 125      |
|    time_elapsed     | 14053    |
|    total_timesteps  | 1769818  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.825    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.88e+03 |
|    ep_rew_mean      | 1.95e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1284     |
|    fps              | 125      |
|    time_elapsed     | 14916    |
|    total_timesteps  | 1874329  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.826    |
|    n_updates        | 456082   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.96e+03 |
|    ep_rew_mean      | 1.96e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1288     |
|    fps              | 125      |
|    time_elapsed     | 15019    |
|    total_timesteps  | 1886844  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.59     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.69e+03 |
|    ep_rew_mean      | 1.82e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1348     |
|    fps              | 125      |
|    time_elapsed     | 15771    |
|    total_timesteps  | 1978208  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.247    |
|    n_updates        | 482051   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.67e+03 |
|    ep_rew_mean      | 1.81e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1352     |
|    fps              | 125      |
|    time_elapsed     | 15814    |
|    total_timesteps  | 1983289  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.49     |
|    n_updates      

<stable_baselines3.dqn.dqn.DQN at 0x1cd08536400>

In [10]:

# Persist the final trained agent (file name records the seed used above).
model.save(path="dqn_cnn_model_seed10")