### Import Required Libraries

In [1]:
!pip install gym_super_mario_bros==7.3.0 nes_py
# Install pytorch
!pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
# Installation of stable baselines for Reinforced Learning
!pip install stable-baselines3[extra]

Collecting gym_super_mario_bros==7.3.0
  Downloading gym_super_mario_bros-7.3.0-py2.py3-none-any.whl (198 kB)
Collecting nes_py
  Downloading nes_py-8.1.8.tar.gz (76 kB)
Collecting gym>=0.17.2
  Downloading gym-0.23.1.tar.gz (626 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
    Preparing wheel metadata: started
    Preparing wheel metadata: finished with status 'done'
Collecting pyglet<=1.5.11,>=1.4.0
  Downloading pyglet-1.5.11-py3-none-any.whl (1.1 MB)
Collecting gym-notices>=0.0.4
  Downloading gym_notices-0.0.6-py3-none-any.whl (2.7 kB)
Collecting importlib-metadata>=4.10.0
  Downloading importlib_metadata-4.11.4-py3-none-any.whl (18 kB)
Building wheels for collected packages: nes-py, gym
  Building wheel for nes-py (setup.py): started
  Building wheel for nes-py (setup.py): finished with status 'done'
  Creat

In [2]:
# importing the game from gym
import gym_super_mario_bros
# Import the JoypadSpace wrapper from nes_py
from nes_py.wrappers import JoypadSpace
# Import the simplified control set (SIMPLE_MOVEMENT)
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
# Importing os for management of file path
import os 
# Import DQN and A2C as the RL learning agents
from stable_baselines3 import DQN
from stable_baselines3 import A2C
# Import Base Callback to save models
from stable_baselines3.common.callbacks import BaseCallback

### Setup Mario

In [3]:
# Create the 'SuperMarioBros-v0' game and wrap it in JoypadSpace with the
# SIMPLE_MOVEMENT control set, all in one expression.
env = JoypadSpace(
    gym_super_mario_bros.make('SuperMarioBros-v0'),
    SIMPLE_MOVEMENT,
)

### Train the RL Model

In [4]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that periodically saves the model being trained.

    Whenever the callback's call counter ``n_calls`` is a multiple of
    ``freq_check``, the current model is saved inside ``store_path`` under the
    name ``best_model_<n_calls>``. Despite the file name, no comparison between
    checkpoints is made: each file is just the model at that call count.

    :param freq_check: Number of callback calls between two consecutive saves.
    :param store_path: Directory that receives the checkpoints. ``None``
        disables both directory creation and saving.
    :param verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, freq_check, store_path, verbose=1):
        super().__init__(verbose)
        self.freq_check = freq_check
        self.store_path = store_path

    def _init_callback(self) -> None:
        """Create the checkpoint directory if a store path was configured."""
        if self.store_path is not None:
            os.makedirs(self.store_path, exist_ok=True)

    def _on_step(self) -> bool:
        """Save a checkpoint on every ``freq_check``-th call.

        :return: Always ``True``. In Stable-Baselines3 a callback returning
            ``False`` aborts training, so this callback never stops the run.
        """
        # _init_callback accepts store_path=None, so treat None here as
        # "checkpointing disabled" instead of failing inside os.path.join.
        if self.store_path is not None and self.n_calls % self.freq_check == 0:
            model_path = os.path.join(self.store_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [5]:
# Output locations, relative to the notebook's working directory.
LOG_DIR = './logs/'          # given to the model as its TensorBoard log directory
CHECKPOINT_DIR = './train/'  # given to the checkpoint callback as its store path

In [6]:
# Checkpoint callback: writes the model into CHECKPOINT_DIR every 10,000 callback calls
callback = TrainAndLoggingCallback(
    store_path=CHECKPOINT_DIR,
    freq_check=10_000,
)

In [7]:
# Build the DQN agent with a CNN policy on the wrapped Mario environment.
# TensorBoard logs go to LOG_DIR; the seed is fixed for repeatable runs.
model = DQN(
    'CnnPolicy',
    env,
    learning_rate=0.000001,
    buffer_size=100,
    tensorboard_log=LOG_DIR,
    verbose=1,
    seed=20,
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [8]:
# Run training for two million timesteps; the callback is passed in so it is
# invoked during the run and can write its periodic checkpoints.
model.learn(callback=callback, total_timesteps=2_000_000)

Logging to ./logs/DQN_1


  return (self.ram[0x86] - self.ram[0x071c]) % 256


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+04 |
|    ep_rew_mean      | 718      |
|    exploration_rate | 0.757    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 610      |
|    time_elapsed     | 83       |
|    total_timesteps  | 51123    |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.39     |
|    n_updates        | 280      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.22e+04 |
|    ep_rew_mean      | 748      |
|    exploration_rate | 0.536    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 245      |
|    time_elapsed     | 398      |
|    total_timesteps  | 97775    |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.402    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.4e+03  |
|    ep_rew_mean      | 1.51e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 68       |
|    fps              | 157      |
|    time_elapsed     | 1468     |
|    total_timesteps  | 231301   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.858    |
|    n_updates        | 45325    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.28e+03 |
|    ep_rew_mean      | 1.5e+03  |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 72       |
|    fps              | 156      |
|    time_elapsed     | 1506     |
|    total_timesteps  | 235903   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.523    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.21e+03 |
|    ep_rew_mean      | 1.49e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 132      |
|    fps              | 144      |
|    time_elapsed     | 2145     |
|    total_timesteps  | 310900   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.344    |
|    n_updates        | 65224    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.21e+03 |
|    ep_rew_mean      | 1.48e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 136      |
|    fps              | 144      |
|    time_elapsed     | 2191     |
|    total_timesteps  | 316252   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.554    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.38e+03 |
|    ep_rew_mean      | 1.71e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 196      |
|    fps              | 137      |
|    time_elapsed     | 2944     |
|    total_timesteps  | 404356   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.422    |
|    n_updates        | 88588    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.41e+03 |
|    ep_rew_mean      | 1.73e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 200      |
|    fps              | 136      |
|    time_elapsed     | 3008     |
|    total_timesteps  | 411662   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.298    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.64e+03 |
|    ep_rew_mean      | 1.93e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 260      |
|    fps              | 132      |
|    time_elapsed     | 3857     |
|    total_timesteps  | 510975   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.0352   |
|    n_updates        | 115243   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.66e+03 |
|    ep_rew_mean      | 1.94e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 264      |
|    fps              | 132      |
|    time_elapsed     | 3916     |
|    total_timesteps  | 518007   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.315    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.73e+03 |
|    ep_rew_mean      | 1.95e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 324      |
|    fps              | 129      |
|    time_elapsed     | 4834     |
|    total_timesteps  | 624747   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.0475   |
|    n_updates        | 143686   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.71e+03 |
|    ep_rew_mean      | 1.94e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 328      |
|    fps              | 129      |
|    time_elapsed     | 4888     |
|    total_timesteps  | 631103   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.796    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.55e+03 |
|    ep_rew_mean      | 1.81e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 388      |
|    fps              | 127      |
|    time_elapsed     | 5625     |
|    total_timesteps  | 716550   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 2.81     |
|    n_updates        | 166637   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.54e+03 |
|    ep_rew_mean      | 1.81e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 392      |
|    fps              | 127      |
|    time_elapsed     | 5664     |
|    total_timesteps  | 720970   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 2.01     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 1.52e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 452      |
|    fps              | 126      |
|    time_elapsed     | 6270     |
|    total_timesteps  | 792416   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.628    |
|    n_updates        | 185603   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 1.51e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 456      |
|    fps              | 126      |
|    time_elapsed     | 6298     |
|    total_timesteps  | 795757   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.49     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.16e+03 |
|    ep_rew_mean      | 1.42e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 516      |
|    fps              | 125      |
|    time_elapsed     | 6918     |
|    total_timesteps  | 868961   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.777    |
|    n_updates        | 204740   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.14e+03 |
|    ep_rew_mean      | 1.41e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 520      |
|    fps              | 125      |
|    time_elapsed     | 6940     |
|    total_timesteps  | 871425   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.987    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.11e+03 |
|    ep_rew_mean      | 1.37e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 580      |
|    fps              | 124      |
|    time_elapsed     | 7489     |
|    total_timesteps  | 936098   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.782    |
|    n_updates        | 221524   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.08e+03 |
|    ep_rew_mean      | 1.34e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 584      |
|    fps              | 124      |
|    time_elapsed     | 7520     |
|    total_timesteps  | 939763   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 3.82     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.23e+03 |
|    ep_rew_mean      | 1.41e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 644      |
|    fps              | 124      |
|    time_elapsed     | 8205     |
|    total_timesteps  | 1019171  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.777    |
|    n_updates        | 242292   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.24e+03 |
|    ep_rew_mean      | 1.4e+03  |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 648      |
|    fps              | 124      |
|    time_elapsed     | 8234     |
|    total_timesteps  | 1022363  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.9      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 1.31e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 708      |
|    fps              | 123      |
|    time_elapsed     | 8953     |
|    total_timesteps  | 1105555  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.09     |
|    n_updates        | 263888   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 1.32e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 712      |
|    fps              | 123      |
|    time_elapsed     | 9011     |
|    total_timesteps  | 1112204  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.18     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.18e+03 |
|    ep_rew_mean      | 1.26e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 772      |
|    fps              | 123      |
|    time_elapsed     | 9542     |
|    total_timesteps  | 1173703  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.824    |
|    n_updates        | 280925   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.16e+03 |
|    ep_rew_mean      | 1.24e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 776      |
|    fps              | 122      |
|    time_elapsed     | 9585     |
|    total_timesteps  | 1178814  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.03     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 974      |
|    ep_rew_mean      | 1.2e+03  |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 836      |
|    fps              | 122      |
|    time_elapsed     | 10046    |
|    total_timesteps  | 1232172  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.667    |
|    n_updates        | 295542   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 988      |
|    ep_rew_mean      | 1.21e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 840      |
|    fps              | 122      |
|    time_elapsed     | 10101    |
|    total_timesteps  | 1238603  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.48     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 961      |
|    ep_rew_mean      | 1.31e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 900      |
|    fps              | 122      |
|    time_elapsed     | 10586    |
|    total_timesteps  | 1294702  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.73     |
|    n_updates        | 311175   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 935      |
|    ep_rew_mean      | 1.31e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 904      |
|    fps              | 122      |
|    time_elapsed     | 10602    |
|    total_timesteps  | 1296564  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.72     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.06e+03 |
|    ep_rew_mean      | 1.29e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 964      |
|    fps              | 121      |
|    time_elapsed     | 11256    |
|    total_timesteps  | 1371837  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 3.37     |
|    n_updates        | 330459   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.08e+03 |
|    ep_rew_mean      | 1.28e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 968      |
|    fps              | 121      |
|    time_elapsed     | 11296    |
|    total_timesteps  | 1376503  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.25     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.04e+03 |
|    ep_rew_mean      | 1.19e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1028     |
|    fps              | 121      |
|    time_elapsed     | 11826    |
|    total_timesteps  | 1437819  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.901    |
|    n_updates        | 346954   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.04e+03 |
|    ep_rew_mean      | 1.18e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1032     |
|    fps              | 121      |
|    time_elapsed     | 11849    |
|    total_timesteps  | 1440337  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.07     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.48e+03 |
|    ep_rew_mean      | 1.43e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1092     |
|    fps              | 121      |
|    time_elapsed     | 12811    |
|    total_timesteps  | 1552269  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.803    |
|    n_updates        | 375567   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.51e+03 |
|    ep_rew_mean      | 1.47e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1096     |
|    fps              | 121      |
|    time_elapsed     | 12858    |
|    total_timesteps  | 1557768  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.866    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.78e+03 |
|    ep_rew_mean      | 1.7e+03  |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1156     |
|    fps              | 120      |
|    time_elapsed     | 13771    |
|    total_timesteps  | 1663401  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.783    |
|    n_updates        | 403350   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.82e+03 |
|    ep_rew_mean      | 1.73e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1160     |
|    fps              | 120      |
|    time_elapsed     | 13873    |
|    total_timesteps  | 1675201  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.7      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.96e+03 |
|    ep_rew_mean      | 1.66e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1220     |
|    fps              | 120      |
|    time_elapsed     | 14838    |
|    total_timesteps  | 1787629  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 2.22     |
|    n_updates        | 434407   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.9e+03  |
|    ep_rew_mean      | 1.64e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1224     |
|    fps              | 120      |
|    time_elapsed     | 14896    |
|    total_timesteps  | 1794356  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.882    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.89e+03 |
|    ep_rew_mean      | 1.56e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1284     |
|    fps              | 120      |
|    time_elapsed     | 15784    |
|    total_timesteps  | 1896918  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.654    |
|    n_updates        | 461729   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.85e+03 |
|    ep_rew_mean      | 1.57e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1288     |
|    fps              | 120      |
|    time_elapsed     | 15819    |
|    total_timesteps  | 1900938  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.711    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+03  |
|    ep_rew_mean      | 1.29e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1348     |
|    fps              | 120      |
|    time_elapsed     | 16390    |
|    total_timesteps  | 1966931  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.01     |
|    n_updates        | 479232   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.31e+03 |
|    ep_rew_mean      | 1.3e+03  |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1352     |
|    fps              | 119      |
|    time_elapsed     | 16433    |
|    total_timesteps  | 1971800  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.253    |
|    n_updates      

<stable_baselines3.dqn.dqn.DQN at 0x184171c9730>

In [9]:
# Persist the trained agent; the file name records the model type and seed used above
model.save(path="dqn_cnn_model_seed20")