### Import Required Libraries

In [1]:
!pip install gym_super_mario_bros==7.3.0 nes_py
# Install pytorch
!pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
# Installation of stable baselines for Reinforced Learning
!pip install stable-baselines3[extra]

Collecting gym_super_mario_bros==7.3.0
  Downloading gym_super_mario_bros-7.3.0-py2.py3-none-any.whl (198 kB)
Collecting nes_py
  Downloading nes_py-8.1.8.tar.gz (76 kB)
Collecting gym>=0.17.2
  Downloading gym-0.23.1.tar.gz (626 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
    Preparing wheel metadata: started
    Preparing wheel metadata: finished with status 'done'
Collecting pyglet<=1.5.11,>=1.4.0
  Downloading pyglet-1.5.11-py3-none-any.whl (1.1 MB)
Collecting gym-notices>=0.0.4
  Downloading gym_notices-0.0.6-py3-none-any.whl (2.7 kB)
Collecting importlib-metadata>=4.10.0
  Downloading importlib_metadata-4.11.4-py3-none-any.whl (18 kB)
Building wheels for collected packages: nes-py, gym
  Building wheel for nes-py (setup.py): started
  Building wheel for nes-py (setup.py): finished with status 'done'
  Creat

In [2]:
# importing the game from gym
import gym_super_mario_bros
# Import the JoypadSpace wrapper
from nes_py.wrappers import JoypadSpace
# Followed by SIMPLIFIED controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
# Importing os for management of file path
import os 
# Import DQN and A2C as RL learning agents
from stable_baselines3 import DQN
from stable_baselines3 import A2C
# Import Base Callback to save models
from stable_baselines3.common.callbacks import BaseCallback

### Setup Mario

In [3]:
# Create the 'SuperMarioBros-v0' environment and wrap it in JoypadSpace
# with the SIMPLE_MOVEMENT action list, in a single expression.
env = JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), SIMPLE_MOVEMENT)

### Train the RL Model

In [4]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that periodically saves the model being trained.

    Every ``freq_check`` calls of the callback, the current model is written
    into ``store_path`` under the name ``best_model_<n_calls>``. The
    ``best_model`` prefix is only a file name: no comparison between
    checkpoints is made here.

    Args:
        freq_check: Number of callback calls between two consecutive saves.
        store_path: Directory that receives the saved models, or ``None`` to
            disable saving entirely.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, freq_check, store_path, verbose=1):
        super().__init__(verbose)
        self.freq_check = freq_check
        self.store_path = store_path

    def _init_callback(self):
        """Create the checkpoint directory when a path was configured."""
        if self.store_path is not None:
            os.makedirs(self.store_path, exist_ok=True)

    def _on_step(self):
        """Save the model every ``freq_check`` calls.

        Returns:
            Always ``True``, so this callback never asks for training to stop.
        """
        # Mirror the None check in _init_callback: without it, a None
        # store_path would make os.path.join raise TypeError at the first
        # save interval.
        if self.store_path is not None and self.n_calls % self.freq_check == 0:
            model_path = os.path.join(self.store_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [5]:
# Directory handed to the checkpoint callback as its save location
CHECKPOINT_DIR = './train/'
# Directory handed to the model as its TensorBoard log location
LOG_DIR = './logs/'

In [6]:
# Build the checkpoint callback: the model is saved into CHECKPOINT_DIR
# every 10000 callback calls.
callback = TrainAndLoggingCallback(
    store_path=CHECKPOINT_DIR,
    freq_check=10000,
)

In [7]:
# Create the DQN agent with an MLP policy on the wrapped Mario environment.
# NOTE(review): buffer_size=256 and learning_rate=1e-6 are both very small
# for DQN - confirm these values are intentional.
model = DQN(
    'MlpPolicy',
    env,
    learning_rate=1e-6,
    buffer_size=256,
    tensorboard_log=LOG_DIR,
    verbose=1,
    seed=10,
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [8]:
# Train the agent for 2,000,000 timesteps; the callback is invoked during
# training and saves the model at its configured interval.
model.learn(callback=callback, total_timesteps=2_000_000)

Logging to ./logs/DQN_1


  return (self.ram[0x86] - self.ram[0x071c]) % 256


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.54e+04 |
|    ep_rew_mean      | 854      |
|    exploration_rate | 0.707    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 462      |
|    time_elapsed     | 133      |
|    total_timesteps  | 61695    |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.545    |
|    n_updates        | 2923     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.58e+04 |
|    ep_rew_mean      | 848      |
|    exploration_rate | 0.398    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 278      |
|    time_elapsed     | 455      |
|    total_timesteps  | 126774   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.45     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.61e+03 |
|    ep_rew_mean      | 1.59e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 68       |
|    fps              | 216      |
|    time_elapsed     | 1134     |
|    total_timesteps  | 245652   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.179    |
|    n_updates        | 48912    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.47e+03 |
|    ep_rew_mean      | 1.58e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 72       |
|    fps              | 215      |
|    time_elapsed     | 1157     |
|    total_timesteps  | 249746   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.273    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 1.57e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 132      |
|    fps              | 204      |
|    time_elapsed     | 1629     |
|    total_timesteps  | 332872   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.255    |
|    n_updates        | 70717    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 1.56e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 136      |
|    fps              | 203      |
|    time_elapsed     | 1663     |
|    total_timesteps  | 338471   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.155    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | 1.54e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 196      |
|    fps              | 197      |
|    time_elapsed     | 2110     |
|    total_timesteps  | 416606   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.316    |
|    n_updates        | 91651    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 1.53e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 200      |
|    fps              | 196      |
|    time_elapsed     | 2138     |
|    total_timesteps  | 421308   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.16     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.39e+03 |
|    ep_rew_mean      | 1.51e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 260      |
|    fps              | 192      |
|    time_elapsed     | 2623     |
|    total_timesteps  | 504575   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.528    |
|    n_updates        | 113643   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.38e+03 |
|    ep_rew_mean      | 1.51e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 264      |
|    fps              | 192      |
|    time_elapsed     | 2653     |
|    total_timesteps  | 509691   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.92     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | 1.52e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 324      |
|    fps              | 189      |
|    time_elapsed     | 3122     |
|    total_timesteps  | 591117   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.662    |
|    n_updates        | 135279   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.39e+03 |
|    ep_rew_mean      | 1.53e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 328      |
|    fps              | 189      |
|    time_elapsed     | 3158     |
|    total_timesteps  | 597686   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.104    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.37e+03 |
|    ep_rew_mean      | 1.53e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 388      |
|    fps              | 187      |
|    time_elapsed     | 3627     |
|    total_timesteps  | 678692   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.2      |
|    n_updates        | 157172   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.34e+03 |
|    ep_rew_mean      | 1.51e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 392      |
|    fps              | 186      |
|    time_elapsed     | 3651     |
|    total_timesteps  | 682752   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.358    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | 1.51e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 452      |
|    fps              | 185      |
|    time_elapsed     | 4119     |
|    total_timesteps  | 764094   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.0868   |
|    n_updates        | 178523   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.33e+03 |
|    ep_rew_mean      | 1.49e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 456      |
|    fps              | 185      |
|    time_elapsed     | 4139     |
|    total_timesteps  | 767430   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.275    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.4e+03  |
|    ep_rew_mean      | 1.52e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 516      |
|    fps              | 183      |
|    time_elapsed     | 4667     |
|    total_timesteps  | 855413   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.81     |
|    n_updates        | 201353   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.39e+03 |
|    ep_rew_mean      | 1.52e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 520      |
|    fps              | 183      |
|    time_elapsed     | 4693     |
|    total_timesteps  | 859797   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.01     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.43e+03 |
|    ep_rew_mean      | 1.57e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 580      |
|    fps              | 181      |
|    time_elapsed     | 5224     |
|    total_timesteps  | 949836   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 2.41     |
|    n_updates        | 224958   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.44e+03 |
|    ep_rew_mean      | 1.57e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 584      |
|    fps              | 181      |
|    time_elapsed     | 5257     |
|    total_timesteps  | 955396   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.263    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | 1.49e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 644      |
|    fps              | 180      |
|    time_elapsed     | 5720     |
|    total_timesteps  | 1034920  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.426    |
|    n_updates        | 246229   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.36e+03 |
|    ep_rew_mean      | 1.47e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 648      |
|    fps              | 180      |
|    time_elapsed     | 5760     |
|    total_timesteps  | 1041630  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.941    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.44e+03 |
|    ep_rew_mean      | 1.5e+03  |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 708      |
|    fps              | 179      |
|    time_elapsed     | 6313     |
|    total_timesteps  | 1135572  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.34     |
|    n_updates        | 271392   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.45e+03 |
|    ep_rew_mean      | 1.51e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 712      |
|    fps              | 179      |
|    time_elapsed     | 6341     |
|    total_timesteps  | 1140158  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.205    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | 1.51e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 772      |
|    fps              | 179      |
|    time_elapsed     | 6855     |
|    total_timesteps  | 1229431  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.496    |
|    n_updates        | 294857   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.51e+03 |
|    ep_rew_mean      | 1.53e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 776      |
|    fps              | 179      |
|    time_elapsed     | 6900     |
|    total_timesteps  | 1237101  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.252    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.66e+03 |
|    ep_rew_mean      | 1.61e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 836      |
|    fps              | 178      |
|    time_elapsed     | 7489     |
|    total_timesteps  | 1336432  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.35     |
|    n_updates        | 321607   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.67e+03 |
|    ep_rew_mean      | 1.61e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 840      |
|    fps              | 178      |
|    time_elapsed     | 7515     |
|    total_timesteps  | 1340981  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.493    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.47e+03 |
|    ep_rew_mean      | 1.55e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 900      |
|    fps              | 178      |
|    time_elapsed     | 8017     |
|    total_timesteps  | 1427494  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.57     |
|    n_updates        | 344373   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.46e+03 |
|    ep_rew_mean      | 1.56e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 904      |
|    fps              | 178      |
|    time_elapsed     | 8046     |
|    total_timesteps  | 1432288  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.87     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.45e+03 |
|    ep_rew_mean      | 1.56e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 964      |
|    fps              | 177      |
|    time_elapsed     | 8575     |
|    total_timesteps  | 1521860  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.316    |
|    n_updates        | 367964   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.47e+03 |
|    ep_rew_mean      | 1.56e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 968      |
|    fps              | 177      |
|    time_elapsed     | 8613     |
|    total_timesteps  | 1528324  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.277    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.56e+03 |
|    ep_rew_mean      | 1.53e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1028     |
|    fps              | 176      |
|    time_elapsed     | 9175     |
|    total_timesteps  | 1623114  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.543    |
|    n_updates        | 393278   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.55e+03 |
|    ep_rew_mean      | 1.53e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1032     |
|    fps              | 176      |
|    time_elapsed     | 9212     |
|    total_timesteps  | 1629552  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.562    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.61e+03 |
|    ep_rew_mean      | 1.54e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1092     |
|    fps              | 176      |
|    time_elapsed     | 9781     |
|    total_timesteps  | 1726446  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.155    |
|    n_updates        | 419111   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.65e+03 |
|    ep_rew_mean      | 1.56e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1096     |
|    fps              | 176      |
|    time_elapsed     | 9818     |
|    total_timesteps  | 1733027  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.75     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.46e+03 |
|    ep_rew_mean      | 1.45e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1156     |
|    fps              | 176      |
|    time_elapsed     | 10335    |
|    total_timesteps  | 1821212  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.37     |
|    n_updates        | 442802   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.48e+03 |
|    ep_rew_mean      | 1.46e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1160     |
|    fps              | 176      |
|    time_elapsed     | 10371    |
|    total_timesteps  | 1827373  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.15     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.54e+03 |
|    ep_rew_mean      | 1.53e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1220     |
|    fps              | 175      |
|    time_elapsed     | 10931    |
|    total_timesteps  | 1922151  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.284    |
|    n_updates        | 468037   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.53e+03 |
|    ep_rew_mean      | 1.52e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1224     |
|    fps              | 175      |
|    time_elapsed     | 10960    |
|    total_timesteps  | 1926933  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.39     |
|    n_updates      

<stable_baselines3.dqn.dqn.DQN at 0x20073894730>

In [9]:
# Save the trained model under the name "dqn_mlp_model_seed10"
model.save("dqn_mlp_model_seed10")