### Install and Import Required Libraries

In [1]:
!pip install gym_super_mario_bros==7.3.0 nes_py
# Install pytorch
!pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
# Install Stable-Baselines3 (with extras) for reinforcement learning
!pip install stable-baselines3[extra]

Collecting gym_super_mario_bros==7.3.0
  Downloading gym_super_mario_bros-7.3.0-py2.py3-none-any.whl (198 kB)
Collecting nes_py
  Downloading nes_py-8.1.8.tar.gz (76 kB)
Collecting gym>=0.17.2
  Downloading gym-0.23.1.tar.gz (626 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
    Preparing wheel metadata: started
    Preparing wheel metadata: finished with status 'done'
Collecting pyglet<=1.5.11,>=1.4.0
  Downloading pyglet-1.5.11-py3-none-any.whl (1.1 MB)
Collecting gym-notices>=0.0.4
  Downloading gym_notices-0.0.6-py3-none-any.whl (2.7 kB)
Collecting importlib-metadata>=4.10.0
  Downloading importlib_metadata-4.11.4-py3-none-any.whl (18 kB)
Building wheels for collected packages: nes-py, gym
  Building wheel for nes-py (setup.py): started
  Building wheel for nes-py (setup.py): finished with status 'done'
  Creat

In [2]:
# importing the game from gym
import gym_super_mario_bros
# Import the JoypadSpace wrapper from nes_py
from nes_py.wrappers import JoypadSpace
# Import the simplified control scheme (SIMPLE_MOVEMENT)
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
# Importing os for management of file path
import os 
# Import DQN and A2C as the RL agents
from stable_baselines3 import DQN
from stable_baselines3 import A2C
# Import Base Callback to save models
from stable_baselines3.common.callbacks import BaseCallback

### Setup Mario

In [3]:
# Set up the game: create the 'SuperMarioBros-v0' environment, then wrap it in
# JoypadSpace so that it is driven through the SIMPLE_MOVEMENT control set
# (presumably a reduced list of button combinations — confirm against
# gym_super_mario_bros.actions).
env = gym_super_mario_bros.make('SuperMarioBros-v0')
env = JoypadSpace(env, SIMPLE_MOVEMENT)

### Train the RL Model

In [4]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that periodically saves a snapshot of the model being trained.

    Every ``freq_check`` calls to the callback, the current model is written
    into ``store_path`` under the name ``best_model_<n_calls>``. Despite the
    file name, no reward comparison is made: every snapshot is kept.

    Args:
        freq_check: Number of callback calls between two saves. Must be
            non-zero, otherwise the modulo in ``_on_step`` raises
            ``ZeroDivisionError``.
        store_path: Directory that receives the snapshots. ``None`` disables
            saving altogether.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, freq_check, store_path, verbose=1):
        super().__init__(verbose)
        self.freq_check = freq_check
        self.store_path = store_path

    def _init_callback(self):
        """Create the snapshot directory (if one was configured)."""
        if self.store_path is not None:
            os.makedirs(self.store_path, exist_ok=True)

    def _on_step(self):
        """Save the model every ``freq_check`` calls.

        Returns:
            Always ``True``, so this callback never requests that training stop.
        """
        # Apply the same None check as _init_callback: without a target
        # directory, os.path.join(None, ...) would raise TypeError the first
        # time a snapshot is due.
        if self.store_path is not None and self.n_calls % self.freq_check == 0:
            model_path = os.path.join(self.store_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [5]:
# Directory handed to TrainAndLoggingCallback as its model-snapshot location.
CHECKPOINT_DIR = './train/'
# Directory handed to the model as its TensorBoard log location.
LOG_DIR = './logs/'

In [6]:
# Set up the model-saving callback: a snapshot is written into CHECKPOINT_DIR
# every 10000 callback calls.
callback = TrainAndLoggingCallback(freq_check=10000, store_path=CHECKPOINT_DIR)

In [7]:
# Create the DQN agent on the wrapped environment, with a fixed seed (20) and
# TensorBoard logging into LOG_DIR.
# NOTE(review): buffer_size = 100 presumably limits the replay buffer to the
# 100 most recent transitions, far below the library default — confirm this is
# intentional.
# NOTE(review): the cell output reports image wrappers (VecTransposeImage), so
# the observations appear to be images; confirm 'MlpPolicy' (rather than
# 'CnnPolicy') is the intended choice.
model = DQN('MlpPolicy', env, verbose=1, tensorboard_log=LOG_DIR, learning_rate=0.000001, seed = 20,
             buffer_size = 100)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [8]:
# Train the agent for 2,000,000 timesteps; `callback` is invoked during the run
# and saves periodic model snapshots. The log captured below shows the recorded
# run taking roughly 10,500 seconds.
model.learn(total_timesteps=2000000, callback=callback)

Logging to ./logs/DQN_1


  return (self.ram[0x86] - self.ram[0x071c]) % 256


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.28e+04 |
|    ep_rew_mean      | 715      |
|    exploration_rate | 0.757    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 660      |
|    time_elapsed     | 77       |
|    total_timesteps  | 51113    |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 2.5      |
|    n_updates        | 278      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.52e+04 |
|    ep_rew_mean      | 794      |
|    exploration_rate | 0.424    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 295      |
|    time_elapsed     | 410      |
|    total_timesteps  | 121239   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.0178   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.91e+03 |
|    ep_rew_mean      | 1.01e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 68       |
|    fps              | 229      |
|    time_elapsed     | 1155     |
|    total_timesteps  | 265646   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.283    |
|    n_updates        | 53911    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.76e+03 |
|    ep_rew_mean      | 1.02e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 72       |
|    fps              | 228      |
|    time_elapsed     | 1184     |
|    total_timesteps  | 271063   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.554    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.43e+03 |
|    ep_rew_mean      | 993      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 132      |
|    fps              | 216      |
|    time_elapsed     | 1658     |
|    total_timesteps  | 358291   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.317    |
|    n_updates        | 77072    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.43e+03 |
|    ep_rew_mean      | 1.01e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 136      |
|    fps              | 215      |
|    time_elapsed     | 1678     |
|    total_timesteps  | 361951   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.161    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.32e+03 |
|    ep_rew_mean      | 1.08e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 196      |
|    fps              | 208      |
|    time_elapsed     | 2149     |
|    total_timesteps  | 448668   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.000692 |
|    n_updates        | 99666    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.39e+03 |
|    ep_rew_mean      | 1.07e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 200      |
|    fps              | 208      |
|    time_elapsed     | 2207     |
|    total_timesteps  | 459368   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.167    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.47e+03 |
|    ep_rew_mean      | 1.1e+03  |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 260      |
|    fps              | 203      |
|    time_elapsed     | 2636     |
|    total_timesteps  | 536587   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.344    |
|    n_updates        | 121646   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.42e+03 |
|    ep_rew_mean      | 1.09e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 264      |
|    fps              | 203      |
|    time_elapsed     | 2660     |
|    total_timesteps  | 541011   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.177    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.18e+03 |
|    ep_rew_mean      | 1.17e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 324      |
|    fps              | 200      |
|    time_elapsed     | 3090     |
|    total_timesteps  | 618077   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.302    |
|    n_updates        | 142019   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.19e+03 |
|    ep_rew_mean      | 1.15e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 328      |
|    fps              | 199      |
|    time_elapsed     | 3114     |
|    total_timesteps  | 622430   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.149    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.35e+03 |
|    ep_rew_mean      | 1.1e+03  |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 388      |
|    fps              | 197      |
|    time_elapsed     | 3598     |
|    total_timesteps  | 712004   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.115    |
|    n_updates        | 165500   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.37e+03 |
|    ep_rew_mean      | 1.11e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 392      |
|    fps              | 197      |
|    time_elapsed     | 3625     |
|    total_timesteps  | 716910   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.419    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.57e+03 |
|    ep_rew_mean      | 1.15e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 452      |
|    fps              | 196      |
|    time_elapsed     | 4119     |
|    total_timesteps  | 808695   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.101    |
|    n_updates        | 189673   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.57e+03 |
|    ep_rew_mean      | 1.16e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 456      |
|    fps              | 196      |
|    time_elapsed     | 4183     |
|    total_timesteps  | 820802   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.273    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.66e+03 |
|    ep_rew_mean      | 1.19e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 516      |
|    fps              | 195      |
|    time_elapsed     | 4720     |
|    total_timesteps  | 922213   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.331    |
|    n_updates        | 218053   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.73e+03 |
|    ep_rew_mean      | 1.24e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 520      |
|    fps              | 195      |
|    time_elapsed     | 4772     |
|    total_timesteps  | 931894   |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.037    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.68e+03 |
|    ep_rew_mean      | 1.21e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 580      |
|    fps              | 194      |
|    time_elapsed     | 5246     |
|    total_timesteps  | 1020374  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.321    |
|    n_updates        | 242593   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.71e+03 |
|    ep_rew_mean      | 1.2e+03  |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 584      |
|    fps              | 194      |
|    time_elapsed     | 5284     |
|    total_timesteps  | 1027284  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.0833   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.8e+03  |
|    ep_rew_mean      | 1.23e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 644      |
|    fps              | 193      |
|    time_elapsed     | 5907     |
|    total_timesteps  | 1141328  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.257    |
|    n_updates        | 272831   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.72e+03 |
|    ep_rew_mean      | 1.22e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 648      |
|    fps              | 193      |
|    time_elapsed     | 5928     |
|    total_timesteps  | 1145260  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.233    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.81e+03 |
|    ep_rew_mean      | 1.22e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 708      |
|    fps              | 192      |
|    time_elapsed     | 6483     |
|    total_timesteps  | 1248230  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.306    |
|    n_updates        | 299557   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.92e+03 |
|    ep_rew_mean      | 1.23e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 712      |
|    fps              | 192      |
|    time_elapsed     | 6570     |
|    total_timesteps  | 1264148  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.635    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.85e+03 |
|    ep_rew_mean      | 1.23e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 772      |
|    fps              | 191      |
|    time_elapsed     | 7136     |
|    total_timesteps  | 1366975  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.33     |
|    n_updates        | 329243   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.86e+03 |
|    ep_rew_mean      | 1.21e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 776      |
|    fps              | 191      |
|    time_elapsed     | 7167     |
|    total_timesteps  | 1372490  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.466    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.81e+03 |
|    ep_rew_mean      | 1.28e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 836      |
|    fps              | 190      |
|    time_elapsed     | 7825     |
|    total_timesteps  | 1491974  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.713    |
|    n_updates        | 360493   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.73e+03 |
|    ep_rew_mean      | 1.26e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 840      |
|    fps              | 190      |
|    time_elapsed     | 7841     |
|    total_timesteps  | 1494938  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.248    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.91e+03 |
|    ep_rew_mean      | 1.23e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 900      |
|    fps              | 189      |
|    time_elapsed     | 8470     |
|    total_timesteps  | 1608802  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.722    |
|    n_updates        | 389700   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.81e+03 |
|    ep_rew_mean      | 1.19e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 904      |
|    fps              | 189      |
|    time_elapsed     | 8482     |
|    total_timesteps  | 1610958  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.166    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.75e+03 |
|    ep_rew_mean      | 1.19e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 964      |
|    fps              | 189      |
|    time_elapsed     | 9014     |
|    total_timesteps  | 1706680  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.176    |
|    n_updates        | 414169   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.78e+03 |
|    ep_rew_mean      | 1.19e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 968      |
|    fps              | 189      |
|    time_elapsed     | 9071     |
|    total_timesteps  | 1716928  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.162    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.69e+03 |
|    ep_rew_mean      | 1.17e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1028     |
|    fps              | 188      |
|    time_elapsed     | 9598     |
|    total_timesteps  | 1813640  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.479    |
|    n_updates        | 440909   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.74e+03 |
|    ep_rew_mean      | 1.16e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1032     |
|    fps              | 188      |
|    time_elapsed     | 9665     |
|    total_timesteps  | 1826155  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 1.19     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.12e+03 |
|    ep_rew_mean      | 1.33e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1092     |
|    fps              | 188      |
|    time_elapsed     | 10417    |
|    total_timesteps  | 1965330  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.6      |
|    n_updates        | 478832   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.17e+03 |
|    ep_rew_mean      | 1.34e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1096     |
|    fps              | 188      |
|    time_elapsed     | 10465    |
|    total_timesteps  | 1974186  |
| train/              |          |
|    learning_rate    | 1e-06    |
|    loss             | 0.148    |
|    n_updates      

<stable_baselines3.dqn.dqn.DQN at 0x20e88537790>

In [9]:
# Save the trained model under the name "dqn_mlp_model_seed20".
model.save("dqn_mlp_model_seed20")