In [2]:
import time
import os
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3 import DQN
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from gym.wrappers import GrayScaleObservation
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.callbacks import BaseCallback
import optuna



In [2]:
env = gym_super_mario_bros.make('SuperMarioBros-v0')
env = JoypadSpace(env, SIMPLE_MOVEMENT)
monitor_dir = r'./DraftRun_Logs/DR_Monitor'
os.makedirs(monitor_dir,exist_ok=True)
env = Monitor(env,monitor_dir)
env = GrayScaleObservation(env,keep_dim=True)
env = DummyVecEnv([lambda: env])
env = VecFrameStack(env,4,channels_order='last')

In [3]:
tensorboard_dir = r'./DraftRun_Logs/Tensorboard_Logs'
model_param_1 = { 'buffer_size' : 1000, 'learning_rate' : 1e-3}
model = DQN("CnnPolicy", env, verbose=1,tensorboard_log = tensorboard_dir, **model_param_1)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [4]:
class SaveOnBestTrainingRewardCallback(BaseCallback):
    def __init__(self, check_freq, save_model_dir, verbose=1):
        super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = os.path.join(save_model_dir, 'best_model/')
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        # Create folder if needed
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
          print('self.n_calls:',self.n_calls)
          model_path_1 = os.path.join(self.save_path, 'model_{}'.format(self.n_calls))
          self.model.save(model_path_1)

        return True

In [5]:
save_model_dir_1 = r'./DraftRun_Logs/DraftRun_Bestmodel'
callback_1 = SaveOnBestTrainingRewardCallback(check_freq = 10000, save_model_dir = save_model_dir_1)
model.learn(total_timesteps = 500000, callback = callback_1)
model.save("DraftRun_Mario_2")

Logging to ./DraftRun_Logs/Tensorboard_Logs\DQN_1


  return (self.ram[0x86] - self.ram[0x071c]) % 256


self.n_calls: 10000
self.n_calls: 20000
self.n_calls: 30000
self.n_calls: 40000
self.n_calls: 50000
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+04  |
|    ep_rew_mean      | 990      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 277      |
|    time_elapsed     | 187      |
|    total_timesteps  | 52018    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.147    |
|    n_updates        | 504      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 6.9e+03  |
|    ep_rew_mean      | 1.04e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 225      |
|    time_elapsed     | 244      |
|    total_timesteps  | 55174    |
| train/              |  

In [7]:
model_dir = r'C:\\Users\\admin\\Desktop\\Mav\\Mario Code\\DraftRun_Logs\DraftRun_Bestmodel\\best_model\\model_490000.zip'
model = DQN.load(model_dir)
obs = env.reset()
while True:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
      obs = env.reset()

  return (self.ram[0x86] - self.ram[0x071c]) % 256


KeyboardInterrupt: 

In [8]:
env.close()