# Header
1. [Setup Mario](#setup_mario)
2. [Preprocess Environment](#preprocess_environment)
3. [Train the RL Model](#train_the_rl_model)
4. [Test it Out](#test_it_out)

## <a name='setup_mario'>1. Setup Mario</a>

In [1]:
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

In [2]:
# Create the 'SuperMarioBros-v0' environment and wrap it in JoypadSpace with
# the SIMPLE_MOVEMENT action list, in a single expression.
env = JoypadSpace(
    gym_super_mario_bros.make('SuperMarioBros-v0'),
    SIMPLE_MOVEMENT,
)

In [3]:
# Smoke-test the environment: step it 5000 times with actions sampled from
# its action space, rendering every frame.
done = True  # start as "done" so the very first iteration resets the env
try:
    for step in range(5000):
        if done:
            # Begin a new episode whenever the previous one has finished.
            env.reset()
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        env.render()
finally:
    # Always release the environment, even if the loop raises or the cell is
    # interrupted part-way through.
    env.close()



## <a name='preprocess_environment'>2. Preprocess Environment</a>

In [4]:
from gym.wrappers import GrayScaleObservation
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from matplotlib import pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Build the preprocessing pipeline by wrapping the environment layer by layer.
# 1. Base game environment.
env = gym_super_mario_bros.make('SuperMarioBros-v0')
# 2. Wrap it in JoypadSpace with the SIMPLE_MOVEMENT action list.
env = JoypadSpace(env, SIMPLE_MOVEMENT)
# 3. Grayscale observations; keep_dim=True presumably keeps a trailing channel
#    axis — TODO confirm against the gym wrapper documentation.
env = GrayScaleObservation(env, keep_dim=True)
# 4. Wrap in a vectorized env. The lambda reads the global `env` when it is
#    called, so this relies on DummyVecEnv invoking it before `env` is rebound
#    by this assignment — NOTE(review): confirm.
env = DummyVecEnv([lambda: env])
# 5. Stack 4 frames with channels last; the recorded output of env.reset()
#    shows a resulting shape of (1, 240, 256, 4).
env = VecFrameStack(env, 4, channels_order='last')

In [6]:
# Reset the wrapped environment and inspect the shape of the observation it
# returns.
state = env.reset()
state.shape

(1, 240, 256, 4)

In [7]:
# Display index 3 along the last axis for the first entry of the batch
# (state has shape (1, 240, 256, 4) per the recorded output).
plt.imshow(state[0, ...,3])

Error: Canceled future for execute_request message before replies were done

## <a name='train_the_rl_model'>3. Train the RL Model</a>

In [None]:
import os
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback

In [None]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves the model at a fixed interval of callback calls.

    Args:
        check_freq: A checkpoint is saved whenever ``n_calls`` is a multiple
            of this value.
        save_path: Directory the checkpoints are written to. It is created
            when the callback is initialised, unless it is ``None``.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq: int, save_path: str, verbose: int = 1) -> None:
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self) -> None:
        """Create the checkpoint directory before training starts."""
        # The name must use a single leading underscore so that it overrides
        # the BaseCallback hook; a double-underscore name is mangled by Python
        # and would never be invoked by the base class.
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        """Save a checkpoint every ``check_freq`` calls.

        Returns:
            Always ``True``.
        """
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, f'best_model_{self.n_calls}')
            self.model.save(model_path)
        return True

In [None]:
# Directory handed to the checkpoint callback as its save_path.
CHECKPOINT_DIR = './data/models/'
# Directory handed to PPO as its tensorboard_log location.
LOGS_DIR = './data/logs/'

In [None]:
# Save a checkpoint into CHECKPOINT_DIR every 100,000 callback calls.
callback = TrainAndLoggingCallback(check_freq=100000, save_path=CHECKPOINT_DIR)

In [None]:
# Construct the PPO agent with the 'CnnPolicy' policy on the preprocessed
# environment, logging to LOGS_DIR.
model = PPO(
    'CnnPolicy', 
    env, 
    verbose=1, 
    tensorboard_log=LOGS_DIR, 
    # NOTE(review): 10e-6 evaluates to 1e-5, not 1e-6 — confirm which
    # learning rate was intended.
    learning_rate=10e-6,
    n_steps=512
    )

## <a name='test_it_out'>4. Test it Out</a>