In [None]:
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT #Importing simple controls

## Preprocess Environment


In [None]:
from gym.wrappers import GrayScaleObservation #GrayScale convert the color frames to gray
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv #Import Vectorization Wrappers
from matplotlib import pyplot as plt #Matplotlib to show the impact of frames stacking
import datetime

In [None]:
def _build_mario_env():
    """Build one preprocessed (non-vectorized) Mario environment.

    Pipeline: base game -> simplified controls -> grayscale frames
    (keep_dim=True keeps the trailing channel axis).
    """
    base_env = gym_super_mario_bros.make('SuperMarioBros-v0')
    joypad_env = JoypadSpace(base_env, SIMPLE_MOVEMENT)
    return GrayScaleObservation(joypad_env, keep_dim=True)


# DummyVecEnv takes a list of callables, each returning an environment when called;
# VecFrameStack then stacks the last 4 frames along the last (channel) axis.
env = VecFrameStack(DummyVecEnv([_build_mario_env]), 4, channels_order='last')

In [None]:
# Reset the vectorized, frame-stacked environment and keep the initial observation
state = env.reset()

In [None]:
# Shape of the observation returned by the vectorized, frame-stacked environment
print(state.shape)
# Number of values in one 240x256 frame: a single channel (grayscale) vs. three channels (RGB)
a = 240*256*1
b = 240*256*3
# These are per-frame value counts, not frame counts, so the message says "values per frame"
print("If we don't use Gray Scale, we'd have to process %d values per frame, but we will only process %d values per frame" % (b, a))

In [None]:
# Show each stacked frame on its own axis. Passing the whole stack to a single
# imshow call would treat the stacked grayscale frames as colour/alpha channels
# of one image, which hides what frame stacking actually does.
# NOTE(review): assumes state[0] is (height, width, n_stacked_frames) because the
# stack was built with channels_order='last' - confirm against the printed shape.
stacked_frames = state[0]
n_frames = stacked_frames.shape[-1]
# squeeze=False keeps `axes` two-dimensional even when there is a single frame
fig, axes = plt.subplots(1, n_frames, figsize=(4 * n_frames, 4), squeeze=False)
for frame_idx, ax in enumerate(axes[0]):
    ax.imshow(stacked_frames[:, :, frame_idx], cmap='gray')
    ax.set_title('Stacked frame {}'.format(frame_idx + 1))
    ax.axis('off')
plt.show()

## Train the AI model

In [None]:
# Import os for file path management
import os 
# Import PPO for algos
from stable_baselines3 import PPO
# Import Base Callback for saving models
from stable_baselines3.common.callbacks import BaseCallback

In [None]:
class TrainAndLoggingCallback(BaseCallback):
    """Training callback that periodically saves the model to disk.

    Every ``check_freq`` callback invocations the current model is saved inside
    ``save_path`` under the name ``best_model_<n_calls>``. Despite the file name,
    no comparison between models is made: every checkpoint is written.

    Args:
        check_freq: Save a checkpoint whenever ``n_calls`` is a multiple of this value.
        save_path: Directory for the checkpoints, or ``None`` to disable saving.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory when a save path was given."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save a checkpoint when one is due; always return True.

        NOTE(review): per the Stable-Baselines3 callback contract a falsy return
        value is expected to stop training - confirm against the library docs.
        """
        checkpoint_due = self.n_calls % self.check_freq == 0
        # Mirror _init_callback: a save_path of None means "do not save", rather
        # than failing inside os.path.join with a TypeError.
        if checkpoint_due and self.save_path is not None:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [None]:
# Output locations used by the training cells below
# CHECKPOINT_DIR: passed as save_path to the checkpoint callback
CHECKPOINT_DIR = './train'
# LOG_DIR: passed as tensorboard_log when the PPO model is created
LOG_DIR = './logs'

In [None]:
# Set up the model-saving callback: a checkpoint is written to CHECKPOINT_DIR
# every time the callback's call counter reaches a multiple of 1000
callback = TrainAndLoggingCallback(check_freq=1000, save_path=CHECKPOINT_DIR)

In [None]:
# Build the PPO agent on top of the preprocessed, frame-stacked environment
model = PPO(
    'CnnPolicy',               # policy identifier passed to PPO
    env,
    verbose=1,
    tensorboard_log=LOG_DIR,   # training logs go to LOG_DIR
    learning_rate=1e-4,
    n_steps=512,
)

In [None]:
# Print wall-clock timestamps around the training run so its duration can be read off the output
print(">>>Training starts at ", datetime.datetime.now())
# The checkpoint callback is invoked during learning and saves the model periodically
model.learn(callback=callback, total_timesteps=1_000_000)
print(">>>Training ends at ", datetime.datetime.now())


In [None]:
# NOTE(review): this prints the time at which this cell is executed, labelled as the
# training end time; the training cell already prints the end timestamp right after
# model.learn returns - presumably a leftover cell, confirm before relying on it.
print(">>>Training ends at ", datetime.datetime.now())

In [None]:
# Load the checkpoint that the training callback saved when its call counter reached 2000.
# The path is built from CHECKPOINT_DIR so it cannot drift from the directory the
# callback actually writes to.
model = PPO.load(os.path.join(CHECKPOINT_DIR, 'best_model_2000'))


In [None]:
# Reset the environment to get a fresh initial observation for the loaded model
# (the playback cell below resets again before it starts looping)
state = env.reset()

In [None]:
# Start the game
state = env.reset()
# Loop through the game until the kernel is interrupted.
# The loop has no exit condition of its own, so interrupting the kernel is the
# intended way to stop playback; catching KeyboardInterrupt lets the cell end
# cleanly instead of leaving a traceback in the notebook.
try:
    while True:
        # Ask the loaded model for an action given the current stacked observation
        action, _ = model.predict(state)
        # Advance the environment by one step with that action
        state, reward, done, info = env.step(action)
        # Draw the current frame
        env.render()
except KeyboardInterrupt:
    # The environment is deliberately left open so this cell can be re-run
    print("Playback stopped by user")