In [None]:
# For the Environment
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from gym.wrappers import GrayScaleObservation
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv, VecTransposeImage

# For the Learning Model
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback, CallbackList
from stable_baselines3 import PPO

# 1. Create and Preprocess Environments

In [None]:
# Environment id handed to gym_super_mario_bros.make() by create_and_preprocess_env.
env_name = "SuperMarioBros-v0"

def create_and_preprocess_env(env_name):
    """Build one wrapped, vectorized Super Mario Bros environment.

    The wrappers are applied in this order (innermost first):

    1. ``gym_super_mario_bros.make`` creates the base environment.
    2. ``JoypadSpace`` restricts the action set to ``SIMPLE_MOVEMENT``.
    3. ``GrayScaleObservation`` (``keep_dim=True``) converts frames to
       grayscale.
    4. ``Monitor`` wraps the environment for episode bookkeeping.
    5. ``DummyVecEnv`` exposes the single environment through the
       vectorized-env interface.
    6. ``VecFrameStack`` stacks 4 frames with ``channels_order='last'``.
    7. ``VecTransposeImage`` is applied last.

    Args:
        env_name: Environment id passed to ``gym_super_mario_bros.make``.

    Returns:
        The outermost ``VecTransposeImage`` wrapper around the environment.
    """
    # --- single-environment stage: plain gym-style wrappers ---
    raw_env = gym_super_mario_bros.make(env_name)
    joypad_env = JoypadSpace(raw_env, SIMPLE_MOVEMENT)
    gray_env = GrayScaleObservation(joypad_env, keep_dim=True)
    monitored_env = Monitor(gray_env)

    # --- vectorized stage: DummyVecEnv takes a list of zero-argument
    # factories; the factory here simply returns the env built above ---
    vec_env = DummyVecEnv([lambda: monitored_env])
    stacked_env = VecFrameStack(vec_env, n_stack=4, channels_order='last')
    return VecTransposeImage(stacked_env)
    
# Build two separate, identically preprocessed environments: train_env is
# handed to PPO and eval_env to EvalCallback further down in the notebook.
train_env = create_and_preprocess_env(env_name=env_name)
eval_env = create_and_preprocess_env(env_name=env_name)

# 2. Create and Train Optimized Agent

In [None]:
# Output locations: model files go under save_path, TensorBoard logs under log_path.
save_path = './Optimized/Saved Models/'
log_path = './Optimized/Logs/'

# Periodic snapshots of the model, written to save_path with the
# 'Optimized' file-name prefix every 200,000 callback calls.
checkpoint_callback = CheckpointCallback(
    save_freq=200_000,
    save_path=save_path,
    name_prefix='Optimized',
)

# Periodic evaluation on the separate eval_env every 20,000 callback calls;
# the best model found is written to the same save_path directory.
eval_callback = EvalCallback(
    eval_env,
    eval_freq=20_000,
    best_model_save_path=save_path,
)

# Bundle both callbacks so a single object can be passed to model.learn().
callback = CallbackList([checkpoint_callback, eval_callback])

In [None]:
# PPO agent with a CNN policy, trained on train_env; TensorBoard logs are
# written under log_path.
model = PPO(
    policy='CnnPolicy',
    env=train_env,
    # These are the optimized values
    learning_rate=3e-5,
    n_steps=512,
    batch_size=128,
    n_epochs=20,
    tensorboard_log=log_path,
    verbose=1,
)

# Train for 2,000,000 timesteps; the callback list handles checkpointing
# and periodic evaluation during training.
model.learn(total_timesteps=2_000_000, callback=callback)