# 1. Setup Mario

In [1]:
!pip install gym_super_mario_bros==7.3.0 nes_py

Collecting gym_super_mario_bros==7.3.0
  Downloading gym_super_mario_bros-7.3.0-py2.py3-none-any.whl (198 kB)
[K     |████████████████████████████████| 198 kB 4.4 MB/s 
[?25hCollecting nes_py
  Downloading nes_py-8.1.8.tar.gz (76 kB)
[K     |████████████████████████████████| 76 kB 4.4 MB/s 
Building wheels for collected packages: nes-py
  Building wheel for nes-py (setup.py) ... [?25l[?25hdone
  Created wheel for nes-py: filename=nes_py-8.1.8-cp37-cp37m-linux_x86_64.whl size=439148 sha256=8bacc10e76296a08e3785a7691b949dc4d1733b05a1da7757b4f802405170c47
  Stored in directory: /root/.cache/pip/wheels/f2/05/1f/608f15ab43187096eb5f3087506419c2d9772e97000f3ba025
Successfully built nes-py
Installing collected packages: nes-py, gym-super-mario-bros
Successfully installed gym-super-mario-bros-7.3.0 nes-py-8.1.8


In [2]:
# Import the game
import gym_super_mario_bros
# Import the Joypad wrapper
from nes_py.wrappers import JoypadSpace
# Import the SIMPLIFIED controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

In [None]:
# Build the Mario environment and restrict its controls to SIMPLE_MOVEMENT in one expression
env = JoypadSpace(
    gym_super_mario_bros.make('SuperMarioBros-v0'),
    SIMPLE_MOVEMENT,
)

In [None]:
# Display the SIMPLE_MOVEMENT controls that were passed to JoypadSpace
SIMPLE_MOVEMENT

In [None]:
# Restart flag: True forces a reset before the first step and after every finished episode
done = True
try:
    # Play 100000 frames with randomly sampled actions
    for step in range(100000):
        if done:
            # Start (or restart) the game
            env.reset()
        # Take a random action from the action space
        state, reward, done, info = env.step(env.action_space.sample())
        # Show the game on the screen
        env.render()
finally:
    # Always close the game, even if the loop is interrupted or a step raises
    env.close()

# 2. Preprocess Environment


In [None]:
# Install pytorch
!pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

Looking in links: https://download.pytorch.org/whl/cu113/torch_stable.html
Collecting torch==1.10.1+cu113
  Downloading https://download.pytorch.org/whl/cu113/torch-1.10.1%2Bcu113-cp37-cp37m-linux_x86_64.whl (1821.5 MB)
[K     |██████████████▋                 | 834.1 MB 1.4 MB/s eta 0:12:04tcmalloc: large alloc 1147494400 bytes == 0x56104ff42000 @  0x7f8f1d7eb615 0x56104d4833bc 0x56104d56418a 0x56104d4861cd 0x56104d578b3d 0x56104d4fa458 0x56104d4f502f 0x56104d487aba 0x56104d4fa2c0 0x56104d4f502f 0x56104d487aba 0x56104d4f6cd4 0x56104d579986 0x56104d4f6350 0x56104d579986 0x56104d4f6350 0x56104d579986 0x56104d4f6350 0x56104d487f19 0x56104d4cba79 0x56104d486b32 0x56104d4fa1dd 0x56104d4f502f 0x56104d487aba 0x56104d4f6cd4 0x56104d4f502f 0x56104d487aba 0x56104d4f5eae 0x56104d4879da 0x56104d4f6108 0x56104d4f502f
[K     |██████████████████▌             | 1055.7 MB 1.4 MB/s eta 0:08:51tcmalloc: large alloc 1434370048 bytes == 0x561094598000 @  0x7f8f1d7eb615 0x56104d4833bc 0x56104d56418a 0x561

In [None]:
# Install stable baselines for RL stuff
!pip install stable-baselines3[extra]

In [None]:
# Import Frame Stacker Wrapper and GrayScaling Wrapper
from gym.wrappers import GrayScaleObservation
# Import Vectorization Wrappers
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
# Import Matplotlib to show the impact of frame stacking
from matplotlib import pyplot as plt
import os    
# Set the KMP_DUPLICATE_LIB_OK environment variable for this process.
# NOTE(review): presumably a workaround for the duplicate-OpenMP-runtime error seen when several
# libraries bundle their own copy; confirm it is still required.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [None]:
# Base game environment
env = gym_super_mario_bros.make('SuperMarioBros-v0')
# Restrict the controller to the SIMPLE_MOVEMENT set
env = JoypadSpace(env, SIMPLE_MOVEMENT)
# Convert observations to grayscale, keeping the channel dimension
env = GrayScaleObservation(env, keep_dim=True)
# Vectorize: DummyVecEnv takes a list of factories; the default argument binds the current env
env = DummyVecEnv([lambda wrapped_env=env: wrapped_env])
# Stack 4 frames along the last axis
env = VecFrameStack(env, n_stack=4, channels_order='last')

In [None]:
# Reset the vectorized environment and keep the returned observation for the plot below
state = env.reset()

In [None]:
# Plot every stacked frame side by side.
# The last axis of `state` indexes the frames stacked by VecFrameStack (channels_order='last').
n_frames = state.shape[3]
plt.figure(figsize=(20,16))
for idx in range(n_frames):
    # Size the grid from the actual stack depth instead of assuming exactly 4 columns
    plt.subplot(1, n_frames, idx+1)
    plt.imshow(state[0][:,:,idx])


# 3. Train the RL Model

In [None]:
# Import os for file path management
import os 
# Import PPO for algos
from stable_baselines3 import PPO
# Import Base Callback for saving models
from stable_baselines3.common.callbacks import BaseCallback

ModuleNotFoundError: No module named 'stable_baselines3'

In [None]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves a model checkpoint every ``check_freq`` callback calls.

    Args:
        check_freq: Interval between checkpoints; a checkpoint is written
            whenever ``n_calls`` is a multiple of this value.
        save_path: Directory that receives the checkpoints, or ``None`` to
            disable checkpointing entirely.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if a save path was given."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save ``best_model_<n_calls>`` under ``save_path`` when a checkpoint is due.

        Returns:
            Always ``True``.
        """
        # Same None check as _init_callback: without it, save_path=None would
        # reach os.path.join(None, ...) and raise TypeError at the first checkpoint.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [None]:
# Directory handed to TrainAndLoggingCallback as its save_path for model checkpoints
CHECKPOINT_DIR = './trainCNN/'
# Directory handed to PPO as its tensorboard_log location
LOG_DIR = './logs/'

In [None]:
# Checkpoint the model into CHECKPOINT_DIR every 50000 callback calls
callback = TrainAndLoggingCallback(
    check_freq=50000,
    save_path=CHECKPOINT_DIR,
)

In [None]:
# Create a PPO agent that uses the 'MlpPolicy' policy.
# NOTE(review): under Run All the next cell rebinds `model` to a 'CnnPolicy' agent,
# so this instance is discarded; confirm this cell is still wanted.
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=LOG_DIR, learning_rate=0.000001, 
            n_steps=512) 

In [None]:
# Create the PPO agent with the 'CnnPolicy' policy, logging to LOG_DIR for TensorBoard
model = PPO(
    'CnnPolicy',
    env,
    verbose=1,
    tensorboard_log=LOG_DIR,
    learning_rate=1e-6,
    n_steps=512,
)

In [None]:
# Load a saved checkpoint and attach the current environment right away,
# so the model.learn() call in the following cell has an environment to train on.
model = PPO.load('./trainCNN/CNNPart1/best_model_600000', env=env)

In [None]:
# Run training for 4,000,000 timesteps, with `callback` invoked along the way
model.learn(callback=callback, total_timesteps=4_000_000)

In [None]:
# Save the current model under the name 'thisisatestmodel'
model.save('thisisatestmodel')

# 4. Test it Out


In [None]:
# Load a saved checkpoint; no environment is attached here (the next cell calls set_env)
model = PPO.load('./trainCNN/best_model_1750000')

NameError: name 'PPO' is not defined

In [None]:
# Resume training of the loaded model: attach the environment, then train for 4,000,000 timesteps
model.set_env(env)
model.learn(callback=callback, total_timesteps=4_000_000)

In [None]:
# Reset the environment and store the returned observation in `state`
state = env.reset()

In [None]:
# Start the game
state = env.reset()
# Let the agent play; the loop has no exit condition, so interrupt the kernel to stop it
while True:
    # Ask the loaded `model` for the next action given the current observation
    action, _ = model.predict(state)
    state, reward, done, info = env.step(action)
    # Show the game on the screen
    env.render()

In [None]:
# Close the environment (run this after the playback loop above has been stopped)
env.close()