1. Setup Mario

In [17]:
!pip install gym_super_mario_bros==7.4.0 nes_py

Collecting gym_super_mario_bros==7.4.0
  Downloading gym_super_mario_bros-7.4.0-py3-none-any.whl (199 kB)
     -------------------------------------- 199.1/199.1 kB 2.4 MB/s eta 0:00:00
Installing collected packages: gym_super_mario_bros
  Attempting uninstall: gym_super_mario_bros
    Found existing installation: gym-super-mario-bros 7.3.0
    Uninstalling gym-super-mario-bros-7.3.0:
      Successfully uninstalled gym-super-mario-bros-7.3.0
Successfully installed gym_super_mario_bros-7.4.0


In [18]:
#import the game

import gym_super_mario_bros

#import the joypad emulator

from nes_py.wrappers import JoypadSpace

#import the simplified controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

In [19]:
SIMPLE_MOVEMENT  # the seven button combinations Mario can use; 'NOOP' means "no operation"

[['NOOP'],
 ['right'],
 ['right', 'A'],
 ['right', 'B'],
 ['right', 'A', 'B'],
 ['A'],
 ['left']]

In [20]:
# set up the game environment
# NOTE(review): the "-v3" suffix appears to select an environment variant rather
# than a level -- confirm against the gym-super-mario-bros documentation.
env = gym_super_mario_bros.make("SuperMarioBros-v3") # see the project documentation for the available environment ids

# wrap the environment so it only accepts the SIMPLE_MOVEMENT button combinations
env = JoypadSpace(env, SIMPLE_MOVEMENT)

In [21]:
env.action_space  # the small number of actions is because of the JoypadSpace wrapping; fewer actions help the AI learn faster

Discrete(7)

In [22]:
env.observation_space.shape # the dimensions of a single game frame, i.e. what each observation looks like

(240, 256, 3)

In [None]:
# Play 1000 steps with random actions to check that the environment works.

# flag: does the game need to be (re)started?
done = True

try:
  # loop through each frame in the game
  for step in range(1000):
    # start the game on the first step and again after every finished episode
    if done:
      state = env.reset()

    # env.action_space.sample() picks a random action from SIMPLE_MOVEMENT
    step_result = env.step(env.action_space.sample())

    # step() returns 4 values (obs, reward, done, info) under the classic gym
    # API and 5 values (obs, reward, terminated, truncated, info) under the
    # newer one; accept both instead of assuming a fixed length.
    if len(step_result) == 5:
      state, reward, terminated, truncated, info = step_result
      done = terminated or truncated
    else:
      state, reward, done, info = step_result

    # show the game on screen
    env.render()
finally:
  # close the game even if a step or render call fails
  env.close()

In [9]:
# shape of the first frame returned by reset()
# NOTE(review): the previous cell ends with env.close(); confirm the environment
# is still usable here when the notebook is run top to bottom.
env.reset().shape

(240, 256, 3)

In [None]:
# take one step with action index 1, which is ['right'] in SIMPLE_MOVEMENT
env.step(1)

2. Preprocess Environment

In [None]:
!pip install stable-baselines3

In [None]:
#import pytorch
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
#install stable baseline for RL Stuff
pip install stable-baselines3[extra]

In [None]:
#import frame stacker wrapper and grayscaling wrapper
from gym.wrappers import FrameStack, GrayScaleObservation

#import Vectorization wrappers
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv

#import matplotlib to show the impact of frame stacking
from matplotlib import pyplot as plt

In [None]:
from nes_py import NESEnv

# Patch NESEnv.reset so it always returns an (observation, info) pair.
# The guard makes the cell safe to re-run: without it, a second run would bind
# `_reset` to the wrapper itself and every reset() would recurse forever.
if not getattr(NESEnv.reset, "_returns_obs_info", False):
    _reset = NESEnv.reset

    def reset(*args, **kwargs):
        """Call the original NESEnv.reset and normalise its return value.

        Args:
            *args, **kwargs: forwarded unchanged to the original reset.

        Returns:
            An ``(obs, info)`` tuple. If the original reset already returned a
            tuple it is unpacked as ``(obs, info)``; otherwise its result is
            used as ``obs`` and ``info`` is an empty dict.
        """
        obs_info = _reset(*args, **kwargs)
        if isinstance(obs_info, tuple):
            obs, info = obs_info
        else:
            obs, info = obs_info, {}
        return obs, info

    # marker read by the guard above on later runs of this cell
    reset._returns_obs_info = True
    NESEnv.reset = reset

In [None]:
# Build the preprocessing pipeline one wrapper at a time.

# Step 1: the raw Super Mario Bros environment
env = gym_super_mario_bros.make("SuperMarioBros-v0")

# Step 2: restrict the controller to the SIMPLE_MOVEMENT button combinations
env = JoypadSpace(env, SIMPLE_MOVEMENT)

# Step 3: convert frames to grayscale; keep_dim=True keeps the channel axis,
# which the frame stacking in step 5 relies on
env = GrayScaleObservation(env, keep_dim=True)

# Step 4: wrap the single environment in a DummyVecEnv
env = DummyVecEnv([lambda: env])

# Step 5: stack 4 frames along the last (channel) axis
env = VecFrameStack(env, n_stack=4, channels_order="last")

In [None]:
# reset the wrapped environment and keep the initial observation
state = env.reset()

In [None]:
state.shape # shape of the grayscaled, frame-stacked observation

In [None]:
# take one random action; the action is passed as a one-element list
state , reward, done, info = env.step([env.action_space.sample()])

In [None]:
# use matplotlib to plot the current observation of mario
# NOTE(review): state[0] carries the stacked frames in its last axis, so imshow
# presumably treats them as colour channels rather than as a single grayscale
# frame -- confirm this is the intended picture.
plt.imshow(state[0])

In [None]:
# shape of the observation after the step
state.shape

In [None]:
# visualise every stacked frame side by side
plt.figure(figsize = (10,8))
for idx in range(state.shape[3]):
  # the number of columns follows the stack size instead of a hard-coded 4
  plt.subplot(1, state.shape[3], idx+1)
  plt.imshow(state[0][:,:,idx])

plt.show()

# Whenever you run
#   state , reward, done, info = env.step([env.action_space.sample()])
# another frame is displayed, so run it 4 times to display all 4 stacked frames.
# Reset the environment to start over again.

3. Build the RL Model

In [None]:
#Import os for file path mgmt
import os

#import PPO (RL algorithm by OpenAI) 
from stable_baselines3 import PPO

#import Base Callback for saving models
#from stable_baselines3.common.callbacks import BaseCallback

In [None]:
# 1. Build the PPO agent on top of the preprocessed environment
model = PPO(
    policy="CnnPolicy",    # convolutional neural network policy
    env=env,
    learning_rate=1e-6,
    n_steps=512,           # steps collected before each update of the model
    verbose=1,
)

In [None]:
# 2. train the model for 10000 timesteps
model.learn(total_timesteps = 10000)

4. Test it Out

In [None]:
# save the trained model under the name "RL-Mario-Model"
model.save("RL-Mario-Model")

In [None]:
# load() is a classmethod that returns a new model rather than updating the
# existing one in place, so the result has to be assigned back to `model`.
model = PPO.load("RL-Mario-Model")

In [None]:
# reset the environment and ask the model for a prediction on the first observation
state = env.reset()
model.predict(state)

In [None]:
SIMPLE_MOVEMENT[model.predict(state)[0][0]]  # the buttons the model chooses to press in this particular state

In [None]:
# test the model
state = env.reset()

# Let the model play until the cell is interrupted (Kernel -> Interrupt).
try:
  while True:
    action, _ = model.predict(state)
    state, reward, done, info = env.step(action)
    env.render()
except KeyboardInterrupt:
  # interrupting the cell is the intended way out of the endless loop
  pass
finally:
  # close the game, as the random-play loop earlier in the notebook does;
  # re-create env before running this cell again
  env.close()