# Street Fighter Tutorial
This notebook accompanies the YouTube tutorial on <a href='https://www.youtube.com/c/NicholasRenotte'>Nicholas Renotte</a>

# Setup StreetFighter

In [6]:
%pip install gym==0.25.2
%pip install --upgrade setuptools==66
%pip install importlib-metadata==4.8.1
%pip install pyglet==1.3.2

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting gym==0.21.0Note: you may need to restart the kernel to use updated packages.

  Using cached gym-0.21.0.tar.gz (1.5 MB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: gym
  Building wheel for gym (setup.py): started
  Building wheel for gym (setup.py): finished with status 'error'
  Running setup.py clean for gym
Failed to build gym


  error: subprocess-exited-with-error
  
  × python setup.py bdist_wheel did not run successfully.
  │ exit code: 1
  ╰─> [481 lines of output]
      running bdist_wheel
      running build
      running build_py
      creating build
      creating build\lib
      creating build\lib\gym
      copying gym\core.py -> build\lib\gym
      copying gym\error.py -> build\lib\gym
      copying gym\logger.py -> build\lib\gym
      copying gym\version.py -> build\lib\gym
      copying gym\__init__.py -> build\lib\gym
      creating build\lib\gym\envs
      copying gym\envs\registration.py -> build\lib\gym\envs
      copying gym\envs\__init__.py -> build\lib\gym\envs
      creating build\lib\gym\spaces
      copying gym\spaces\box.py -> build\lib\gym\spaces
      copying gym\spaces\dict.py -> build\lib\gym\spaces
      copying gym\spaces\discrete.py -> build\lib\gym\spaces
      copying gym\spaces\multi_binary.py -> build\lib\gym\spaces
      copying gym\spaces\multi_discrete.py -> build\lib\gym\

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [7]:
# Import retro to play Street Fighter using a ROM
import retro
# Import time to slow down game
import time

In [8]:
# See the different retro games
retro.data.list_games()

['1942-Nes',
 '1943-Nes',
 '3NinjasKickBack-Genesis',
 '8Eyes-Nes',
 'AaahhRealMonsters-Genesis',
 'AbadoxTheDeadlyInnerWar-Nes',
 'AcceleBrid-Snes',
 'ActRaiser2-Snes',
 'ActionPachio-Snes',
 'AddamsFamily-GameBoy',
 'AddamsFamily-Genesis',
 'AddamsFamily-Nes',
 'AddamsFamily-Sms',
 'AddamsFamily-Snes',
 'AddamsFamilyPugsleysScavengerHunt-Nes',
 'AddamsFamilyPugsleysScavengerHunt-Snes',
 'AdvancedBusterhawkGleylancer-Genesis',
 'Adventure-Atari2600',
 'AdventureIsland-GameBoy',
 'AdventureIsland3-Nes',
 'AdventureIslandII-Nes',
 'AdventuresOfBatmanAndRobin-Genesis',
 'AdventuresOfBayouBilly-Nes',
 'AdventuresOfDinoRiki-Nes',
 'AdventuresOfDrFranken-Snes',
 'AdventuresOfKidKleets-Snes',
 'AdventuresOfMightyMax-Genesis',
 'AdventuresOfMightyMax-Snes',
 'AdventuresOfRockyAndBullwinkleAndFriends-Genesis',
 'AdventuresOfRockyAndBullwinkleAndFriends-Nes',
 'AdventuresOfRockyAndBullwinkleAndFriends-Snes',
 'AdventuresOfStarSaver-GameBoy',
 'AdventuresOfYogiBear-Snes',
 'AeroFighters-Snes',
 

In [9]:
# python -m retro.import . # Run this from the roms folder, or where you have your game roms 

In [10]:
# Starts up the game environment
env = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis')

RuntimeError: Cannot create multiple emulator instances per process, make sure to call env.close() on each environment before creating a new one

In [None]:
# Closes the game environment - important given we can only run one at a time 
# env.close()

In [None]:
# Sample the observation space
env.observation_space

Box(0, 255, (200, 256, 3), uint8)

In [None]:
# Sample the actions that are avaialble - MultiBinary
env.action_space.sample()

array([0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0], dtype=int8)

In [None]:
# Reset game to starting state
obs = env.reset()
# Set flag to flase
done = False
for game in range(1): 
    while not done: 
        if done: 
            obs = env.reset()
        env.render()
        obs, reward, done, info = env.step(env.action_space.sample())
        time.sleep(0.01)
        print(reward)

ModuleNotFoundError: No module named 'gym.envs.classic_control.rendering'

In [None]:
env.close()

In [None]:
info

# Setup Environment
## What we are going to do! FUNNN
- Observation Preprocess - grayscale (DONE), frame delta, resize the frame so we have less pixels  (DONE) 
- Filter the action - parameter DONE
- Reward function - set this to the score

In [None]:
%pip install opencv-python




In [None]:
# Import environment base class for a wrapper 
from gym import Env 
# Import the space shapes for the environment
from gym.spaces import MultiBinary, Box
# Import numpy to calculate frame delta 
import numpy as np
# Import opencv for grayscaling
import cv2
# Import matplotlib for plotting the image
from matplotlib import pyplot as plt

In [None]:
# Create custom environment 
class StreetFighter(Env): 
    def __init__(self):
        super().__init__()
        # Specify action space and observation space 
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        # Startup and instance of the game 
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
    
    def reset(self):
        # Return the first frame 
        obs = self.game.reset()
        obs = self.preprocess(obs) 
        self.previous_frame = obs 
        
        # Create a attribute to hold the score delta 
        self.score = 0 
        return obs
    
    def preprocess(self, observation): 
        # Grayscaling 
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        # Resize 
        resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)
        # Add the channels value
        channels = np.reshape(resize, (84,84,1))
        return channels 
    
    def step(self, action): 
        # Take a step 
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs) 
        
        # Frame delta 
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs 
        
        # Reshape the reward function
        reward = info['score'] - self.score 
        self.score = info['score'] 
        
        return frame_delta, reward, done, info
    
    def render(self, *args, **kwargs):
        self.game.render()
        
    def close(self):
        self.game.close()

In [None]:
env = StreetFighter()

RuntimeError: Cannot create multiple emulator instances per process, make sure to call env.close() on each environment before creating a new one

In [None]:
env.observation_space.shape

In [None]:
env.action_space.shape

In [None]:
# Reset game to starting state
obs = env.reset()
# Set flag to flase
done = False
for game in range(1): 
    while not done: 
        if done: 
            obs = env.reset()
        env.render()
        obs, reward, done, info = env.step(env.action_space.sample())
        time.sleep(0.01)
        if reward > 0: 
            print(reward)

In [None]:
obs = env.reset()

In [None]:
obs, reward, done, info = env.step(env.action_space.sample())

In [None]:
plt.imshow(cv2.cvtColor(obs, cv2.COLOR_BGR2RGB))

# Hyperparameter tune

In [None]:
%pip install torch==1.10.2+cu113 torchvision==0.11.3+cu113 torchaudio===0.10.2+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

Looking in links: https://download.pytorch.org/whl/cu113/torch_stable.html
Note: you may need to restart the kernel to use updated packages.


In [None]:
%pip install optuna

Collecting optunaNote: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
rich 6.2.0 requires typing-extensions<4.0.0,>=3.7.4, but you have typing-extensions 4.7.1 which is incompatible.



  Using cached optuna-3.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Using cached alembic-1.12.1-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Using cached colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting sqlalchemy>=1.3.0 (from optuna)
  Using cached SQLAlchemy-2.0.23-cp37-cp37m-win_amd64.whl.metadata (9.8 kB)
Collecting PyYAML (from optuna)
  Using cached PyYAML-6.0.1-cp37-cp37m-win_amd64.whl.metadata (2.1 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Using cached Mako-1.2.4-py3-none-any.whl (78 kB)
Collecting typing-extensions>=4 (from alembic>=1.5.0->optuna)
  Using cached typing_extensions-4.7.1-py3-none-any.whl.metadata (3.1 kB)
Collecting greenlet!=0.4.17 (from sqlalchemy>=1.3.0->optuna)
  Using cached greenlet-3.0.1-cp37-cp37m-win_amd64.whl.metadata (3.8 kB)
Collecting MarkupSafe>=0.9.2 (from Mako->alembic>=1.5.0->optuna)
  Using cached MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl.metadata (3.1 kB)
Using cach

In [None]:
%pip install stable-baselines3[extra]

Collecting stable-baselines3[extra]
  Using cached stable_baselines3-2.1.0-py3-none-any.whl.metadata (5.2 kB)
Collecting tensorboard>=2.9.1 (from stable-baselines3[extra])
  Using cached tensorboard-2.14.0-py3-none-any.whl.metadata (1.8 kB)
Collecting rich (from stable-baselines3[extra])
  Using cached rich-13.6.0-py3-none-any.whl.metadata (18 kB)
Collecting shimmy~=1.1.0 (from shimmy[atari]~=1.1.0; extra == "extra"->stable-baselines3[extra])
  Using cached Shimmy-1.1.0-py3-none-any.whl.metadata (3.3 kB)
Collecting autorom~=0.6.1 (from autorom[accept-rom-license]~=0.6.1; extra == "extra"->stable-baselines3[extra])
  Using cached AutoROM-0.6.1-py3-none-any.whl (9.4 kB)
Collecting AutoROM.accept-rom-license (from autorom[accept-rom-license]~=0.6.1; extra == "extra"->stable-baselines3[extra])
  Using cached AutoROM.accept_rom_license-0.6.1-py3-none-any.whl
Collecting ale-py~=0.8.1 (from shimmy[atari]~=1.1.0; extra == "extra"->stable-baselines3[extra])
  Using cached ale_py-0.8.1-cp38-cp38

In [1]:
# Importing the optimzation frame - HPO
import optuna
# PPO algo for RL
from stable_baselines3 import PPO
# Bring in the eval policy method for metric calculation
from stable_baselines3.common.evaluation import evaluate_policy
# Import the sb3 monitor for logging 
from stable_baselines3.common.monitor import Monitor
# Import the vec wrappers to vectorize and frame stack
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
# Import os to deal with filepaths
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
LOG_DIR = './logs/'
OPT_DIR = './opt/'

In [3]:
# Function to return test hyperparameters - define the object function
def optimize_ppo(trial): 
    return {
        'n_steps':trial.suggest_int('n_steps', 2048, 8192),
        'gamma':trial.suggest_loguniform('gamma', 0.8, 0.9999),
        'learning_rate':trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
        'clip_range':trial.suggest_uniform('clip_range', 0.1, 0.4),
        'gae_lambda':trial.suggest_uniform('gae_lambda', 0.8, 0.99)
    }

In [4]:
SAVE_PATH = os.path.join(OPT_DIR, 'trial_{}_best_model'.format(1))

In [5]:
# Run a training loop and return mean reward 
def optimize_agent(trial):
    try:
        model_params = optimize_ppo(trial) 

        # Create environment 
        env = StreetFighter()
        env = Monitor(env, LOG_DIR)
        env = DummyVecEnv([lambda: env])
        env = VecFrameStack(env, 4, channels_order='last')

        # Create algo 
        model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=0, **model_params)
        model.learn(total_timesteps=30000)
        #model.learn(total_timesteps=100000)

        # Evaluate model 
        mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=5)
        env.close()

        SAVE_PATH = os.path.join(OPT_DIR, 'trial_{}_best_model'.format(trial.number))
        model.save(SAVE_PATH)

        return mean_reward

    except Exception as e:
        return -1000

In [6]:
# Creating the experiment 
study = optuna.create_study(direction='maximize')
# study.optimize(optimize_agent, n_trials=10, n_jobs=1)
study.optimize(optimize_agent, n_trials=100, n_jobs=1)

[I 2023-11-14 00:50:43,048] A new study created in memory with name: no-name-8c99e2f4-9e6c-40b3-968a-f99faa77c4c1
  'gamma':trial.suggest_loguniform('gamma', 0.8, 0.9999),
  'learning_rate':trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
  'clip_range':trial.suggest_uniform('clip_range', 0.1, 0.4),
  'gae_lambda':trial.suggest_uniform('gae_lambda', 0.8, 0.99)
[I 2023-11-14 00:50:43,064] Trial 0 finished with value: -1000.0 and parameters: {'n_steps': 2578, 'gamma': 0.9442301795777465, 'learning_rate': 4.43203095674032e-05, 'clip_range': 0.13869761349106371, 'gae_lambda': 0.9803850284549107}. Best is trial 0 with value: -1000.0.
[I 2023-11-14 00:50:43,073] Trial 1 finished with value: -1000.0 and parameters: {'n_steps': 2368, 'gamma': 0.9172616244252935, 'learning_rate': 4.254610061977688e-05, 'clip_range': 0.2915621797271041, 'gae_lambda': 0.8347986014456947}. Best is trial 0 with value: -1000.0.
[I 2023-11-14 00:50:43,078] Trial 2 finished with value: -1000.0 and parameters: {'

In [None]:
study.best_params

{'n_steps': 6177,
 'gamma': 0.9141035935776617,
 'learning_rate': 4.555647896453844e-05,
 'clip_range': 0.17950577872799717,
 'gae_lambda': 0.8532103042940042}

In [None]:
study.best_trial

FrozenTrial(number=0, state=TrialState.COMPLETE, values=[-1000.0], datetime_start=datetime.datetime(2023, 11, 13, 22, 45, 46, 417448), datetime_complete=datetime.datetime(2023, 11, 13, 22, 45, 46, 454464), params={'n_steps': 6177, 'gamma': 0.9141035935776617, 'learning_rate': 4.555647896453844e-05, 'clip_range': 0.17950577872799717, 'gae_lambda': 0.8532103042940042}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_steps': IntDistribution(high=8192, log=False, low=2048, step=1), 'gamma': FloatDistribution(high=0.9999, log=True, low=0.8, step=None), 'learning_rate': FloatDistribution(high=0.0001, log=True, low=1e-05, step=None), 'clip_range': FloatDistribution(high=0.4, log=False, low=0.1, step=None), 'gae_lambda': FloatDistribution(high=0.99, log=False, low=0.8, step=None)}, trial_id=0, value=None)

In [None]:
model = PPO.load(os.path.join(OPT_DIR, 'trial_5_best_model.zip'))

FileNotFoundError: [Errno 2] No such file or directory: 'opt\\trial_5_best_model.zip.zip'

# Setup Callback

In [None]:
# Import base callback 
from stable_baselines3.common.callbacks import BaseCallback

In [None]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [None]:
CHECKPOINT_DIR = './train/'

In [None]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

# Train Model

In [None]:
# Create environment 
env = StreetFighter()
env = Monitor(env, LOG_DIR)
env = DummyVecEnv([lambda: env])
env = VecFrameStack(env, 4, channels_order='last')

In [None]:
model_params = study.best_params
model_params['n_steps'] = 7488  # set n_steps to 7488 or a factor of 64
# model_params['learning_rate'] = 5e-7
model_params

In [None]:
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, **model_params)

In [None]:
# Reload previous weights from HPO
model.load(os.path.join(OPT_DIR, 'trial_5_best_model.zip'))

In [None]:
# Kick off training 
model.learn(total_timesteps=100000, callback=callback)
# model.learn(total_timestep=5000000) 

# Evaluate the Model

In [None]:
model = PPO.load('./train/best_model_10000.zip')

In [None]:
mean_reward, _ = evaluate_policy(model, env, render=True, n_eval_episodes=1)

In [None]:
mean_reward

# Test out the Model

In [None]:
obs = env.reset()

In [None]:
obs.shape

In [None]:
env.step(model.predict(obs)[0])

In [None]:
# Reset game to starting state
obs = env.reset()
# Set flag to flase
done = False
for game in range(1): 
    while not done: 
        if done: 
            obs = env.reset()
        env.render()
        action = model.predict(obs)[0]
        obs, reward, done, info = env.step(action)
        time.sleep(0.01)
        print(reward)