Testing the Bot

In [None]:
import retro
import time
import os
import gym
from gym import Env
from gym.spaces import MultiBinary, Box, Discrete
import numpy as np
import cv2
from matplotlib import pyplot as plt
import optuna
from stable_baselines3 import PPO, A2C, DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
import tensorboard
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from stable_baselines3.common.callbacks import BaseCallback

In [None]:
# Directional prefixes, in the order their groups appear in the action table.
_DIRECTION_PREFIXES = [
    ["DOWN", "LEFT"],
    ["DOWN", "RIGHT"],
    ["DOWN"],
    ["UP", "LEFT"],
    ["UP", "RIGHT"],
    ["UP"],
    ["LEFT"],
    ["RIGHT"],
]
# Face buttons, in the order they are appended to every directional prefix.
_FACE_BUTTONS = ["A", "B", "C", "X", "Y", "Z"]
# Each prefix is emitted once on its own and then once per face button.
_SUFFIXES = [[]] + [[button] for button in _FACE_BUTTONS]

# Ordered list of valid button combinations. The position of a combination in
# this list is its discrete action index, so the ordering must stay stable:
# 8 direction groups of 7 entries each, followed by the 6 lone face buttons.
combos = [
    prefix + suffix
    for prefix in _DIRECTION_PREFIXES
    for suffix in _SUFFIXES
] + [[button] for button in _FACE_BUTTONS]


In [None]:
class Discretizer(gym.ActionWrapper):
    """
    Expose a MultiBinary-action environment through a Discrete action space.

    Every discrete action index maps to one precomputed button vector.

    Args:
        env: environment to wrap; its action space must be MultiBinary and
            its unwrapped form must expose a ``buttons`` list of names
        combos: ordered list of lists of valid button combinations
    """

    def __init__(self, env, combos):
        super().__init__(env)
        assert isinstance(env.action_space, gym.spaces.MultiBinary)
        button_names = env.unwrapped.buttons
        n_buttons = env.action_space.n

        def encode(combo):
            # One slot per button; pressed buttons are set to 1.
            mask = np.zeros(n_buttons, dtype=int)
            for name in combo:
                mask[button_names.index(name)] = 1
            return mask

        # Lookup table: discrete action index -> button vector.
        self._decode_discrete_action = [encode(combo) for combo in combos]
        self.action_space = gym.spaces.Discrete(len(self._decode_discrete_action))

    def action(self, act):
        """Translate discrete action ``act`` into a copy of its button vector."""
        button_vector = self._decode_discrete_action[act]
        # Hand out a copy so callers cannot mutate the lookup table.
        return button_vector.copy()


class StreetFighterDiscretizer(Discretizer):
    """Discretizer preconfigured with the module-level ``combos`` table."""

    def __init__(self, env):
        super().__init__(env, combos)
class StreetFighter(Env):
    """Gym environment around the Street Fighter II (Genesis) retro game.

    Frames are converted to 84x84 single-channel uint8 images. ``reset``
    returns the first preprocessed frame; ``step`` returns the difference
    between the current and the previous preprocessed frame. The reward is
    the change in ``info['score']`` since the previous step.
    """

    def __init__(self):
        super().__init__()
        # The observation space is an 84x84x1 box of 8-bit pixel values.
        self.observation_space = Box(low=0, high=255,
                                     shape=(84, 84, 1), dtype=np.uint8)

        # 12-long vector where each entry is 0 or 1, one entry per button.
        self.action_space = MultiBinary(12)
        # Button name for each slot of the action vector.
        # NOTE(review): presumably mirrors the emulator's button order -- confirm.
        self.buttons = ["B", "A", "MODE", "START", "UP", "DOWN", "LEFT", "RIGHT", "C", "Y", "X", "Z"]

        # Per-episode state. Defined here so the attributes always exist;
        # reset() fills them in at the start of each episode.
        self.previous_frame = None
        self.score = 0

        # Start up an instance of the game.
        # NOTE(review): an earlier comment here mentioned restricted actions,
        # but no such argument is passed to retro.make -- confirm the intent.
        self.game = retro.make(game="StreetFighterIISpecialChampionEdition-Genesis")

    def preprocess(self, observation):
        """Return ``observation`` as an 84x84x1 grayscale image."""
        # Turn to grey.
        # NOTE(review): the frame is presumably RGB, in which case
        # COLOR_BGR2GRAY swaps the red/blue weights. Left unchanged because a
        # trained checkpoint presumably expects this exact preprocessing.
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        # Resize to the observation-space resolution.
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        # Restore the channel axis dropped by the grayscale conversion;
        # stable baselines (the RL package used here) needs it.
        channels = np.reshape(resize, (84, 84, 1))
        return channels

    def render(self, *args, **kwargs):
        """Render the underlying game; any arguments are ignored."""
        self.game.render()

    def step(self, action):
        """Advance the game by one step.

        Args:
            action: button vector forwarded unchanged to the game.

        Returns:
            Tuple ``(frame_delta, reward, done, info)``. ``frame_delta`` is
            the pixel change relative to the previous frame and ``reward`` is
            the score change since the previous step.

        Raises:
            RuntimeError: if called before ``reset()``.
        """
        if self.previous_frame is None:
            # Without a reference frame the delta below cannot be computed.
            raise RuntimeError("reset() must be called before step()")

        # Take a step; the game's own reward is replaced below.
        obs, reward, done, info = self.game.step(action)

        # Preprocess the raw observation.
        obs = self.preprocess(obs)

        # Frame delta: pixel change.
        # NOTE(review): if both frames are uint8 the subtraction wraps modulo
        # 256 instead of going negative. Kept as-is so observations stay
        # identical to what an existing checkpoint was trained on.
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs

        # Reshape the reward: use the change in score since the last step.
        reward = info['score'] - self.score
        self.score = info['score']

        return frame_delta, reward, done, info

    def reset(self):
        """Restart the game and return the first preprocessed frame."""
        obs = self.game.reset()
        obs = self.preprocess(obs)
        # Reference frame for the first delta computed by step().
        self.previous_frame = obs

        # Score baseline for the first reward delta.
        self.score = 0
        return obs

    def close(self):
        """Close the underlying game instance."""
        self.game.close()

In [None]:
# Best-effort cleanup of an environment left over from an earlier run of the
# notebook, so a new emulator instance can be created below.
try:
    env.close()
except NameError:
    # First run: no environment has been created yet, nothing to close.
    pass
except Exception as exc:
    # Still non-fatal, but report it rather than hiding it. KeyboardInterrupt
    # and SystemExit are deliberately not caught.
    print(f"Ignoring error while closing previous environment: {exc!r}")

In [None]:
LOG_DIR = './PPOlogs/'
OPT_DIR = './PPOopt/'

# Build the environment stack, innermost first:
#   StreetFighter -> StreetFighterDiscretizer -> Monitor (logs to LOG_DIR)
#   -> DummyVecEnv (single env) -> VecFrameStack (4 frames, channels last)
# The factory builds the wrapped env itself rather than capturing a global
# that is rebound on the same line.
env = VecFrameStack(
    DummyVecEnv([lambda: Monitor(StreetFighterDiscretizer(StreetFighter()), LOG_DIR)]),
    4,
    channels_order='last',
)

In [None]:
#load the model
# `load` is a classmethod that builds and RETURNS a new model from the saved
# checkpoint; calling it on an instance and discarding the result would leave
# an untrained model in place. Bind the returned model and attach the
# evaluation environment to it. Hyperparameters come from the checkpoint.
model = DQN.load(
    '/Users/Cheks/Desktop/Durham /Durham Part 2/Data Science/Project/mySFBot/trained_bots/DQN_1000000.zip',
    env=env,
)

In [None]:
#test the model
# Play the loaded model with rendering on, printing the reward of every step.
for game in range(1):
    # Start every game from a fresh episode. Resetting inside the loop keeps
    # this correct if the number of games is raised above 1.
    obs = env.reset()
    done = False
    # NOTE(review): env is vectorised, so `done` is presumably a length-1
    # array; `not done` only works while there is a single env -- confirm.
    while not done:
        env.render()
        # predict() returns (action, state); only the action is needed.
        action = model.predict(obs)[0]
        obs, reward, done, info = env.step(action)
        #time.sleep(0.005)
        print(reward)

In [None]:
# Evaluate the model over 30 episodes without rendering. evaluate_policy
# returns two values; only the first (the mean reward) is kept.
mean_reward, _ = evaluate_policy(
    model,
    env,
    n_eval_episodes=30,
    render=False,
)
# Bare expression so the notebook displays the value.
mean_reward