Testing the Bot

In [None]:
import retro
import time
import os
from gym import Env
from gym.spaces import MultiBinary, Box
import numpy as np
import cv2
from matplotlib import pyplot as plt
import optuna
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack

In [None]:
#define custom gym env
class StreetFighter(Env):
    """Gym environment wrapping the Street Fighter II (Genesis) retro game.

    Frames are reduced to 84x84 single-channel images. ``step`` returns the
    difference between the current and the previous preprocessed frame, and
    rewards the change in ``info['score']`` since the previous step.
    """

    def __init__(self):
        """Declare the observation/action spaces and start the emulator."""
        super().__init__()
        self.observation_space = Box(
            low=0, high=255, shape=(84, 84, 1), dtype=np.uint8
        )
        self.action_space = MultiBinary(12)
        self.game = retro.make(
            game="StreetFighterIISpecialChampionEdition-Genesis",
            use_restricted_actions=retro.Actions.FILTERED,
        )

    def step(self, action):
        """Advance the game by one step.

        Returns a ``(frame_delta, reward, done, info)`` tuple. The reward
        reported by the emulator is discarded; the reward returned here is
        the score gained since the previous step.
        """
        raw_frame, _, done, info = self.game.step(action)
        current_frame = self.preprocess(raw_frame)

        # NOTE(review): if the frames are uint8 this subtraction wraps
        # modulo 256 instead of going negative -- confirm this is intended.
        delta = current_frame - self.previous_frame
        self.previous_frame = current_frame

        latest_score = info['score']
        score_gain = latest_score - self.score
        self.score = latest_score

        return delta, score_gain, done, info

    def render(self, *args, **kwargs):
        """Render the underlying game; any arguments are ignored."""
        self.game.render()

    def reset(self):
        """Restart the game and return the first preprocessed frame.

        Also clears the score baseline and remembers the first frame so the
        next ``step`` can compute its delta against it.
        """
        first_frame = self.preprocess(self.game.reset())
        self.score = 0
        self.previous_frame = first_frame
        return first_frame

    def preprocess(self, observation):
        """Convert a raw frame to an 84x84x1 grayscale array."""
        # NOTE(review): the conversion assumes BGR channel order -- confirm
        # this matches the frames produced by the emulator.
        grayscale = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        shrunk = cv2.resize(grayscale, (84, 84), interpolation=cv2.INTER_CUBIC)
        # Add the trailing channel axis declared in the observation space.
        return np.reshape(shrunk, (84, 84, 1))

    def close(self):
        """Shut down the underlying game."""
        self.game.close()

In [None]:
# Close any environment left over from a previous run of this notebook
# before a new one is created.
# NOTE(review): presumably required because the emulator allows only one
# live instance per process -- confirm.
try:
    env.close()
except NameError:
    # First run: no environment has been created yet, nothing to close.
    pass
except Exception as exc:
    # Best effort only: report the problem but do not block re-creating the
    # environment. KeyboardInterrupt/SystemExit are deliberately not caught.
    print(f"Ignoring error while closing previous environment: {exc!r}")

In [None]:
LOG_DIR = './PPOlogs/'
OPT_DIR = './PPOopt/'

# Create an instance of the environment, wrapped with a Monitor that is
# given LOG_DIR.
monitored_env = Monitor(StreetFighter(), LOG_DIR)

# The factory closes over `monitored_env`, not `env`: `env` is rebound on
# the lines below, so a `lambda: env` would only return the right object if
# it happened to be called before the rebinding.
env = DummyVecEnv([lambda: monitored_env])

# Stack the 4 most recent observations along the last (channel) axis.
env = VecFrameStack(env, 4, channels_order='last')


In [None]:
# Load the trained model.
# Hyperparameters kept for reference only: they are not passed to
# `PPO.load`, which restores the settings stored in the checkpoint.
model_params = {
    'n_steps': 5568,
    'gamma': 0.8157202903839094,
    'learning_rate': 1.154858774456118e-06,
    'clip_range': 0.26012333935931625,
    'gae_lambda': 0.879540718426021,
}

# Enter the path of the PPO model to test.
# `PPO.load` is a classmethod that builds and RETURNS a new model; it does
# not load weights in place. Calling `model.load(...)` on an existing model
# and discarding the result leaves that model with its untrained weights,
# so the returned model must be assigned.
model = PPO.load('trained_bots/PPO_2800000.zip', env=env)

In [None]:
# Test the model: play whole games, rendering each step and printing the
# reward returned by the environment.
for game in range(1):
    # Start every game from a fresh episode. Resetting `done` here (rather
    # than once before the loop) keeps the loop working when more than one
    # game is requested.
    obs = env.reset()
    done = False
    while not done:
        env.render()
        # `predict` returns (action, state); only the action is needed.
        action, _state = model.predict(obs)
        # `env` is a vectorised env holding a single environment, so `done`
        # comes back as a one-element array.
        obs, reward, done, info = env.step(action)
        # Optional: slow playback down with time.sleep(0.005).
        print(reward)

In [None]:
# Evaluate the model over 30 episodes without rendering; the first element
# of the returned pair is the mean episode reward.
evaluation = evaluate_policy(model, env, n_eval_episodes=30, render=False)
mean_reward = evaluation[0]
mean_reward