In [1]:
# Import retro to play Street Fighter using a ROM
import retro
# Import time to slow down game
import time
# Import environment base class for a wrapper 
from gym import Env 
# Import the space shapes for the environment
from gym.spaces import MultiBinary, Box
# Import numpy to calculate frame delta 
import numpy as np
# Import opencv for grayscaling
import cv2
# Import matplotlib for plotting the image
from matplotlib import pyplot as plt
# Prints the single character chr(3486) (U+0D9E). The code point 3486 is
# sum(range(84)), where 84 == ord('T') and 'T' is the smallest character of
# str(True).
print(chr(3486))

In [3]:
# Class based on github
class StreetFighter(Env): # pass in basic env from above to preprocessing
    """Gym wrapper around the Street Fighter II retro game with a health-differential reward.

    Observations are 84x84x1 grayscale frames: ``reset`` returns the first
    preprocessed frame and ``step`` returns the pixel difference between the
    current and the previous preprocessed frame. The reward is computed by
    ``reward_function`` from the emulator's ``info`` dict; the emulator's own
    reward is discarded.
    """

    def __init__(self):
        super().__init__() # inherit from base env
        # Specify action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8) # grayscaled frame, smaller amt of pixels
        self.action_space = MultiBinary(12) # type of actions that can be taken
        # Per-episode bookkeeping used by reward_function
        self._reset_trackers()
        # Startup and instance of the game
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED) # used to get valid button combos

    def _reset_trackers(self):
        """Set the previous-step game variables back to their initial values."""
        # Both health trackers start at the same value, so whatever the real
        # starting health is, the first step yields equal diffs and no net
        # health reward.
        self.health = 144
        self.enemy_health = 144
        self.score = 0
        self.matches_won = 0
        self.continue_timer = 100
        self.enemy_matches_won = 0

    def reset(self): # restart
        """Restart the game and return the first preprocessed frame.

        The previous-step trackers are re-initialised as well, so the first
        step of the new episode is not compared against the final health and
        score values of the previous episode.
        """
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs # sets previous frame to current frame
        self._reset_trackers()
        return obs

    def preprocess(self, observation): # grayscale, resize
        """Convert a raw frame to an (84, 84, 1) grayscale array.

        NOTE(review): the conversion uses COLOR_BGR2GRAY; confirm that this
        matches the channel order of the frames produced by the emulator.
        """
        # Grayscaling
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        # Resize
        resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)
        # Add the channels value
        channels = np.reshape(resize, (84,84,1))
        return channels

    def reward_function(self, state):
        """Compute the reward for one step and update the previous-step trackers.

        Args:
            state: the ``info`` dict returned by the emulator; must contain the
                keys 'continuetimer', 'enemy_matches_won', 'enemy_health',
                'health', 'matches_won' and 'score'.

        Returns:
            The score increase since the previous step scaled by 0.001, plus
            the net health swing (enemy damage minus own damage). A swing in
            the player's favour is multiplied by 10.
        """
        # Extract variables
        continuetimer = state['continuetimer']
        enemy_matches_won = state['enemy_matches_won']
        enemy_health = state['enemy_health']
        health = state['health']
        matches_won = state['matches_won']
        score = state['score']

        # Reward only the score gained on this step. Adding the absolute score
        # every frame would keep paying for points earned long ago.
        reward = (score - self.score) * 0.001

        enemy_health_diff = self.enemy_health - enemy_health
        health_diff = self.health - health

        # catching edge cases to make sure no reward is being earned outside of a fight (i.e. in between rounds)
        both_dropped_to_zero = (
            self.enemy_health != 0 and enemy_health == 0
            and self.health != 0 and health == 0
        )
        nothing_changed = enemy_health_diff == 0 and health_diff == 0
        both_were_zero = self.health == 0 and self.enemy_health == 0

        if not (both_dropped_to_zero or nothing_changed or both_were_zero):
            net_swing = enemy_health_diff - health_diff
            if enemy_health_diff > health_diff:
                # Favourable exchanges are weighted ten times heavier
                reward += net_swing * 10
            else:
                reward += net_swing

        # Update previous states to enable frame-by-frame comparison
        self.enemy_health = enemy_health
        self.health = health
        self.matches_won = matches_won
        self.enemy_matches_won = enemy_matches_won
        self.continue_timer = continuetimer
        self.score = score

        return reward

    def step(self, action): # how do we process action
        """Advance the game by one step.

        Returns:
            ``(frame_delta, reward, done, info)`` where ``frame_delta`` is the
            current preprocessed frame minus the previous one and ``reward``
            comes from ``reward_function`` (the emulator's reward is dropped).
        """
        # Take a step
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)

        self.previous_action = action

        # Frame delta
        # NOTE(review): the subtraction happens in the arrays' own dtype;
        # if the frames are uint8, negative differences wrap around.
        frame_delta = obs - self.previous_frame # change in pixels (was dropped in final model of tutorial)
        self.previous_frame = obs

        # Reshape the reward function
        reward = self.reward_function(info)

        return frame_delta, reward, done, info

    def render(self, *args, **kwargs): # unpack any args and kwargs from stable baseline
        """Render the underlying game; any arguments are accepted and ignored."""
        self.game.render()

    def close(self):
        """Close the underlying game instance."""
        self.game.close()

In [None]:
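# Score-based environment: the reward is the change in the in-game score per step.
# NOTE: this cell redefines StreetFighter; run only one of the environment cells.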
class StreetFighter(Env): # pass in basic env from above to preprocessing
    """Gym wrapper around the Street Fighter II retro game with a score-delta reward.

    ``reset`` returns the first preprocessed frame; ``step`` returns the pixel
    difference between consecutive preprocessed frames together with the
    change in ``info['score']`` since the previous step.
    """

    def __init__(self):
        super().__init__()
        # 84x84 single-channel frames with 8-bit pixel values
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        # One binary flag per action slot
        self.action_space = MultiBinary(12)
        # Create the emulator instance, restricted to filtered button combos
        self.game = retro.make(
            game='StreetFighterIISpecialChampionEdition-Genesis',
            use_restricted_actions=retro.Actions.FILTERED,
        )

    def reset(self):
        """Restart the game, clear the score tracker and return the first frame."""
        first_frame = self.preprocess(self.game.reset())
        # The first delta in step() is taken against this frame
        self.previous_frame = first_frame
        # Baseline for the score-delta reward
        self.score = 0
        return first_frame

    def preprocess(self, observation):
        """Grayscale a raw frame, shrink it to 84x84 and add a channel axis."""
        grayscale = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        shrunk = cv2.resize(grayscale, (84, 84), interpolation=cv2.INTER_CUBIC)
        return np.reshape(shrunk, (84, 84, 1))

    def step(self, action):
        """Advance the game one step.

        Returns:
            ``(frame_delta, reward, done, info)``; ``reward`` is the increase
            of ``info['score']`` since the previous step, and the emulator's
            own reward is ignored.
        """
        raw_frame, _, done, info = self.game.step(action)
        current_frame = self.preprocess(raw_frame)

        # Change in pixels relative to the previous frame
        frame_delta = current_frame - self.previous_frame
        self.previous_frame = current_frame

        # Reward is how much the score moved during this step
        latest_score = info['score']
        reward = latest_score - self.score
        self.score = latest_score

        return frame_delta, reward, done, info

    def render(self, *args, **kwargs):
        """Render the underlying game; extra arguments are accepted and ignored."""
        self.game.render()

    def close(self):
        """Close the underlying game instance."""
        self.game.close()

In [None]:
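# Combo-based reward environment: consecutive hits on the enemy multiply the damage reward.
# NOTE: this cell redefines StreetFighter; run only one of the environment cells.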
class StreetFighter(Env): # pass in basic env from above to preprocessing
    """Gym wrapper around the Street Fighter II retro game with a combo-scaled reward.

    Observations are 84x84x1 grayscale frames: ``reset`` returns the first
    preprocessed frame and ``step`` returns the pixel difference between the
    current and the previous preprocessed frame. The reward is computed by
    ``reward_function`` from the emulator's ``info`` dict; the emulator's own
    reward is discarded.
    """

    def __init__(self):
        super().__init__() # inherit from base env
        # Specify action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8) # grayscaled frame, smaller amt of pixels
        self.action_space = MultiBinary(12) # type of actions that can be taken
        # Per-episode bookkeeping used by reward_function
        self._reset_trackers()
        # Startup and instance of the game
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED) # used to get valid button combos

    def _reset_trackers(self):
        """Set the previous-step game variables and combo state back to their initial values."""
        self.health = 144
        self.enemy_health = 144
        self.score = 0
        self.matches_won = 0
        self.continue_timer = 100
        self.enemy_matches_won = 0
        self.previous_action = np.zeros(12)
        self.combo_scaler = 1
        self.last_damage_instance = 0

    def reset(self): # restart
        """Restart the game and return the first preprocessed frame.

        The previous-step trackers and the combo state are re-initialised as
        well. Without this, stale match counters from the previous episode
        would suppress the win/loss bonuses and stale health values would be
        compared against the fresh episode's first frame.
        """
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs # sets previous frame to current frame
        self._reset_trackers()
        return obs

    def preprocess(self, observation): # grayscale, resize
        """Convert a raw frame to an (84, 84, 1) grayscale array.

        NOTE(review): the conversion uses COLOR_BGR2GRAY; confirm that this
        matches the channel order of the frames produced by the emulator.
        """
        # Grayscaling
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        # Resize
        resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)
        # Add the channels value
        channels = np.reshape(resize, (84,84,1))
        return channels

    def reward_function(self, state):
        """Compute the reward for one step and update the previous-step trackers.

        Args:
            state: the ``info`` dict returned by the emulator; must contain the
                keys 'continuetimer', 'enemy_matches_won', 'enemy_health',
                'health', 'matches_won' and 'score'.

        Returns:
            The sum of: the score gained this step (x0.001), combo-scaled
            damage dealt (x100), a penalty for damage taken (x100), a per-step
            bonus of half the current health, and +/-2000 when the player's or
            the enemy's match counter increases.
        """
        # Extract variables
        continuetimer = state['continuetimer']
        enemy_matches_won = state['enemy_matches_won']
        enemy_health = state['enemy_health']
        health = state['health']
        matches_won = state['matches_won']
        score = state['score']

        # Reward only the score gained on this step. Adding the absolute score
        # every frame would keep paying for points earned long ago.
        reward = (score - self.score) * 0.001

        # Reward for inflicting damage on the enemy, only if enemy_health is reduced. Bonus for combos
        if enemy_health < self.enemy_health:
            # A hit landing fewer than 200 steps after the previous one doubles
            # the multiplier; a longer gap resets it.
            if self.last_damage_instance < 200:
                self.combo_scaler *= 2
            else:
                self.combo_scaler = 1

            self.last_damage_instance = 0

            reward += (self.enemy_health - enemy_health) * 100 * self.combo_scaler

        # Penalty for losing health (scaled so health loss gives a clear penalty)
        if health < self.health:
            reward -= (self.health - health) * 100

        # Bonus if the player maintains health
        reward += health * 0.5

        # Encourage the AI to win matches
        if matches_won > self.matches_won:
            reward += 2000  # Winning a match should give a significant reward

        # Penalize the AI if the enemy wins a match
        if enemy_matches_won > self.enemy_matches_won:
            reward -= 2000

        # Update previous states to enable frame-by-frame comparison
        self.enemy_health = enemy_health
        self.health = health
        self.matches_won = matches_won
        self.enemy_matches_won = enemy_matches_won
        self.continue_timer = continuetimer
        self.score = score
        # Counts steps since the last hit on the enemy
        self.last_damage_instance += 1

        return reward

    def step(self, action): # how do we process action
        """Advance the game by one step.

        Returns:
            ``(frame_delta, reward, done, info)`` where ``frame_delta`` is the
            current preprocessed frame minus the previous one and ``reward``
            comes from ``reward_function`` (the emulator's reward is dropped).
        """
        # Take a step
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs)

        self.previous_action = action

        # Frame delta
        # NOTE(review): the subtraction happens in the arrays' own dtype;
        # if the frames are uint8, negative differences wrap around.
        frame_delta = obs - self.previous_frame # change in pixels (was dropped in final model of tutorial)
        self.previous_frame = obs

        # Reshape the reward function
        reward = self.reward_function(info)

        return frame_delta, reward, done, info

    def render(self, *args, **kwargs): # unpack any args and kwargs from stable baseline
        """Render the underlying game; any arguments are accepted and ignored."""
        self.game.render()

    def close(self):
        """Close the underlying game instance."""
        self.game.close()