In [1]:
# Import retro to play Street Fighter using a ROM
import retro
# Import time to slow down game
import time
# Import environment base class for a wrapper 
from gym import Env 
# Import the space shapes for the environment
from gym.spaces import MultiBinary, Box
# Import numpy to calculate frame delta 
import numpy as np
# Import opencv for grayscaling
import cv2
# Import matplotlib for plotting the image
from matplotlib import pyplot as plt
# Import deque for the frame stack
from collections import deque
# Easter egg kept from the import cell: prints the single character U+0D9E.
# 0x0D9E == 3486 == sum(range(ord('T'))), which is what the original
# obfuscated expression evaluated to.
print(chr(0x0D9E))

In [3]:
# Environment class based on code from GitHub
class StreetFighter(Env): # pass in basic env from above to preprocessing
    """Street Fighter II retro environment with a health-difference reward.

    The observation returned by ``step`` is the difference between the current
    and the previous preprocessed frame (grayscale, 84x84x1). The reward is a
    small score term plus the difference between damage dealt and damage taken
    since the previous step (see ``reward_function``).
    """

    def __init__(self):
        """Declare the spaces, initialise the per-step trackers and start the game."""
        super().__init__() # inherit from base env
        # Specify action space and observation space 
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8) # grayscaled frame, smaller amt of pixels
        self.action_space = MultiBinary(12) # type of actions that can be taken
        self._reset_trackers()
        # Startup and instance of the game 
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED) # used to get valid button combos

    def _reset_trackers(self):
        """Set the values remembered from the previous step to their start-of-episode defaults."""
        self.health = 144
        self.enemy_health = 144
        self.score = 0
        self.matches_won = 0
        self.continue_timer = 100
        self.enemy_matches_won = 0

    def reset(self): # restart
        """Restart the game and return the first preprocessed frame.

        The trackers used by ``reward_function`` are restored as well, so the
        first step of a new episode is not compared against whatever health,
        score and match counts the previous episode ended with.
        """
        # Return the first frame 
        obs = self.game.reset()
        obs = self.preprocess(obs) 
        self.previous_frame = obs # sets previous frame to current frame

        # Forget everything remembered from the previous episode
        self._reset_trackers()
        return obs

    def preprocess(self, observation): # grayscale, resize
        """Convert a raw frame to grayscale, resize it to 84x84 and add a channel axis.

        Args:
            observation: colour frame as returned by the game.

        Returns:
            Array of shape (84, 84, 1).
        """
        # Grayscaling 
        # NOTE(review): COLOR_BGR2GRAY assumes BGR channel order -- confirm
        # against the channel order retro actually returns.
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        # Resize 
        resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)
        # Add the channels value
        channels = np.reshape(resize, (84,84,1))
        return channels 

    def reward_function(self, state):
        """Compute the shaped reward for one step and remember ``state`` for the next.

        Args:
            state: mapping with the keys 'continuetimer', 'enemy_matches_won',
                'enemy_health', 'health', 'matches_won' and 'score'
                (``step`` passes the ``info`` dict from the game).

        Returns:
            ``score * 0.001`` plus, when a health change is considered valid,
            (damage dealt - damage taken); that difference is multiplied by 10
            when more damage was dealt than taken.
        """
        # Extract variables
        continuetimer = state['continuetimer']
        enemy_matches_won = state['enemy_matches_won']
        enemy_health = state['enemy_health']
        health = state['health']
        matches_won = state['matches_won']
        score = state['score']

        # Initialize reward
        reward = 0

        # NOTE(review): this adds the absolute score on every step, not the
        # change in score since the previous step -- confirm this is intended.
        reward += score * 0.001  

        enemy_health_diff = self.enemy_health - enemy_health
        health_diff = self.health - health

        # Catch edge cases so that no health-based reward is earned outside of
        # a fight (i.e. in between rounds).
        both_dropped_to_zero = (
            self.enemy_health != 0 and enemy_health == 0
            and self.health != 0 and health == 0
        )
        nothing_changed = enemy_health_diff == 0 and health_diff == 0
        both_were_zero = self.health == 0 and self.enemy_health == 0

        if not (both_dropped_to_zero or nothing_changed or both_were_zero):
            if enemy_health_diff > health_diff:
                # Favourable exchange: amplified
                reward += ((enemy_health_diff) - (health_diff)) * 10
            else:
                # Unfavourable (or even) exchange: zero or negative, unscaled
                reward += ((enemy_health_diff) - (health_diff))

        # Update previous states to enable frame-by-frame comparison
        self.enemy_health = enemy_health
        self.health = health
        self.matches_won = matches_won
        self.enemy_matches_won = enemy_matches_won
        self.continue_timer = continuetimer
        self.score = score

        return reward

    def step(self, action): # how do we process action
        """Advance the game by one step.

        Args:
            action: button array passed straight to the game.

        Returns:
            Tuple ``(frame_delta, reward, done, info)``; the game's own reward
            is replaced by ``reward_function(info)``.
        """
        # Take a step 
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs) 

        self.previous_action = action

        # Frame delta 
        # NOTE(review): if the frames are uint8 this subtraction wraps around
        # modulo 256 instead of going negative -- confirm this is intended.
        frame_delta = obs - self.previous_frame # change in pixels (was dropped in final model of tutorial)
        self.previous_frame = obs 

        # Reshape the reward function
        reward = self.reward_function(info)

        return frame_delta, reward, done, info

    def render(self, *args, **kwargs): # unpack any args and kwargs from stable baseline
        """Render the game; extra arguments are accepted and ignored."""
        self.game.render()

    def close(self):
        """Close the underlying game."""
        self.game.close()

In [None]:
# Score-delta environment: the reward for each step is the change in the in-game score
class StreetFighter(Env):
    """Street Fighter II retro environment rewarded by the per-step change in score.

    ``step`` returns the difference between the current and the previous
    preprocessed frame (grayscale, 84x84x1) as the observation.
    """

    def __init__(self):
        """Declare the observation/action spaces and start the game."""
        super().__init__()
        # Single-channel 84x84 frames; 12 binary buttons
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        # FILTERED restricts the agent to valid button combinations
        self.game = retro.make(
            game='StreetFighterIISpecialChampionEdition-Genesis',
            use_restricted_actions=retro.Actions.FILTERED,
        )

    def preprocess(self, observation):
        """Return ``observation`` as a grayscale array of shape (84, 84, 1)."""
        grayscale = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        shrunk = cv2.resize(grayscale, (84, 84), interpolation=cv2.INTER_CUBIC)
        return np.reshape(shrunk, (84, 84, 1))

    def reset(self):
        """Restart the game, zero the remembered score and return the first frame."""
        first_frame = self.preprocess(self.game.reset())
        # The first delta is computed against this frame
        self.previous_frame = first_frame
        self.score = 0
        return first_frame

    def step(self, action):
        """Advance one step and return ``(frame_delta, reward, done, info)``.

        The game's own reward is discarded; the reward returned here is
        ``info['score']`` minus the score remembered from the previous step.
        """
        raw_frame, _, done, info = self.game.step(action)
        current = self.preprocess(raw_frame)

        # Change in pixels since the previous step
        frame_delta = current - self.previous_frame
        self.previous_frame = current

        # Score gained during this step
        latest_score = info['score']
        reward = latest_score - self.score
        self.score = latest_score

        return frame_delta, reward, done, info

    def render(self, *args, **kwargs):
        """Render the game; extra arguments are accepted and ignored."""
        self.game.render()

    def close(self):
        """Close the underlying game."""
        self.game.close()

In [None]:
# Combo-based reward function: hits on the enemy in quick succession scale up the damage reward
class StreetFighter(Env): # pass in basic env from above to preprocessing
    """Street Fighter II retro environment with a combo-scaled damage reward.

    The observation returned by ``step`` is the difference between the current
    and the previous preprocessed frame (grayscale, 84x84x1). The reward mixes
    score, damage dealt (scaled by a combo multiplier), damage taken, remaining
    health and match wins/losses (see ``reward_function``).
    """

    def __init__(self):
        """Declare the spaces, initialise the per-step trackers and start the game."""
        super().__init__() # inherit from base env
        # Specify action space and observation space 
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8) # grayscaled frame, smaller amt of pixels
        self.action_space = MultiBinary(12) # type of actions that can be taken
        self._reset_trackers()
        # Startup and instance of the game 
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED) # used to get valid button combos

    def _reset_trackers(self):
        """Set everything remembered between steps to its start-of-episode default."""
        self.health = 144
        self.enemy_health = 144
        self.score = 0
        self.matches_won = 0
        self.continue_timer = 100
        self.enemy_matches_won = 0
        self.previous_action = np.zeros(12)
        self.combo_scaler = 1
        self.last_damage_instance = 0

    def reset(self): # restart
        """Restart the game and return the first preprocessed frame.

        All trackers used by ``reward_function`` are restored as well, so
        neither the health/match counters nor the combo multiplier of the
        previous episode leak into the new one.
        """
        # Return the first frame 
        obs = self.game.reset()
        obs = self.preprocess(obs) 
        self.previous_frame = obs # sets previous frame to current frame

        # Forget everything remembered from the previous episode
        self._reset_trackers()
        return obs

    def preprocess(self, observation): # grayscale, resize
        """Convert a raw frame to grayscale, resize it to 84x84 and add a channel axis.

        Args:
            observation: colour frame as returned by the game.

        Returns:
            Array of shape (84, 84, 1).
        """
        # Grayscaling 
        # NOTE(review): COLOR_BGR2GRAY assumes BGR channel order -- confirm
        # against the channel order retro actually returns.
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        # Resize 
        resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)
        # Add the channels value
        channels = np.reshape(resize, (84,84,1))
        return channels 

    def reward_function(self, state):
        """Compute the shaped reward for one step and remember ``state`` for the next.

        Args:
            state: mapping with the keys 'continuetimer', 'enemy_matches_won',
                'enemy_health', 'health', 'matches_won' and 'score'
                (``step`` passes the ``info`` dict from the game).

        Returns:
            Sum of: ``score * 0.001``; damage dealt * 100 * combo multiplier;
            minus damage taken * 100; ``health * 0.5``; +2000 when
            ``matches_won`` increased; -2000 when ``enemy_matches_won``
            increased.
        """
        # Extract variables
        continuetimer = state['continuetimer']
        enemy_matches_won = state['enemy_matches_won']
        enemy_health = state['enemy_health']
        health = state['health']
        matches_won = state['matches_won']
        score = state['score']

        # Initialize reward
        reward = 0

        # NOTE(review): this adds the absolute score on every step, not the
        # change in score since the previous step -- confirm this is intended.
        reward += score * 0.001  

        # Reward for inflicting damage on the enemy, only if enemy_health is reduced. Bonus for combos
        if enemy_health < self.enemy_health:
            if self.last_damage_instance < 200:
                # Previous hit was fewer than 200 steps ago: the combo continues.
                # NOTE(review): the multiplier doubles without an upper bound.
                self.combo_scaler *= 2
            else:
                self.combo_scaler = 1

            self.last_damage_instance = 0

            reward += (self.enemy_health - enemy_health) * 100 * self.combo_scaler

        # Penalty for losing health (scaled so health loss gives a clear penalty)
        if health < self.health:
            reward -= (self.health - health) * 100

        # Bonus if the player maintains health
        reward += health * 0.5  

        # Encourage the AI to win matches
        if matches_won > self.matches_won:
            reward += 2000  # Winning a match should give a significant reward

        # Penalize the AI if the enemy wins a match
        if enemy_matches_won > self.enemy_matches_won:
            reward -= 2000

        # Update previous states to enable frame-by-frame comparison
        self.enemy_health = enemy_health
        self.health = health
        self.matches_won = matches_won
        self.enemy_matches_won = enemy_matches_won
        self.continue_timer = continuetimer
        self.score = score
        # Counts steps since the last hit on the enemy (zeroed above on a hit)
        self.last_damage_instance += 1

        return reward

    def step(self, action): # how do we process action
        """Advance the game by one step.

        Args:
            action: button array passed straight to the game.

        Returns:
            Tuple ``(frame_delta, reward, done, info)``; the game's own reward
            is replaced by ``reward_function(info)``.
        """
        # Take a step 
        obs, reward, done, info = self.game.step(action)
        obs = self.preprocess(obs) 

        self.previous_action = action

        # Frame delta 
        # NOTE(review): if the frames are uint8 this subtraction wraps around
        # modulo 256 instead of going negative -- confirm this is intended.
        frame_delta = obs - self.previous_frame # change in pixels (was dropped in final model of tutorial)
        self.previous_frame = obs 

        # Reshape the reward function
        reward = self.reward_function(info)

        return frame_delta, reward, done, info

    def render(self, *args, **kwargs): # unpack any args and kwargs from stable baseline
        """Render the game; extra arguments are accepted and ignored."""
        self.game.render()

    def close(self):
        """Close the underlying game."""
        self.game.close()

In [None]:
# Claude version 1

class StreetFighter(Env):
    """Street Fighter II retro environment with a damage-, score- and round-based reward.

    ``step`` returns the difference between the current and the previous
    preprocessed frame (grayscale, 84x84x1) as the observation and replaces
    the game's reward with the value computed by ``reward_function``.
    """

    def __init__(self):
        """Declare the spaces, the per-step trackers and reward constants, then start the game."""
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)

        # Values remembered from the previous step
        self.health = self.enemy_health = 144
        self.score = 0
        self.matches_won = self.enemy_matches_won = 0
        self.continue_timer = 100

        # Damage-pattern trackers (not read anywhere else in this class)
        self.last_health_diff = self.last_enemy_health_diff = 0
        self.last_score = 0

        # Reward-shaping constants
        self.HEALTH_SCALE = 10.0
        self.ROUND_WIN_BONUS = 100.0
        self.MATCH_WIN_BONUS = 500.0
        self.DAMAGE_TRADE_SCALE = 1.5  # bonus factor for dealing more damage than taken

        self.game = retro.make(
            game='StreetFighterIISpecialChampionEdition-Genesis',
            use_restricted_actions=retro.Actions.FILTERED,
        )

    def preprocess(self, observation):
        """Return ``observation`` as a grayscale array of shape (84, 84, 1)."""
        grayscale = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        shrunk = cv2.resize(grayscale, (84, 84), interpolation=cv2.INTER_CUBIC)
        return np.reshape(shrunk, (84, 84, 1))

    def reset(self):
        """Restart the game, restore the trackers and return the first frame."""
        first_frame = self.preprocess(self.game.reset())
        self.previous_frame = first_frame

        # Start-of-episode values for the trackers
        self.health = self.enemy_health = 144
        self.score = self.last_score = 0
        self.matches_won = self.enemy_matches_won = 0

        return first_frame

    def step(self, action):
        """Advance one step and return ``(frame_delta, reward, done, info)``."""
        raw_frame, _, done, info = self.game.step(action)
        frame = self.preprocess(raw_frame)

        # Change in pixels since the previous step
        frame_delta = frame - self.previous_frame
        self.previous_frame = frame

        return frame_delta, self.reward_function(info), done, info

    def reward_function(self, state):
        """Compute the reward for one step and remember ``state`` for the next.

        Args:
            state: mapping with the keys 'enemy_health', 'health', 'score',
                'matches_won', 'enemy_matches_won' and 'continuetimer'
                (``step`` passes the ``info`` dict from the game).

        Returns:
            While at least one fighter has health left: damage dealt and taken
            scaled by ``HEALTH_SCALE``, a trade bonus when more damage was
            dealt than taken in the same step, and 0.1 per point of score
            gained. Independently of that, round/match bonuses when the
            enemy's health reaches 0 and half-sized penalties when the
            player's health reaches 0.
        """
        now_enemy_health = state['enemy_health']
        now_health = state['health']
        now_score = state['score']
        now_wins = state['matches_won']
        now_losses = state['enemy_matches_won']
        now_timer = state['continuetimer']

        # Changes since the previous step (positive = health lost / score gained)
        gained = now_score - self.last_score
        dealt = self.enemy_health - now_enemy_health
        taken = self.health - now_health

        total = 0

        # Treated as "in a round" as long as either fighter still has health
        if now_enemy_health > 0 or now_health > 0:
            if dealt > 0:
                total += dealt * self.HEALTH_SCALE
                # Traded blows, but came out ahead
                if taken > 0 and dealt > taken:
                    total += (dealt - taken) * self.DAMAGE_TRADE_SCALE

            if taken > 0:
                total -= taken * self.HEALTH_SCALE

            if gained > 0:
                total += gained * 0.1

        # Enemy health just reached zero: round (and possibly match) won
        if now_enemy_health == 0 and self.enemy_health > 0:
            total += self.ROUND_WIN_BONUS
            if now_wins > self.matches_won:
                total += self.MATCH_WIN_BONUS

        # Own health just reached zero: round (and possibly match) lost
        if now_health == 0 and self.health > 0:
            total -= self.ROUND_WIN_BONUS / 2
            if now_losses > self.enemy_matches_won:
                total -= self.MATCH_WIN_BONUS / 2

        # Remember this step for the next comparison
        self.health = now_health
        self.enemy_health = now_enemy_health
        self.last_score = now_score
        self.matches_won = now_wins
        self.enemy_matches_won = now_losses
        self.continue_timer = now_timer

        return total

    def render(self, *args, **kwargs):
        """Render the game; extra arguments are accepted and ignored."""
        self.game.render()

    def close(self):
        """Close the underlying game."""
        self.game.close()

In [None]:
# Claude version 2

class StreetFighter(Env):
    """Street Fighter II retro environment with combo-scaled and anti-spam reward shaping.

    ``step`` returns the difference between the current and the previous
    preprocessed frame (grayscale, 84x84x1) as the observation and replaces
    the game's reward with the value computed by ``reward_function``, which
    layers combo bonuses, a repeated-action penalty and a "whiff" penalty on
    top of damage, score and round/match terms.
    """

    def __init__(self):
        """Declare the spaces, trackers and reward constants, then start the game."""
        super().__init__()
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = MultiBinary(12)
        
        # Initialize state tracking variables
        # NOTE(review): these six attributes are not read by any method
        # visible in this class (reward_function uses self.last_state instead).
        self.health = 144
        self.enemy_health = 144
        self.score = 0
        self.matches_won = 0
        self.enemy_matches_won = 0
        self.continue_timer = 100
        
        # Combo tracking variables
        self.damage_window = deque(maxlen=30)  # Last 30 recorded damage values
        self.score_window = deque(maxlen=30)   # Last 30 recorded score changes
        self.current_combo = 0
        self.frames_since_last_hit = 0
        
        # Anti-spam tracking
        self.action_history = deque(maxlen=60)  # Track last 60 frames of actions
        self.last_hit_frame = 0                 # Only ever assigned 0 in the code visible here
        self.whiff_counter = 0                  # Count actions without score/health changes
        
        # Constants for reward shaping
        self.HEALTH_SCALE = 10.0
        self.ROUND_WIN_BONUS = 100.0
        self.MATCH_WIN_BONUS = 500.0
        self.DAMAGE_TRADE_SCALE = 1.5
        self.COMBO_SCALE = 2.0
        self.MAX_COMBO_BONUS = 5.0
        self.COMBO_TIMEOUT = 30
        
        # Spam prevention constants
        self.WHIFF_PENALTY = -2.0           # Penalty for missing attacks
        self.SPAM_THRESHOLD = 0.8           # Share of identical actions above which the spam penalty applies
        self.SPAM_PENALTY = -5.0            # Penalty for move spamming
        
        self.game = retro.make(
            game='StreetFighterIISpecialChampionEdition-Genesis',
            use_restricted_actions=retro.Actions.FILTERED
        )
        
        # Initialize last state
        self.last_state = {
            'enemy_health': 144,
            'health': 144,
            'score': 0,
            'matches_won': 0,
            'enemy_matches_won': 0,
            'continuetimer': 100
        }

    def calculate_action_diversity(self):
        """
        Check how repetitive the recorded actions have been.

        Returns:
            0 while fewer than 30 actions are recorded, or when no single
            action makes up more than ``SPAM_THRESHOLD`` of the history;
            otherwise ``SPAM_PENALTY``.
        """
        if len(self.action_history) < 30:
            return 0
        
        # Turn each action into a string key so identical actions can be counted
        recent_moves = list(self.action_history)
        move_counts = {}
        
        for move in recent_moves:
            move_str = ''.join(map(str, move))
            move_counts[move_str] = move_counts.get(move_str, 0) + 1
        
        # Calculate the ratio of the most common move
        most_common_ratio = max(move_counts.values()) / len(recent_moves)
        
        # Apply penalty if the same move is being spammed
        if most_common_ratio > self.SPAM_THRESHOLD:
            return self.SPAM_PENALTY
        return 0

    def detect_combo(self, enemy_health_diff, score_diff):
        """
        Update the combo and whiff trackers for one step.

        Args:
            enemy_health_diff: enemy health lost since the previous step.
            score_diff: score gained since the previous step.

        Returns:
            Tuple ``(is_true_combo, combo_multiplier)``. ``is_true_combo`` is
            True when the combo count exceeds 1 and the windowed damage
            exceeds 10 or the windowed score exceeds 100.
            ``combo_multiplier`` is ``1.0 + 0.5 * current_combo`` capped at
            ``MAX_COMBO_BONUS``.
        """
        # Update tracking windows
        self.damage_window.append(enemy_health_diff)
        self.score_window.append(score_diff)
        
        # If we dealt damage or got points this frame
        if enemy_health_diff > 0 or score_diff > 0:
            self.whiff_counter = 0  # Reset whiff counter on successful hit
            # A hit within COMBO_TIMEOUT steps of the previous one extends the combo
            if self.frames_since_last_hit < self.COMBO_TIMEOUT:
                self.current_combo += 1
            else:
                self.current_combo = 1
            self.frames_since_last_hit = 0
            self.last_hit_frame = 0
        else:
            self.frames_since_last_hit += 1
            
            # The most recent action has at least one truthy entry, yet nothing was gained
            if any(self.action_history[-1] if self.action_history else [0]):
                self.whiff_counter += 1
        
        # Reset combo if too much time has passed
        if self.frames_since_last_hit >= self.COMBO_TIMEOUT:
            self.current_combo = 0
            
        # Calculate combo multiplier
        combo_multiplier = min(1.0 + (self.current_combo * 0.5), self.MAX_COMBO_BONUS)
        
        # Detect if this seems to be a "true" combo
        recent_damage = sum(self.damage_window)
        recent_score = sum(self.score_window)
        
        is_true_combo = (
            self.current_combo > 1 and 
            (recent_damage > 10 or recent_score > 100)
        )
        
        return is_true_combo, combo_multiplier

    def reward_function(self, state):
        """Compute the reward for one step and store ``state`` as ``last_state``.

        Args:
            state: mapping with the keys 'enemy_health', 'health', 'score',
                'matches_won', 'enemy_matches_won' and 'continuetimer'
                (``step`` passes the ``info`` dict from the game).

        Returns:
            The accumulated reward: damage/combo/score terms and the
            spam/whiff penalties while at least one fighter has health left,
            plus round/match bonuses and penalties when a health value
            reaches 0.
        """
        reward = 0
        
        # Extract current state
        enemy_health = state['enemy_health']
        health = state['health']
        score = state['score']
        matches_won = state['matches_won']
        enemy_matches_won = state['enemy_matches_won']
        
        # Calculate changes
        score_diff = score - self.last_state['score']
        enemy_health_diff = self.last_state['enemy_health'] - enemy_health
        health_diff = self.last_state['health'] - health
        
        # Check if round is active (either player has health)
        is_round_active = (enemy_health > 0 or health > 0)
        
        if is_round_active:
            # Detect combo state (this also updates whiff_counter and current_combo,
            # which are read further down, so it has to run first)
            is_combo, combo_multiplier = self.detect_combo(enemy_health_diff, score_diff)
            
            # Reward for dealing damage, with combo scaling
            if enemy_health_diff > 0:
                base_damage_reward = enemy_health_diff * self.HEALTH_SCALE
                if is_combo:
                    reward += base_damage_reward * combo_multiplier
                    reward += self.COMBO_SCALE * self.current_combo
                else:
                    reward += base_damage_reward
                
                # Extra reward for trading damage favorably
                if health_diff > 0:
                    if enemy_health_diff > health_diff:
                        reward += (enemy_health_diff - health_diff) * self.DAMAGE_TRADE_SCALE
            
            # Apply anti-spam mechanics
            spam_penalty = self.calculate_action_diversity()
            # The whiff penalty starts at the third consecutive whiff and is capped at five
            whiff_penalty = self.WHIFF_PENALTY * min(self.whiff_counter, 5) if self.whiff_counter > 2 else 0
            
            reward += spam_penalty
            reward += whiff_penalty
            
            # Penalty for taking damage
            if health_diff > 0:
                reward -= health_diff * self.HEALTH_SCALE
                # Getting hit ends the combo; the next hit starts a new one
                self.current_combo = 0
                self.frames_since_last_hit = self.COMBO_TIMEOUT
            
            # Small reward for score increases
            if score_diff > 0:
                if is_combo:
                    reward += score_diff * 0.2
                else:
                    reward += score_diff * 0.1
        
        # Round end rewards/penalties
        if self.last_state['enemy_health'] > 0 and enemy_health == 0:  # Won the round
            reward += self.ROUND_WIN_BONUS
            if matches_won > self.last_state['matches_won']:  # Won the match
                reward += self.MATCH_WIN_BONUS
        
        if self.last_state['health'] > 0 and health == 0:  # Lost the round
            reward -= self.ROUND_WIN_BONUS / 2
            if enemy_matches_won > self.last_state['enemy_matches_won']:  # Lost the match
                reward -= self.MATCH_WIN_BONUS / 2
        
        # Update last state
        self.last_state = {
            'enemy_health': enemy_health,
            'health': health,
            'score': score,
            'matches_won': matches_won,
            'enemy_matches_won': enemy_matches_won,
            'continuetimer': state['continuetimer']
        }
        
        return reward

    def step(self, action):
        """Advance one step and return ``(frame_delta, reward, done, info)``.

        The game's own reward is discarded in favour of ``reward_function(info)``.
        """
        obs, _, done, info = self.game.step(action)
        # NOTE(review): the action object itself is stored, not a copy --
        # confirm callers do not mutate it in place afterwards.
        self.action_history.append(action)  # Track action for spam detection
        obs = self.preprocess(obs)
        # Change in pixels since the previous step
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        
        reward = self.reward_function(info)
        return frame_delta, reward, done, info

    def reset(self):
        """Restart the game, restore the trackers and return the first preprocessed frame."""
        obs = self.game.reset()
        obs = self.preprocess(obs)
        self.previous_frame = obs
        
        # Reset state variables
        self.last_state = {
            'enemy_health': 144,
            'health': 144,
            'score': 0,
            'matches_won': 0,
            'enemy_matches_won': 0,
            'continuetimer': 100
        }
        
        # Reset tracking variables
        self.damage_window.clear()
        self.score_window.clear()
        self.action_history.clear()
        self.current_combo = 0
        # Start "timed out" so the first hit of an episode begins a fresh combo
        self.frames_since_last_hit = self.COMBO_TIMEOUT
        self.whiff_counter = 0
        
        return obs

    def preprocess(self, observation):
        """Return ``observation`` as a grayscale array of shape (84, 84, 1)."""
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_CUBIC)
        channels = np.reshape(resize, (84, 84, 1))
        return channels

    def render(self, *args, **kwargs):
        """Render the game; extra arguments are accepted and ignored."""
        self.game.render()

    def close(self):
        """Close the underlying game."""
        self.game.close()