# In [1]:
import gymnasium
from gymnasium import spaces
import numpy as np

# In [2]:
class CompetitiveAGIEnv(gymnasium.Env):
    """Two-party competitive AGI race environment.

    State consists of a shared collaboration mode, a status per party and a
    resource balance per party.  Each step takes one action per party:

    * 0 -- explore independently
    * 1 -- retreat (take a break)
    * 2 -- initiate / continue collaboration
    * 3 -- break collaboration (if currently collaborating)

    Args:
        initial_resources: Resource balance each party starts every episode
            with.  Defaults to 100.
        max_steps: Step count at which ``_is_terminal`` reports the episode
            as over.  Defaults to 1000.

    NOTE(review): ``step`` is still a placeholder, so the cost / reward /
    probability constants defined in ``__init__`` are not consumed anywhere
    in this class yet.
    """

    # Collaboration mode (applies to both parties at once).
    COLLAB_MODES = {
        'INDEPENDENT': 0,    # Both parties working independently (never collaborated)
        'COLLABORATIVE': 1,  # Both parties actively collaborating
        'POST_COLLAB': 2,    # Both previously collaborated but now independent
    }

    # Status of an individual party.
    PARTY_STATUS = {
        'EXPLORING': 0,  # Actively exploring
        'RETREATED': 1,  # Retreated
        'FOUND_AGI': 2,  # Found AGI
    }

    # Per-party action codes, matching the Discrete(4) action sub-spaces.
    ACTIONS = {
        'EXPLORE': 0,       # Explore independently
        'RETREAT': 1,       # Retreat (take a break)
        'COLLABORATE': 2,   # Initiate/continue collaboration
        'BREAK_COLLAB': 3,  # Break collaboration (if currently collaborating)
    }

    def __init__(self, initial_resources=100, max_steps=1000):
        super().__init__()

        # Episode configuration; reset() restores state from these values.
        self.initial_resources = initial_resources
        self.max_steps = max_steps

        # Each party chooses one of 4 actions, so the joint action is a pair.
        self.action_space = spaces.Tuple((spaces.Discrete(4), spaces.Discrete(4)))

        # Observation layout mirrors the tuple built by _get_observation().
        self.observation_space = spaces.Tuple((
            spaces.Discrete(3),  # collaboration mode
            spaces.Discrete(3),  # party_1_status
            spaces.Discrete(3),  # party_2_status
            spaces.Box(low=0, high=float('inf'), shape=(1,), dtype=np.float32),  # party_1_resources
            spaces.Box(low=0, high=float('inf'), shape=(1,), dtype=np.float32),  # party_2_resources
        ))

        # Base probabilities for AGI discovery, keyed by collaboration mode
        # in _get_agi_probability().
        self.independent_agi_prob = 0.01
        self.collaborative_agi_prob = 0.03
        self.post_collab_agi_prob = 0.015

        # Resource dynamics
        self.explore_independent_cost = 1
        self.explore_collaborative_cost = 3
        self.retreat_reward = 10
        self.collaboration_initiation_cost = 5
        self.agi_reward = 100

        # Competition penalty: if the opponent finds AGI first
        self.competition_penalty = -50

        self._reset_state()

    def _reset_state(self):
        """Put every mutable episode variable back to its starting value.

        Shared by ``__init__`` and ``reset`` so the two cannot drift apart.
        """
        self.collab_mode = self.COLLAB_MODES['INDEPENDENT']

        self.party_1_status = self.PARTY_STATUS['EXPLORING']
        self.party_2_status = self.PARTY_STATUS['EXPLORING']

        # Track which party has broken collaboration
        self.party_1_broke_collab = False
        self.party_2_broke_collab = False

        self.party_1_resources = self.initial_resources
        self.party_2_resources = self.initial_resources

        self.current_step = 0

    def _get_observation(self):
        """Return the current state as a tuple matching ``observation_space``.

        Returns:
            ``(collab_mode, party_1_status, party_2_status,
            party_1_resources, party_2_resources)`` where the two resource
            entries are float32 arrays of shape ``(1,)``.
        """
        return (
            self.collab_mode,
            self.party_1_status,
            self.party_2_status,
            np.array([self.party_1_resources], dtype=np.float32),
            np.array([self.party_2_resources], dtype=np.float32),
        )

    def _is_terminal(self):
        """Return True when the episode is over.

        The episode ends as soon as either party is no longer exploring,
        the step budget is used up, or either party's resources are <= 0.
        """
        exploring = self.PARTY_STATUS['EXPLORING']
        return (self.party_1_status != exploring or
                self.party_2_status != exploring or
                self.current_step >= self.max_steps or
                self.party_1_resources <= 0 or
                self.party_2_resources <= 0)

    def _get_terminal_info(self):
        """Describe how the episode ended.

        Returns:
            A dict that may contain:

            * ``"bankrupt"``: ``"party_1"``, ``"party_2"`` or ``"both"``
            * ``"timeout"``: ``True``
            * ``"winner"``: ``"party_1"``, ``"party_2"`` or ``"both"``
            * ``"both_retreated"``: ``True``, or ``"retreated"``:
              ``"party_1"`` / ``"party_2"`` -- only reported when none of
              the keys above applies.
        """
        found_agi = self.PARTY_STATUS['FOUND_AGI']
        retreated = self.PARTY_STATUS['RETREATED']
        info = {}

        p1_bankrupt = self.party_1_resources <= 0
        p2_bankrupt = self.party_2_resources <= 0
        if p1_bankrupt and p2_bankrupt:
            info["bankrupt"] = "both"
        elif p1_bankrupt:
            info["bankrupt"] = "party_1"
        elif p2_bankrupt:
            info["bankrupt"] = "party_2"

        if self.current_step >= self.max_steps:
            info["timeout"] = True

        p1_found = self.party_1_status == found_agi
        p2_found = self.party_2_status == found_agi
        if p1_found and p2_found:
            info["winner"] = "both"
        elif p1_found:
            info["winner"] = "party_1"
        elif p2_found:
            info["winner"] = "party_2"

        # Retreats are only reported when there is no winner, bankruptcy or
        # timeout, i.e. when nothing has been recorded so far.
        if not info:
            p1_retreated = self.party_1_status == retreated
            p2_retreated = self.party_2_status == retreated
            if p1_retreated and p2_retreated:
                info["both_retreated"] = True
            elif p1_retreated:
                info["retreated"] = "party_1"
            elif p2_retreated:
                info["retreated"] = "party_2"

        return info

    def _process_collaboration_actions(self, action_1, action_2):
        """Update the collaboration mode from both parties' actions.

        * Not collaborating and both choose COLLABORATE: collaboration
          starts and each party pays ``collaboration_initiation_cost``.
        * Collaborating and both choose COLLABORATE: nothing changes.
        * Collaborating and anything else: the mode becomes POST_COLLAB;
          a party that chose BREAK_COLLAB is recorded in its
          ``party_N_broke_collab`` flag.

        Args:
            action_1: Action code chosen by party 1.
            action_2: Action code chosen by party 2.

        Returns:
            True if the collaboration mode changed, False otherwise.  The
            flag is purely informational.
        """
        collaborate = self.ACTIONS['COLLABORATE']
        break_collab = self.ACTIONS['BREAK_COLLAB']
        collaborating = self.collab_mode == self.COLLAB_MODES['COLLABORATIVE']
        both_collaborate = action_1 == collaborate and action_2 == collaborate

        if not collaborating:
            if not both_collaborate:
                return False
            # Starting collaboration requires agreement from both parties.
            self.collab_mode = self.COLLAB_MODES['COLLABORATIVE']
            self.party_1_resources -= self.collaboration_initiation_cost
            self.party_2_resources -= self.collaboration_initiation_cost
            return True

        if both_collaborate:
            # Collaboration simply continues.
            return False

        # Collaboration ends, either actively broken (action 3) or just not
        # continued by at least one party.
        self.collab_mode = self.COLLAB_MODES['POST_COLLAB']
        if action_1 == break_collab:
            self.party_1_broke_collab = True
        if action_2 == break_collab:
            self.party_2_broke_collab = True
        return True

    def _get_agi_probability(self, party_id):
        """Return the AGI discovery probability for the current mode.

        Args:
            party_id: Currently unused; the probability depends only on the
                shared collaboration mode.
        """
        if self.collab_mode == self.COLLAB_MODES['INDEPENDENT']:
            return self.independent_agi_prob
        if self.collab_mode == self.COLLAB_MODES['COLLABORATIVE']:
            return self.collaborative_agi_prob
        # POST_COLLAB (and any other value)
        return self.post_collab_agi_prob

    def step(self, action):
        """Take a step in the environment with actions from both parties.

        TODO: not implemented yet.  This placeholder returns ``None``
        instead of the ``(observation, reward, terminated, truncated, info)``
        tuple that the Gymnasium ``Env.step`` API specifies.
        """
        pass

    def reset(self, seed=None, options=None):
        """Reset the environment to its initial state.

        Args:
            seed: Optional seed.  It is forwarded to ``gymnasium.Env.reset``
                so that ``self.np_random`` is seeded, and is also applied to
                the global NumPy RNG as before.
            options: Accepted for API compatibility; unused.

        Returns:
            ``(observation, info)`` with an empty ``info`` dict.
        """
        # Seeds self.np_random, as the Gymnasium API expects of subclasses.
        super().reset(seed=seed)
        if seed is not None:
            # Kept for backward compatibility with code that draws from the
            # global NumPy RNG; prefer self.np_random in new code.
            np.random.seed(seed)

        self._reset_state()

        return self._get_observation(), {}

    def close(self):
        """Clean up resources (nothing to release)."""
        pass