In [54]:
from pydantic import BaseModel, Field
from typing import Any, List, Optional

class GameState(BaseModel):
  """Current position in the simulation plus the tunable game configuration.

  `generation` and `round` are required; every other field is a configuration
  value with a default.
  """
  generation: int  # current generation number; incremented by Orchestrator.evolve
  round: int  # current round number; incremented by Orchestrator.play_round
  
  # configs
  donation_multiplier: float = 2  # recipient gains donation_amount * this factor
  trace_depth: int = 3  # max number of hops shown in the donation prompt's history trace
  base_wallet: int = 10  # starting wallet assigned to each new Player
  generations: int = 10  # number of generations Orchestrator.run loops over
  rounds: int = 12  # number of rounds played per generation
  players: int = 12  # number of players created per generation
  cutoff_threshold: float = 0.5  # fraction of top-wallet players passed on as parents in evolve




class Decision(BaseModel):
  """Record of one donation, appended to the donor's history."""
  # Copy of the recipient at donation time; Player.execute_donation stores a
  # Player instance here, which is why the field is typed as Any.
  recipient_snapshot: Any
  # Copy of the game state (generation/round/config) at donation time.
  game_state_snapshot: GameState
  # Value returned by the model for the donation; DonationBuilder asks for a
  # float between 0 and 1.
  donation_percent: float
  
  # NOTE(review): class-based Config is the pydantic v1 spelling; the rest of
  # the file uses v2 APIs (model_dump), where `model_config = ConfigDict(...)`
  # is the preferred form — confirm against the installed pydantic version.
  class Config:
    arbitrary_types_allowed = True

# Structured-output schema passed as `response_format` in Player.generate_strategy.
# NOTE(review): pydantic exposes the class docstring and the Field descriptions
# in the generated JSON schema, so presumably they reach the model as part of
# the request — treat them as prompt text, not as ordinary documentation.
class StrategyBuilder(BaseModel):
  """Build the strategy"""
  # Free-form reasoning steps; only printed for inspection by generate_strategy.
  thoughts: List[str] = Field(..., description="Briefly describe your thought process for the strategy to take for this round.")
  # The one-sentence strategy that generate_strategy returns and Player stores.
  strategy: str = Field(..., description="The strategy to be used, must begin with 'My strategy will be'.")

# Structured-output schema passed as `response_format` in Player.generate_donation.
# NOTE(review): pydantic exposes the class docstring and the Field descriptions
# in the generated JSON schema, so presumably they reach the model as part of
# the request — treat them as prompt text, not as ordinary documentation.
class DonationBuilder(BaseModel):
  """Build the donation"""
  # Free-form reasoning steps; only printed for inspection by generate_donation.
  thoughts: List[str] = Field(..., description="Briefly describe your thought process for the donation to make for this round.")
  # Fraction of the donor's wallet to give away; nothing in this schema enforces
  # the 0..1 range beyond the description text.
  donation: float = Field(..., description="The percentage amount of resources to donate. MUST BE A FLOAT BETWEEN 0 AND 1.")


class Player:
  """An LLM-driven agent in the donor game.

  A player has a name (`<generation>_<index>`), a wallet, a one-sentence
  strategy (given or generated by the model) and a history of the donations
  it has made, oldest first.
  """

  def __init__(self, game_state: "GameState", i: int, parents: Optional[List["Player"]] = None, strategy: Optional[str] = None):
    """Create a player for the current generation.

    Args:
      game_state: supplies the generation number and the starting wallet.
      i: index of the player within its generation.
      parents: previous-generation players whose strategies are shown to the
        model when a strategy has to be generated.
      strategy: pre-made strategy; when falsy a strategy is generated with an
        LLM call.
    """
    self.name: str = f"{game_state.generation}_{i}"
    # None instead of a shared `[]` default so instances never alias one list.
    self.parents: List[Player] = list(parents) if parents else []
    self.history: List[Decision] = []
    # Starts as base_wallet; becomes fractional once donations are applied.
    self.wallet: float = game_state.base_wallet
    if strategy:
      self.strategy: str = strategy
    else:
      self.strategy: str = self.generate_strategy(game_state=game_state, parents=self.parents)

  def clone(self) -> "Player":
    """Return a snapshot copy of this player without calling the model.

    The copy bypasses __init__ (which may trigger strategy generation and
    would rebuild the name) and gets its own history list, so later donations
    by the original do not leak into the snapshot.
    """
    player = Player.__new__(Player)
    player.name = self.name
    player.parents = self.parents
    player.strategy = self.strategy
    player.wallet = self.wallet
    player.history = list(self.history)
    return player

  def system_prompt(self, game_state: "GameState") -> str:
    """Return the game rules used as the system message for every model call."""
    return f"""Each player is given an initial endowment of {game_state.base_wallet} units of a resource.
In each round, you are randomly paired with another individual.
One of you is a donor, and the other is a recipient.
The donor decides to give up some amount of the resource.
The recipient receives {game_state.donation_multiplier}x the number of units that the donor gave up.
If you were a donor in one round, you will be a recipient in the next round, and vice versa.
Your goal is to maximize the number of units you have after the final round.
After the game has finished, the best-performing half of agents will survive to the next generation, and continue playing."""

  def strategy_prompt(self, parents: Optional[List["Player"]] = None) -> str:
    """Return the user prompt asking the model to formulate a strategy.

    When `parents` is non-empty their names, final wallets and strategies are
    included as advice; otherwise the model starts from the rules alone.
    """
    if parents:
      parent_advice = "\n".join([f"\n\n{parent.name} with score {parent.wallet} strategy: {parent.strategy}" for parent in parents])
      generation_prompt = f"""How would you approach the game? Here is the advice of the best-performing 50% of the previous generation, along with their final scores.
{parent_advice}
Modify this advice to create your own strategy."""
    else:
      generation_prompt = "Based on the description of the game, create a strategy that you will follow in  the game."

    return f"""Your name is {self.name}.

{generation_prompt}

As a donor, you will receive limited information about the recipient and their recent interactions: You will see what the recipient (let’s call them A) did in the immediately preceding round only.
You will also see what A’s partner from that round (let’s call them B) did in their preceding round.
This chain continues for at most three rounds back.
Important: You do NOT have access to multiple rounds of information about A directly.
You only know what A did in the single most recent round.

Example (assuming it’s now round 4): You are paired with A.
You know: In round 3, A donated X% to B.
You also know: In round 2, B donated Y% to C.
And finally: In round 1, C donated Z% to D.
Remember: This is all the information you have.
You don’t know what A did in rounds 1 or 2, only in round 3.

In the first round, there will be no information about the recipient’s previous behavior - think carefully about how much to donate in that case.
Before formulating your strategy, briefly think step by step about what would be a successful strategy in this game.
Then describe your strategy briefly without explanation in one sentence that starts: My strategy will be."""

  def donation_prompt(self, game_state: "GameState", recipient: "Player") -> str:
    """Return the user prompt asking the model how much to donate to `recipient`.

    The prompt includes a trace of at most `game_state.trace_depth` lines: the
    recipient's most recent donation, then the most recent donation of that
    donation's recipient snapshot, and so on.
    """
    def build_trace(depth: int) -> str:
      lines = []
      current = recipient
      for _ in range(depth):
        if not current.history:
          break
        # History is appended to, so the most recent decision is the last one.
        last = current.history[-1]
        # donation_percent is a 0..1 fraction; format it as a real percentage.
        lines.append(f"In round {last.game_state_snapshot.round}, {current.name} donated {last.donation_percent:.0%} of their resources to {last.recipient_snapshot.name}.\n")
        current = last.recipient_snapshot
      return "".join(lines)

    trace = build_trace(game_state.trace_depth)

    # TODO: donor is invisible to recipient
    return f"""Your name is {self.name}.
As you will recall, here is the strategy you decided to follow: {self.strategy}
This is generation {game_state.generation}.
It is now round {game_state.round}.
In this round, you have been paired with {recipient.name}.
They currently have {recipient.wallet} units of the valuable resource.
Here is what {recipient.name} and their earlier partners did in previous rounds: {trace}
You will take the role of donor. You currently have {self.wallet} units of the valuable resource.

How many units do you give up? Very briefly think step by step about how you apply your strategy in this situation and then provide your answer."""

  def generate_strategy(self, game_state: "GameState", parents: Optional[List["Player"]] = None) -> str:
    """Ask the model for a strategy and return its `strategy` sentence.

    Relies on the notebook-level `client` object; the parsed response is also
    printed for inspection.
    """
    res = client.beta.chat.completions.parse(
      model="gpt-4o-mini",
      messages=[
        {"role": "system", "content": self.system_prompt(game_state=game_state)},
        {"role": "user", "content": self.strategy_prompt(parents=parents)},
      ],
      response_format=StrategyBuilder
    )
    built_strategy = res.choices[0].message.parsed

    print(f"\n\n{self.name} strategy:\n{built_strategy.model_dump_json(indent=2)}")

    return built_strategy.strategy

  def generate_donation(self, game_state: "GameState", recipient: "Player") -> float:
    """Ask the model for a donation and return its raw `donation` value.

    Relies on the notebook-level `client` object; the parsed response is also
    printed for inspection. The value is not range-checked here.
    """
    res = client.beta.chat.completions.parse(
      model="gpt-4o-mini",
      messages=[
        {"role": "system", "content": self.system_prompt(game_state=game_state)},
        {"role": "user", "content": self.donation_prompt(game_state=game_state, recipient=recipient)},
      ],
      response_format=DonationBuilder
    )
    built_donation = res.choices[0].message.parsed

    print(f"\n\n{self.name} donation:\n{built_donation.model_dump_json(indent=2)}")

    return built_donation.donation

  def execute_donation(self, recipient: "Player", game_state: "GameState") -> float:
    """Decide on a donation, move the resources and record the decision.

    Returns:
      The fraction of the donor's wallet that was donated, clamped to 0..1.
    """
    donation_percent = self.generate_donation(game_state, recipient)
    # The schema only asks for a value in 0..1; clamp so a stray answer can
    # neither create resources nor push the donor's wallet below zero.
    donation_percent = min(max(float(donation_percent), 0.0), 1.0)
    donation_amount = self.wallet * donation_percent
    self.wallet -= donation_amount
    recipient.wallet += donation_amount * game_state.donation_multiplier

    game_state_copy = GameState(**game_state.model_dump())
    # Snapshot via clone() so the recipient's name, wallet and history are
    # preserved (the history is what lets the trace follow more than one hop)
    # and no model call is triggered.
    decision = Decision(recipient_snapshot=recipient.clone(), game_state_snapshot=game_state_copy, donation_percent=donation_percent)

    self.history.append(decision)

    return donation_percent
  

In [48]:
# test by creating two players
# No strategy is passed, so each Player(...) call generates one through
# Player.generate_strategy (one model request per player, output printed below).
game_state = GameState(generation=1, round=1)
player1 = Player(game_state=game_state, i=1)
player2 = Player(game_state=game_state, i=2)



1_1 strategy:
{
  "thoughts": [
    "In the first round, as there is no prior information about the recipient's behavior, I will start with a small donation to test their willingness to reciprocate in the future rounds. As I gain information about their past behavior, I will adjust my donation strategy accordingly, potentially increasing it if they have shown a tendency to be generous, or decreasing it if they have been selfish. The key is to leverage the twofold return of donations while also protecting my own resources if the recipient is not likely to reciprocate."
  ],
  "strategy": "My strategy will be to start by donating 2 units in the first round and then adjust my donations based on the generosity displayed by my recipient in their previous round."
}


1_2 strategy:
{
  "thoughts": [
    "In the first round, I have no prior information about my recipient's behavior, so I should donate a moderate amount to encourage cooperation and signal trust, without risking too much of my

In [49]:
# player1 acts as donor towards player2; the call updates both wallets and
# returns the donated fraction (echoed as the cell result).
player1.execute_donation(player2, game_state)



1_1 donation:
{
  "thoughts": [
    "I recall my strategy is to donate 2 units in the first round.",
    "Since I have 10 units and this is my first round, I will stick to my original plan without any adjustments, as I have no prior information about 1_2's behavior.",
    "Donating 2 units will potentially yield a good return depending on how 1_2 behaves in future rounds."
  ],
  "donation": 0.2
}


0.2

In [50]:
# Inspect both wallets after the donation: the donor's shrinks by the donated
# amount and the recipient's grows by that amount times donation_multiplier.
print(player1.wallet)
print(player2.wallet)

8.0
14.0


In [71]:
from typing import Tuple
from typing import List
import numpy as np


class Orchestrator:
  """Runs the donor game: rounds of paired donations across generations."""

  def __init__(self, game_state: "GameState"):
    """Create the first generation of `game_state.players` players."""
    self.game_state = game_state
    self.players = [Player(game_state=game_state, i=i) for i in range(game_state.players)]

  def create_donor_recipient_pairs(self) -> List[Tuple["Player", "Player"]]:
    """Return random (donor, recipient) pairs for one round.

    Every player appears exactly once as donor and exactly once as recipient
    and never donates to themselves. With more than two players no two
    players donate to each other in the same round.

    The players are placed on a randomly shuffled ring and each one donates
    to its successor. Unlike drawing recipients one at a time with rejection,
    this cannot end with the last donor having only themselves left to pick,
    so it always terminates.

    Returns:
      A list of (donor, recipient) tuples in shuffled donor order; empty when
      there are fewer than two players.
    """
    count = len(self.players)
    if count < 2:
      # Nobody can be paired with someone other than themselves.
      return []
    ring = np.random.permutation(count).tolist()
    return [
      (self.players[donor_idx], self.players[ring[(position + 1) % count]])
      for position, donor_idx in enumerate(ring)
    ]

  def play_round(self):
    """Execute one donation per pair, then advance the round counter."""
    pairs = self.create_donor_recipient_pairs()
    for donor, recipient in pairs:
      donor.execute_donation(recipient, self.game_state)
    self.game_state.round += 1

  def evolve(self):
    """Start the next generation using the richest players as parents.

    Resets the round counter, increments the generation and replaces all
    players with new ones whose `parents` are the top
    `cutoff_threshold` fraction of the current players by wallet.
    """
    self.game_state.round = 1
    self.game_state.generation += 1
    print(f"\n\nEvolving to generation {self.game_state.generation}")
    # sort players by wallet
    self.players = sorted(self.players, key=lambda x: x.wallet, reverse=True)
    # get top half
    top_half = self.players[:int(len(self.players)*self.game_state.cutoff_threshold)]
    # report the survivors whose strategies seed the next generation
    top_players_strings = "\n".join([f"player {player.name} with wallet {player.wallet} and strategy: {player.strategy}" for player in top_half])
    print(f"\n\nTop half:\n{top_players_strings}")
    self.players = [Player(game_state=self.game_state, i=i, parents=top_half) for i in range(self.game_state.players)]

  def run(self) -> List["Player"]:
    """Play all generations and return the players of the last one.

    After each generation's rounds the population evolves, except once the
    generation counter has reached `game_state.generations`.
    """
    for _ in range(self.game_state.generations):
      print(f"\n\nGeneration {self.game_state.generation}")
      for _ in range(self.game_state.rounds):
        print(f"\n\n\tGeneration {self.game_state.generation} Round {self.game_state.round}")
        self.play_round()
      if self.game_state.generation < self.game_state.generations:
        self.evolve()

    return self.players
    
  


    



In [75]:
# Small end-to-end run: 4 players, 6 rounds per generation, 4 generations.
game_state = GameState(
  generation=1,
  round=1,
  generations=4,
  rounds=6,
  players=4,
  cutoff_threshold=0.5,
)
orchestrator = Orchestrator(game_state)

# Rank the last generation from richest to poorest before reporting.
final_players = sorted(
  orchestrator.run(),
  key=lambda player: player.wallet,
  reverse=True,
)
top_players_strings = "\n".join(
  f"player {player.name} with wallet {player.wallet} and strategy: {player.strategy}"
  for player in final_players
)
print(f"\n\nFinal players: \n{top_players_strings}")



1_0 strategy:
{
  "thoughts": [
    "As a donor in the initial round, I have no information about the recipient A. I should consider starting with a conservative donation to avoid losing too many units early on. In subsequent rounds, my donation will be influenced by A's previous behavior, where if A donated generously, I will match that generosity or slightly increase it, and if A donated poorly, I may choose to donate less. The strategy should focus on building trust and cooperation, as reciprocation could maximize my resources over multiple rounds."
  ],
  "strategy": "My strategy will be to start with a small donation of 1 unit in the first round and adjust my donation in subsequent rounds based on A's previous round's behavior, aiming to build trust or retaliate as necessary."
}


1_1 strategy:
{
  "thoughts": [
    "In the first round, I cannot gauge the recipient's past behavior, so I will donate a small amount to test their willingness to reciprocate in the next round.",
    

KeyboardInterrupt: 