# In [None]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt

# Payoff table indexed as payoff[player1_strategy, player2_strategy][player_id].
# Strategy 0 is "hare" and strategy 1 is "stag" (see StagHuntAgent.make_decision).
payoff = np.array(
    [
        [[2, 2], [3, 0]],  # player 1 plays 0; player 2 plays 0, then 1
        [[0, 3], [4, 4]],  # player 1 plays 1; player 2 plays 0, then 1
    ]
)

def get_payoff(player1_strategy, player2_strategy, player_id):
    """Look up one player's payoff for a pair of strategy choices.

    Args:
        player1_strategy: Index of the first player's strategy in ``payoff``.
        player2_strategy: Index of the second player's strategy in ``payoff``.
        player_id: 0 for the first player's payoff, 1 for the second player's.

    Returns:
        The matching entry of the module-level ``payoff`` array.
    """
    outcome = payoff[player1_strategy, player2_strategy]
    return outcome[player_id]

class StagHuntAgent:
    """Agent that picks hare (0) or stag (1) from a signal and a trust weight.

    State:
        x: Signal, the agent's running estimate that is nudged towards the
           observed share of partner stag choices (see ``compute_new_signal``).
        t: Trust, the weight given to ``x`` when forming the belief
           ``expected_p``; it never drops below 0.1 once updated.
    """

    def __init__(self, u_i=None, t_init=1.0, gamma_inc=1.1, gamma_dec=0.8, eta=0.1, noise_sigma=0.02):
        """Create an agent.

        Args:
            u_i: Initial signal. When None it is derived from the payoff table
                as ``1 - h / (h + s)`` with ``h = payoff(0,0) - payoff(1,0)``
                and ``s = payoff(1,1) - payoff(0,1)`` (player 0's payoffs).
            t_init: Initial trust.
            gamma_inc: Trust multiplier when the partner matches expectation.
            gamma_dec: Trust multiplier when the partner violates expectation.
            eta: Step size used when moving the signal towards an observation.
            noise_sigma: Standard deviation of the Gaussian noise added to the
                signal on every update.
        """
        if u_i is None:
            hare_margin = get_payoff(0, 0, 0) - get_payoff(1, 0, 0)
            stag_margin = get_payoff(1, 1, 0) - get_payoff(0, 1, 0)
            u_i = 1 - (hare_margin / (hare_margin + stag_margin))
        self.x = u_i
        self.t = t_init
        self.gamma_inc = gamma_inc
        self.gamma_dec = gamma_dec
        self.eta = eta
        self.noise_sigma = noise_sigma

    def expected_p(self):
        """Return the believed probability that the partner plays stag.

        The value is ``(1 + x * t) / (2 + t)``: it equals 0.5 when ``t`` is 0
        and approaches ``x`` as ``t`` grows.
        """
        return (1 + self.x * self.t) / (2 + self.t)

    def make_decision(self):
        """Return 1 (stag) if its expected payoff beats hare's, otherwise 0.

        Both expected payoffs are computed from the payoff table for player 0,
        weighting the partner's stag/hare responses by ``p`` and ``1 - p``, so
        the decision stays correct if the payoff table is edited.
        """
        p = self.expected_p()
        ev_hare = p * get_payoff(0, 1, 0) + (1 - p) * get_payoff(0, 0, 0)
        ev_stag = p * get_payoff(1, 1, 0) + (1 - p) * get_payoff(1, 0, 0)
        return 1 if ev_stag - ev_hare > 0 else 0

    def compute_new_trust(self, partner_choice):
        """Return the trust value after observing ``partner_choice``.

        The agent expects stag (1) when ``x > 0.5`` and hare (0) otherwise.
        Trust is scaled by ``gamma_inc`` when the partner matched that
        expectation and by ``gamma_dec`` when not, with a floor of 0.1.
        The agent's state is not modified.
        """
        expected_choice = 1 if self.x > 0.5 else 0
        factor = self.gamma_inc if partner_choice == expected_choice else self.gamma_dec
        return max(self.t * factor, 0.1)

    def compute_new_signal(self, P_obs):
        """Return the signal moved a fraction ``eta`` towards ``P_obs``.

        Gaussian noise with standard deviation ``noise_sigma`` is added and the
        result is clipped to [0, 1]. The agent's state is not modified.
        """
        noise = np.random.normal(0, self.noise_sigma)
        return np.clip(self.x + self.eta * (P_obs - self.x) + noise, 0, 1)

    def update_state(self, new_trust, new_signal):
        """Store the new trust and signal values on the agent."""
        self.t = new_trust
        self.x = new_signal

def always_collaborate():
    """Partner strategy that plays stag (1) every round."""
    stag = 1
    return stag

def always_defect():
    """Partner strategy that plays hare (0) every round."""
    hare = 0
    return hare

def random_strategy():
    """Partner strategy that picks 0 or 1 uniformly at random each round."""
    options = (0, 1)
    return random.choice(options)

def tit_for_tat_cooperate(last_agent_choice=1):
    """Partner strategy that mirrors the agent's previous choice.

    When called without an argument it returns 1, i.e. it opens with stag.
    """
    mirrored_choice = last_agent_choice
    return mirrored_choice

def tit_for_tat_defect(last_agent_choice=0):
    """Partner strategy that mirrors the agent's previous choice.

    When called without an argument it returns 0, i.e. it opens with hare.
    """
    mirrored_choice = last_agent_choice
    return mirrored_choice

def adaptive_strategy(agent):
    """Partner strategy that plays stag (1) only when the agent's signal exceeds 0.5.

    Args:
        agent: Object exposing a numeric ``x`` attribute (the agent's signal).
    """
    if agent.x > 0.5:
        return 1
    return 0

def cheating_partner(round_num, cycle_length=5, cheat_duration=2):
    """Partner strategy that alternates cooperative and cheating phases.

    Each period lasts ``cycle_length + cheat_duration`` rounds: the partner
    plays 1 for the first ``cycle_length`` rounds and 0 for the rest.

    Args:
        round_num: Zero-based round index.
        cycle_length: Number of cooperative rounds at the start of a period.
        cheat_duration: Number of cheating rounds at the end of a period.
    """
    period = cycle_length + cheat_duration
    position_in_period = round_num % period
    if position_in_period < cycle_length:
        return 1
    return 0

def probabilistic_cheater(cooperate_prob=0.7):
    """Partner strategy that plays stag (1) at random with a fixed probability.

    Args:
        cooperate_prob: Probability of returning 1. Defaults to 0.7, the value
            that was previously hard-coded.

    Returns:
        1 when a fresh ``random.random()`` draw is below ``cooperate_prob``,
        otherwise 0.
    """
    return 1 if random.random() < cooperate_prob else 0

def strategic_cheater(agent, trust_threshold=1.5):
    """Partner strategy that cheats once the agent's trust is high enough.

    Args:
        agent: Object exposing a numeric ``t`` attribute (the agent's trust).
        trust_threshold: Trust level above which the partner plays hare (0).
            Defaults to 1.5, the value that was previously hard-coded.

    Returns:
        0 when ``agent.t`` is strictly above ``trust_threshold``, otherwise 1.
    """
    return 0 if agent.t > trust_threshold else 1

def expectation_violation_cheater(agent):
    """Partner strategy that always contradicts the agent's expectation.

    The agent is taken to expect stag when its signal ``x`` is above 0.5, so
    this partner plays hare (0) in that case and stag (1) otherwise.
    """
    agent_expects_stag = agent.x > 0.5
    if agent_expects_stag:
        return 0
    return 1

def run_single_agent_simulation(u_i, num_rounds=20, strategy=always_collaborate, initial_last_choice=1, strategy_kwargs=None, discount_factor=None):
    """Play one StagHuntAgent against a scripted partner for several rounds.

    Args:
        u_i: Initial signal passed to ``StagHuntAgent`` (None uses its default).
        num_rounds: Number of rounds to simulate.
        strategy: Partner strategy function. The tit-for-tat strategies receive
            the agent's previous choice; ``adaptive_strategy``,
            ``strategic_cheater`` and ``expectation_violation_cheater`` receive
            the agent; ``cheating_partner`` receives the zero-based round
            index plus ``strategy_kwargs``; anything else is called with no
            arguments.
        initial_last_choice: Value used as the agent's "previous choice" in
            the first round (only relevant for tit-for-tat partners).
        strategy_kwargs: Extra keyword arguments for ``cheating_partner``.
            None means no extra arguments.
        discount_factor: When given, the observed partner stag rate is a
            weighted average in which a choice made ``k`` rounds ago has
            weight ``discount_factor ** k``. When None, a plain average over
            all rounds so far is used. NOTE: a negative value can make the
            weights sum to zero and raise ZeroDivisionError.

    Returns:
        A list with one tuple per round:
        ``(round number starting at 1, agent choice, partner choice,
        agent signal after the update, agent trust after the update)``.
    """
    # Avoid a shared mutable default; an omitted argument means "no extras".
    if strategy_kwargs is None:
        strategy_kwargs = {}

    agent = StagHuntAgent(u_i=u_i)
    results = []
    last_agent_choice = initial_last_choice
    partner_choices = []

    for round_num in range(num_rounds):
        # The agent decides before seeing the partner's move for this round.
        agent_choice = agent.make_decision()

        if strategy in (tit_for_tat_cooperate, tit_for_tat_defect):
            partner_choice = strategy(last_agent_choice)
        elif strategy in (adaptive_strategy, strategic_cheater, expectation_violation_cheater):
            partner_choice = strategy(agent)
        elif strategy is cheating_partner:
            partner_choice = strategy(round_num, **strategy_kwargs)
        else:
            partner_choice = strategy()

        partner_choices.append(partner_choice)

        # partner_choices is never empty here, so both averages are well defined.
        if discount_factor is not None:
            # Exponents run from len-1 down to 0, so the oldest choice gets the
            # largest exponent and the most recent choice gets weight 1.
            weights = [discount_factor ** age for age in range(len(partner_choices) - 1, -1, -1)]
            P_obs = sum(choice * w for choice, w in zip(partner_choices, weights)) / sum(weights)
        else:
            P_obs = sum(partner_choices) / len(partner_choices)

        # Compute both updates from the pre-update state, then apply them together.
        new_trust = agent.compute_new_trust(partner_choice)
        new_signal = agent.compute_new_signal(P_obs)
        agent.update_state(new_trust, new_signal)

        last_agent_choice = agent_choice
        results.append((round_num + 1, agent_choice, partner_choice, agent.x, agent.t))

    return results

# Simulation settings shared by every run below
num_rounds = 30  # rounds played in each simulation
discount_factor = 0.6  # per-round decay applied to older partner choices

# Keyword arguments common to all runs
_shared_args = {"u_i": None, "num_rounds": num_rounds, "discount_factor": discount_factor}

# One simulation per partner strategy
sim_results_cheating = run_single_agent_simulation(
    strategy=cheating_partner,
    strategy_kwargs={"cycle_length": 4, "cheat_duration": 2},
    **_shared_args,
)
sim_results_strategic_cheater = run_single_agent_simulation(strategy=strategic_cheater, **_shared_args)
sim_results_prob_cheater = run_single_agent_simulation(strategy=probabilistic_cheater, **_shared_args)
sim_results_tit_for_tat_coop = run_single_agent_simulation(
    strategy=tit_for_tat_cooperate, initial_last_choice=1, **_shared_args
)
sim_results_tit_for_tat_defect = run_single_agent_simulation(
    strategy=tit_for_tat_defect, initial_last_choice=0, **_shared_args
)
sim_results_collab = run_single_agent_simulation(strategy=always_collaborate, **_shared_args)
sim_results_defect = run_single_agent_simulation(strategy=always_defect, **_shared_args)
sim_results_random = run_single_agent_simulation(strategy=random_strategy, **_shared_args)
sim_results_adaptive = run_single_agent_simulation(strategy=adaptive_strategy, **_shared_args)
sim_results_expect_violation = run_single_agent_simulation(strategy=expectation_violation_cheater, **_shared_args)

# Tabulate each run; the columns follow the order of the per-round result tuples
_result_columns = ["Round", "Agent_Choice", "Partner_Choice", "Agent_Signal", "Agent_Trust"]
df_cheating = pd.DataFrame(sim_results_cheating, columns=_result_columns)
df_strategic_cheater = pd.DataFrame(sim_results_strategic_cheater, columns=_result_columns)
df_prob_cheater = pd.DataFrame(sim_results_prob_cheater, columns=_result_columns)
df_collab = pd.DataFrame(sim_results_collab, columns=_result_columns)
df_defect = pd.DataFrame(sim_results_defect, columns=_result_columns)
df_random = pd.DataFrame(sim_results_random, columns=_result_columns)
df_tit_for_tat_coop = pd.DataFrame(sim_results_tit_for_tat_coop, columns=_result_columns)
df_tit_for_tat_defect = pd.DataFrame(sim_results_tit_for_tat_defect, columns=_result_columns)
df_adaptive = pd.DataFrame(sim_results_adaptive, columns=_result_columns)
df_expect_violation = pd.DataFrame(sim_results_expect_violation, columns=_result_columns)


import matplotlib.pyplot as plt
import seaborn as sns

# Colour palette: the first 10 entries of seaborn's "tab20", one per strategy
colors = sns.color_palette("tab20", 10)

# Line styles and markers, one per strategy, to help tell the curves apart
line_styles = ['-', '--', ':', '-.', '-', '--', ':', '-.', '-', '--']
markers = ['o', 's', '^', 'v', 'D', 'p', '*', 'h', 'x', '+']

# (DataFrame, legend label) pairs in plotting order; the pair at position i is
# drawn with colors[i], line_styles[i] and markers[i].
_strategy_frames = [
    (df_cheating, "Cheating"),
    (df_strategic_cheater, "Strategic Cheater"),
    (df_prob_cheater, "Prob Cheater"),
    (df_collab, "Always Collaborate"),
    (df_defect, "Always Defect"),
    (df_random, "Random Strategy"),
    (df_tit_for_tat_coop, "Tit-for-Tat Coop"),
    (df_tit_for_tat_defect, "Tit-for-Tat Defect"),
    (df_adaptive, "Adaptive Strategy"),
    (df_expect_violation, "Expectation Violation"),
]


def _plot_agent_metric(column, y_label, title):
    """Show one figure with ``column`` plotted against Round for every strategy."""
    plt.figure(figsize=(14, 8))
    for idx, (frame, label) in enumerate(_strategy_frames):
        plt.plot(frame["Round"], frame[column], label=label,
                 color=colors[idx], linestyle=line_styles[idx], marker=markers[idx], alpha=0.7)

    plt.xlabel("Round")
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')  # legend sits outside the axes
    plt.grid()
    plt.xticks(ticks=np.arange(1, num_rounds + 1, step=5))
    plt.tight_layout()  # leave room for the outside legend
    plt.show()


# Trust over time, then signal over time
_plot_agent_metric("Agent_Trust", "Agent Trust", f"Trust Evolution (Discount Factor = {discount_factor})")
_plot_agent_metric("Agent_Signal", "Agent Signal", f"Signal Evolution (Discount Factor = {discount_factor})")