In [13]:
import pandas as pd
import numpy as np
import random
import re
import time
from mistralai import Mistral
from tqdm import tqdm # <--- NEW IMPORT
import os
import pickle

# --- CONFIGURATION ---
# The API key is read from the environment so the secret never lives in the
# notebook source, its saved outputs, or version-control history.
#   export MISTRAL_API_KEY="..."   # set this before launching Jupyter
# NOTE: any key that was previously pasted into this cell should be treated
# as compromised and rotated.
API_KEY = os.environ.get("MISTRAL_API_KEY", "")
MODEL = "magistral-small-2509" # Or "mistral-small-2509"

# Experiment Parameters
N_AGENTS = 2
N_ARMS = 10
N_ROUNDS = 1
# Minimum number of arms each agent holds per round. RandomManager first deals
# every arm out for full coverage, so agents can end up with more than this.
ARMS_PER_AGENT = 2

# Set this to False to run the REAL experiment (costs money/credits)
# Set this to True to test the code logic quickly (free)
MOCK_MODE = False

# Initialize Mistral Client (only needed when real API calls are made)
if not MOCK_MODE:
    if not API_KEY:
        # Fail early with an actionable message instead of a late auth error.
        raise RuntimeError(
            "MISTRAL_API_KEY is not set. Export it before running, "
            "or set MOCK_MODE = True to test without the API."
        )
    client = Mistral(api_key=API_KEY)
else:
    client = None

print(f"Setup Complete. Mock Mode is: {MOCK_MODE}")

Setup Complete. Mock Mode is: False


In [14]:
import time
import numpy as np

# --- 1. DEFINE THE ENVIRONMENT (ARMS) ---
# Seed NumPy's global RNG so the draws that follow are reproducible.
# Only NumPy is seeded here; the stdlib `random` module used by other cells
# is not seeded in this cell.
np.random.seed(42) 
# One hidden mean payoff per arm: sampled from a normal distribution
# (loc=0.5, scale=0.15) and clipped into [0.0, 1.0].
TRUE_ARM_MEANS = np.clip(np.random.normal(loc=0.5, scale=0.15, size=N_ARMS), 0.0, 1.0)

def get_arm_reward(arm_index):
    """
    Draw one noisy reward for the arm at `arm_index`.

    The reward is sampled from a normal distribution centred on that arm's
    entry in TRUE_ARM_MEANS with a standard deviation of 0.05, then rounded
    to four decimal places. The value is not clipped.
    """
    noisy_payoff = np.random.normal(loc=TRUE_ARM_MEANS[arm_index], scale=0.05)
    return round(noisy_payoff, 4)

# --- 2. ROBUST MISTRAL API WRAPPER (With Rate Limit Handling) ---
def call_mistral(system_prompt, user_prompt):
    """
    Send one system + user exchange to Mistral and return the reply text.

    Retries up to 5 times with exponential backoff (5s, 10s, 20s, 40s between
    attempts) so a 'Too Many Requests' response does not crash the run.

    Args:
        system_prompt: Text sent with role "system".
        user_prompt: Text sent with role "user".

    Returns:
        The reply as a string. When the API returns a list of chunks, only the
        chunks whose type is "text" are joined and stripped. In MOCK_MODE the
        constant "MOCK_RESPONSE" is returned without any API call. If every
        attempt fails, the sentinel "ERROR: MAX RETRIES EXCEEDED" is returned
        instead of raising.
    """
    if MOCK_MODE:
        return "MOCK_RESPONSE"

    max_retries = 5
    base_wait = 5 # Start waiting 5 seconds

    for attempt in range(max_retries):
        try:
            # Attempt the API Call
            response = client.chat.complete(
                model=MODEL,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ]
            )

            # Parse Response
            content = response.choices[0].message.content

            if isinstance(content, str):
                return content

            # Chunked content: keep only the visible "text" chunks.
            visible_text = [
                getattr(chunk, "text", "")
                for chunk in content
                if getattr(chunk, "type", None) == "text"
            ]
            return "".join(visible_text).strip()

        except Exception as e:
            # Check if it's a Rate Limit (usually contains "429" or "Rate limit")
            error_msg = str(e).lower()
            is_rate_limit = "429" in error_msg or "rate limit" in error_msg

            print(f"\n[API Warning] Attempt {attempt+1}/{max_retries} failed.")
            print(f"Error: {e}")

            # No retry follows the last attempt, so sleeping here would only
            # delay the fallback (previously an extra 80 seconds).
            if attempt == max_retries - 1:
                break

            wait_time = base_wait * (2 ** attempt) # 5s, 10s, 20s, 40s

            if is_rate_limit:
                print(f"Rate Limit Hit. Sleeping for {wait_time} seconds...")
            else:
                print(f"Generic Error. Retrying in {wait_time} seconds...")

            time.sleep(wait_time)

    # If we exit the loop, we failed 5 times in a row.
    print("\n[CRITICAL FAIL] Max retries exceeded. Returning fallback.")
    return "ERROR: MAX RETRIES EXCEEDED"

In [15]:
def get_empty_structure():
    """
    Build a fresh per-agent record for one round.

    Every call returns a new dict holding three new, empty lists, so records
    stored in different DataFrame cells never share state.
    """
    return {field: [] for field in ('assigned_arms', 'chosen_arm', 'payoff')}

def initialize_experiment_log(num_agents, num_iterations):
    """
    Build the empty results table for a run.

    The DataFrame has one row per iteration, one "Agent_<i>" column per agent
    (each cell holding its own empty record from get_empty_structure()), and a
    'Total_Payoff' column initialised to 0.0.
    """
    def _blank_round():
        # One row: a private empty record per agent plus the score column.
        round_entry = {
            f"Agent_{agent_idx}": get_empty_structure()
            for agent_idx in range(num_agents)
        }
        round_entry['Total_Payoff'] = 0.0
        return round_entry

    return pd.DataFrame([_blank_round() for _ in range(num_iterations)])

def initialize_message_log():
    """Return a new, empty list that will collect the message records."""
    return list()

# Create the structures
# NOTE: the setup/resume cell later rebinds both `experiment_df` and
# `message_log` from load_checkpoint(), so the objects created here only act
# as initial placeholders.
experiment_df = initialize_experiment_log(N_AGENTS, N_ROUNDS)
message_log = initialize_message_log()

print("Data structures initialized.")

Data structures initialized.


In [16]:
class Agent:
    """
    LLM-backed player in the cooperative multi-armed bandit game.

    Each round an agent (1) writes one message to another agent and
    (2) picks one of its assigned arms. Every LLM call is independent, so any
    context the model should use has to be placed in the prompt explicitly.

    Attributes:
        agent_id: String id of the form "Agent_<n>".
        id_num: The numeric id passed to the constructor.
        total_agents / total_arms / total_rounds: Game size, used in prompts.
        history: List of {'round', 'arm', 'payoff'} dicts for past pulls.
        inbox: Messages received in the current round (cleared by make_choice).
    """

    def __init__(self, agent_id, total_agents, total_arms, total_rounds):
        self.agent_id = f"Agent_{agent_id}"
        self.id_num = agent_id
        self.total_agents = total_agents
        self.total_arms = total_arms
        self.total_rounds = total_rounds

        # Memory
        self.history = []  # List of dicts: {'round', 'arm', 'payoff'}
        self.inbox = []    # List of messages received in current round

    def get_system_prompt(self):
        """Return the persona/game description sent as the system prompt."""
        return (
            f"You are {self.agent_id}, participating in a cooperative Multi-Armed Bandit game. "
            f"There are {self.total_agents} agents, {self.total_arms} total arms, "
            f"and the game lasts {self.total_rounds} rounds. "
            "Your goal is to maximize the team's average reward."
        )

    def _format_history(self):
        """
        Render the full pull history, one line per record:
        'Round X: Pulled Arm Y, Reward: Z'. Returns "No history yet." when empty.
        """
        if not self.history:
            return "No history yet."

        return "\n".join(
            f"Round {record['round']}: Pulled Arm {record['arm']}, "
            f"Reward: {record['payoff']:.4f}"
            for record in self.history
        )

    # --- PHASE 1: DECIDE WHO TO MESSAGE ---
    def generate_message(self, current_round, assigned_arms, assignment_map):
        """
        Ask the model which agent to message and what to say.

        Args:
            current_round: Round number shown in the prompt.
            assigned_arms: This agent's arms for the round.
            assignment_map: Mapping of every agent id to its assigned arms.

        Returns:
            (target_agent, message_content). target_agent is "Agent_<n>" or
            None when the reply could not be parsed; message_content is then
            "No message".
        """
        hist_str = self._format_history()

        user_prompt = (
            f"--- ROUND {current_round} ---\n"
            f"My Assigned Arms: {assigned_arms}\n"
            f"All Agents' Assignments: {assignment_map}\n"
            f"My Recent History:\n{hist_str}\n\n"
            "Task: Based on your history and the map, choose one agent to message. "
            "Tell them something useful.\n"
            "Response Format: 'TO: Agent_X | MSG: <content>'\n"
            "Example: 'TO: Agent_4 | MSG: I tried arm 2 and got 0.8 reward.'"
        )

        # Get raw text from LLM
        response_text = call_mistral(self.get_system_prompt(), user_prompt)

        # Parse the response
        target_agent = None
        message_content = "No message"

        if MOCK_MODE:
            # Random logic for testing
            target_id = random.randint(0, self.total_agents - 1)
            target_agent = f"Agent_{target_id}"
            message_content = f"Hello from {self.agent_id}"
        else:
            # Regex to find "TO: Agent_X" and "MSG: ..."
            match = re.search(r"TO:\s*Agent_(\d+).*?MSG:\s*(.*)", response_text, re.DOTALL | re.IGNORECASE)
            if match:
                # The match is case-insensitive, so rebuild the id in canonical
                # form; otherwise a reply like "agent_1" would not match any
                # key of the assignment map and the message would be dropped.
                target_agent = f"Agent_{int(match.group(1))}"
                message_content = match.group(2).strip()
            else:
                # Fallback if format is wrong
                print(f"Format Warning {self.agent_id}: {response_text[:50]}...")

        return target_agent, message_content

    # --- INTERMEDIATE: RECEIVE ---
    def receive_message(self, sender_id, content):
        """Queue a message from `sender_id` for the next make_choice call."""
        self.inbox.append(f"From {sender_id}: {content}")

    # --- PHASE 2: DECIDE WHICH ARM TO PULL ---
    def make_choice(self, assigned_arms):
        """
        Ask the model which assigned arm to pull, then clear the inbox.

        Args:
            assigned_arms: Arms this agent may choose from this round.

        Returns:
            The first number in the reply that is one of `assigned_arms`;
            a random assigned arm if the reply contains no valid arm
            (or always, in MOCK_MODE).
        """
        inbox_text = "\n".join(self.inbox) if self.inbox else "No messages received."
        # Each API call is stateless, so the history the prompt refers to has
        # to be included here for the model to actually use it.
        hist_str = self._format_history()

        user_prompt = (
            f"Your history so far:\n{hist_str}\n\n"
            f"You have received the following messages from other agents:\n{inbox_text}\n\n"
            f"Considering your history and this new advice, which arm will you pull from {assigned_arms}?\n"
            "Return ONLY the integer of the arm."
        )

        # We include the system prompt again to ensure persona is kept
        response_text = call_mistral(self.get_system_prompt(), user_prompt)

        if MOCK_MODE:
            choice = random.choice(assigned_arms)
        else:
            # Take the first number that is a legal arm, so a stray invalid
            # number before the answer does not force a random pick.
            candidates = (int(token) for token in re.findall(r'\d+', response_text))
            choice = next((arm for arm in candidates if arm in assigned_arms), None)
            if choice is None:
                choice = random.choice(assigned_arms) # Fallback

        # Clear inbox for next round
        self.inbox = []

        return choice

    def update_history(self, round_num, arm, payoff):
        """Append one pull (round, arm, payoff) to this agent's history."""
        self.history.append({'round': round_num, 'arm': arm, 'payoff': payoff})

In [17]:
class RandomManager:
    """
    Deals arms to agents at random each round.

    Every arm is handed out exactly once in the coverage pass, so each arm is
    held by at least one agent. `arms_per_agent` then acts as a lower bound:
    agents holding fewer arms than that are topped up with random extra arms.
    Agents that already hold more keep all of them.
    """

    def __init__(self, num_arms, num_agents, arms_per_agent):
        self.num_arms = num_arms
        self.num_agents = num_agents
        self.arms_per_agent = arms_per_agent
        self.all_arms = list(range(num_arms))

    def assign_arms(self):
        """
        Produce one round's assignment.

        Returns:
            Dict mapping "Agent_<i>" to that agent's list of arm indices.
        """
        hands = {f"Agent_{idx}": [] for idx in range(self.num_agents)}

        # Randomise both the arm order and the order agents are dealt to, so
        # leftover arms do not systematically land on the low-numbered agents.
        shuffled_arms = list(self.all_arms)
        random.shuffle(shuffled_arms)

        seating = list(range(self.num_agents))
        random.shuffle(seating)

        # Coverage pass: deal round-robin by deal position (not by arm id).
        for position, arm in enumerate(shuffled_arms):
            seat = seating[position % self.num_agents]
            hands[f"Agent_{seat}"].append(arm)

        # Top-up pass: bring every agent up to at least `arms_per_agent`.
        for idx in range(self.num_agents):
            hand = hands[f"Agent_{idx}"]
            shortfall = self.arms_per_agent - len(hand)
            if shortfall <= 0:
                continue

            # Only arms this agent does not hold yet are eligible.
            spare = [arm for arm in self.all_arms if arm not in hand]

            # If fewer spare arms exist than needed, take all of them.
            extra = min(len(spare), shortfall)
            if extra > 0:
                hand.extend(random.sample(spare, extra))

        return hands

In [18]:
# Save and load checkpoint functions

# Pickle file that save_checkpoint() writes the experiment DataFrame to.
CHECKPOINT_DF = "checkpoint_experiment_df.pkl"
# Pickle file that save_checkpoint() writes the message log list to.
CHECKPOINT_LOG = "checkpoint_message_log.pkl"

def save_checkpoint(df, msg_log):
    """
    Persist the current run state to disk.

    Writes `df` as a pickle to CHECKPOINT_DF, then pickles `msg_log` to
    CHECKPOINT_LOG. Existing files at those paths are overwritten.
    """
    df.to_pickle(CHECKPOINT_DF)
    with open(CHECKPOINT_LOG, "wb") as log_file:
        pickle.dump(msg_log, log_file)

def load_checkpoint(n_agents, n_rounds):
    """
    Load a previous run if both checkpoint files exist, otherwise start fresh.

    Args:
        n_agents: Number of agents; used to build empty rows.
        n_rounds: Number of rounds the current run should cover.

    Returns:
        (start_round, df, msg_log). start_round is the index of the first
        round that still has to be played: rounds are counted as done while
        Agent_0 has a recorded 'chosen_arm', scanning from round 0.

    If the saved DataFrame has fewer rows than `n_rounds` (for example because
    the round count was increased since the checkpoint was written), it is
    padded with empty rounds so the run can continue instead of failing with
    a KeyError.
    """
    if os.path.exists(CHECKPOINT_DF) and os.path.exists(CHECKPOINT_LOG):
        print("Found checkpoint files. Loading previous state...")
        # SECURITY: unpickling can execute arbitrary code. Only load
        # checkpoint files that this notebook itself produced.
        df = pd.read_pickle(CHECKPOINT_DF)
        with open(CHECKPOINT_LOG, "rb") as f:
            msg_log = pickle.load(f)

        # Extend a too-short checkpoint so every round index up to n_rounds exists.
        missing_rounds = n_rounds - len(df)
        if missing_rounds > 0:
            print(f"Checkpoint holds {len(df)} round(s); extending to {n_rounds}.")
            padding = initialize_experiment_log(n_agents, missing_rounds)
            df = pd.concat([df, padding], ignore_index=True)

        start_round = 0
        for i in range(n_rounds):
            # Check if Agent_0 has data for this round
            if df.at[i, "Agent_0"]['chosen_arm']:
                start_round = i + 1
            else:
                break
        print(f"Resuming from Round {start_round}...")
        if n_rounds > 0 and start_round >= n_rounds:
            # Make the otherwise silent "nothing left to run" case visible.
            print(
                f"Checkpoint already covers all {n_rounds} round(s); delete "
                f"'{CHECKPOINT_DF}' and '{CHECKPOINT_LOG}' to start a new run."
            )
        return start_round, df, msg_log
    else:
        print("No checkpoint found. Starting fresh.")
        df = initialize_experiment_log(n_agents, n_rounds)
        msg_log = initialize_message_log()
        return 0, df, msg_log

In [19]:
# --- SETUP & RESUME LOGIC ---
# Rebinds experiment_df / message_log either from the checkpoint files or as
# fresh empty structures; start_round is the first round still to be played.
start_round, experiment_df, message_log = load_checkpoint(N_AGENTS, N_ROUNDS)

# Re-initialize Agents and Manager (agent objects are not checkpointed, only
# the DataFrame and the message log are)
agents = [Agent(i, N_AGENTS, N_ARMS, N_ROUNDS) for i in range(N_AGENTS)]
manager = RandomManager(N_ARMS, N_AGENTS, ARMS_PER_AGENT)

# RESTORE MEMORY (If resuming)
# Replays each completed round's (arm, payoff) from the DataFrame into the
# freshly created agents so their prompt history matches the earlier run.
if start_round > 0:
    print("Restoring agent memories from dataframe...")
    for r in range(start_round):
        for agent in agents:
            record = experiment_df.at[r, agent.agent_id]
            if record['chosen_arm']:
                agent.update_history(r, record['chosen_arm'][0], record['payoff'][0])

print(f"Starting Simulation from Round {start_round} of {N_ROUNDS}")

# --- MAIN LOOP ---
# When start_round equals N_ROUNDS the range is empty and no round is played.
for t in tqdm(range(start_round, N_ROUNDS), desc="Simulation Progress"):
    
    # 1. Manager assigns arms
    assignments = manager.assign_arms()
    
    # 2. Phase 1: Communication
    # Every agent composes its message before any message is delivered.
    messages_this_round = []
    for agent in agents:
        my_arms = assignments[agent.agent_id]
        target_id, msg_content = agent.generate_message(t, my_arms, assignments)
        
        # Keep only messages addressed to a known agent other than the sender;
        # unparsable (None), unknown, or self-addressed targets are dropped.
        if target_id and target_id in assignments and target_id != agent.agent_id:
            msg_record = {
                'iteration': t, 
                'sender': agent.agent_id, 
                'receiver': target_id, 
                'message': msg_content
            }
            messages_this_round.append(msg_record)
            message_log.append(msg_record)
    
    # 3. Deliver Messages
    agent_map = {a.agent_id: a for a in agents}
    for msg in messages_this_round:
        receiver_obj = agent_map[msg['receiver']]
        receiver_obj.receive_message(msg['sender'], msg['message'])
        
    # 4. Phase 2: Action
    round_payoffs = []
    for agent in agents:
        my_arms = assignments[agent.agent_id]
        chosen_arm = agent.make_choice(my_arms)
        payoff = get_arm_reward(chosen_arm)
        
        agent.update_history(t, chosen_arm, payoff)
        
        # Replace this round's record for the agent; chosen arm and payoff are
        # stored as one-element lists.
        experiment_df.at[t, agent.agent_id] = {
            'assigned_arms': list(my_arms),
            'chosen_arm': [chosen_arm],
            'payoff': [payoff]
        }
        round_payoffs.append(payoff)
        
    # 5. Update Stats & SAVE CHECKPOINT
    # NOTE: despite the column name, 'Total_Payoff' holds the round's mean
    # payoff across agents (np.average), not the sum.
    total_score = np.average(round_payoffs)
    experiment_df.at[t, 'Total_Payoff'] = total_score
    
    # Save progress to disk immediately
    save_checkpoint(experiment_df, message_log)
    
    tqdm.write(f"Round {t} | Avg Payoff: {total_score:.4f} | Msgs Sent: {len(messages_this_round)}")

print("\nExperiment Complete!")

Found checkpoint files. Loading previous state...
Resuming from Round 1...
Restoring agent memories from dataframe...
Starting Simulation from Round 1 of 1


Simulation Progress: 0it [00:00, ?it/s]


Experiment Complete!





In [20]:
# 1. View the Main DataFrame (head() shows at most the first 5 rows)
print("Experiment Data (First 5 rounds):")
print(experiment_df.head())

# 2. View the Message Log (prints every message; `[:]` iterates over a copy
#    of the whole list, it does not limit the output to five entries)
print("\nMessage Log:")
for msg in message_log[:]:
    print(msg)

# 3. Save to CSV for your Network Science Students
# The per-agent cells are dicts, so they are written to the CSV as their
# string representation.
experiment_df.to_csv("experiment_results.csv")
pd.DataFrame(message_log).to_csv("message_log.csv")
print("\nFiles saved: 'experiment_results.csv' and 'message_log.csv'")

Experiment Data (First 5 rounds):
                                                                     Agent_0  \
0  {'assigned_arms': [1, 9, 5, 2, 3], 'chosen_arm': [1], 'payoff': [0.4561]}   

                                                                     Agent_1  \
0  {'assigned_arms': [0, 4, 8, 7, 6], 'chosen_arm': [0], 'payoff': [0.5512]}   

   Total_Payoff  
0       0.50365  

Message Log:
{'iteration': 0, 'sender': 'Agent_0', 'receiver': 'Agent_1', 'message': "Let's share the arm number and reward after each pull to help maximize the team's average reward."}
{'iteration': 0, 'sender': 'Agent_1', 'receiver': 'Agent_0', 'message': "I’m assigned arms [0,4,6,7,8]. Let's coordinate to avoid overlapping arms."}

Files saved: 'experiment_results.csv' and 'message_log.csv'


In [21]:
# Show the experiment DataFrame as this cell's output.
experiment_df

Unnamed: 0,Agent_0,Agent_1,Total_Payoff
0,"{'assigned_arms': [1, 9, 5, 2, 3], 'chosen_arm': [1], 'payoff': [0.4561]}","{'assigned_arms': [0, 4, 8, 7, 6], 'chosen_arm': [0], 'payoff': [0.5512]}",0.50365


In [22]:
# Lift pandas' display limits so the nested per-agent records are shown in full.
pd.set_option('display.max_rows', None)           # Max rows shown (default: 60)
pd.set_option('display.max_columns', None)        # Max columns shown (default: 20)
pd.set_option('display.max_colwidth', None)       # Do not truncate cell contents

In [23]:
# Show the experiment DataFrame again, now with the display limits lifted.
experiment_df

Unnamed: 0,Agent_0,Agent_1,Total_Payoff
0,"{'assigned_arms': [1, 9, 5, 2, 3], 'chosen_arm': [1], 'payoff': [0.4561]}","{'assigned_arms': [0, 4, 8, 7, 6], 'chosen_arm': [0], 'payoff': [0.5512]}",0.50365
