In [29]:
import os
import random
import re
import time

import numpy as np
import pandas as pd
from mistralai import Mistral
from tqdm import tqdm # <--- NEW IMPORT

# --- CONFIGURATION ---
# SECURITY: credentials must never be hard-coded in a notebook (they leak through
# version control, shared files and rendered outputs). The key is read from the
# environment instead:  export MISTRAL_API_KEY="..."  before starting Jupyter.
# NOTE: any key that was previously pasted into this cell should be revoked.
API_KEY = os.environ.get("MISTRAL_API_KEY", "")
MODEL = "magistral-small-2509" # Or "mistral-small-2509"

# Experiment Parameters
N_AGENTS = 5
N_ARMS = 10
N_ROUNDS = 3
ARMS_PER_AGENT = 2  # How many arms each agent gets per round

# Set this to False to run the REAL experiment (costs money/credits)
# Set this to True to test the code logic quickly (free)
MOCK_MODE = False

# Initialize Mistral Client (only needed for real runs)
if not MOCK_MODE:
    if not API_KEY:
        # Fail fast here rather than after several slow, retried API failures.
        raise RuntimeError(
            "MISTRAL_API_KEY environment variable is not set. "
            "Export it before running, or set MOCK_MODE = True."
        )
    client = Mistral(api_key=API_KEY)
else:
    client = None

print(f"Setup Complete. Mock Mode is: {MOCK_MODE}")

Setup Complete. Mock Mode is: False


In [30]:
import time
import numpy as np

# --- 1. DEFINE THE ENVIRONMENT (ARMS) ---
# Seed BOTH random number generators used by this notebook:
#   * NumPy's global RNG drives the arm means and the reward noise.
#   * The stdlib `random` module drives arm assignment and fallback choices.
# Seeding only NumPy leaves the assignments different on every run.
np.random.seed(42)
random.seed(42)

# One true mean per arm, drawn around 0.5 and clipped into [0, 1].
TRUE_ARM_MEANS = np.clip(np.random.normal(loc=0.5, scale=0.15, size=N_ARMS), 0.0, 1.0)

def get_arm_reward(arm_index):
    """Sample one noisy payoff for the given arm.

    The payoff is drawn from a normal distribution centred on the arm's entry
    in ``TRUE_ARM_MEANS`` with a standard deviation of 0.05, then rounded to
    four decimal places.
    """
    noisy_payoff = np.random.normal(loc=TRUE_ARM_MEANS[arm_index], scale=0.05)
    return round(noisy_payoff, 4)

# --- 2. ROBUST MISTRAL API WRAPPER (With Rate Limit Handling) ---
def call_mistral(system_prompt, user_prompt):
    """Send one system+user exchange to the Mistral chat API and return the reply text.

    In MOCK_MODE no network call is made and the literal string
    "MOCK_RESPONSE" is returned.

    Otherwise the request is attempted up to 5 times. After each failed
    attempt except the last, the function sleeps with exponential backoff
    (5s, 10s, 20s, 40s). There is no sleep after the final failure, because
    nothing is retried afterwards.

    Args:
        system_prompt: Text sent with role "system".
        user_prompt: Text sent with role "user".

    Returns:
        The reply as a string. If the API returns a list of content chunks,
        only chunks whose ``type`` is "text" are joined and stripped.
        If every attempt fails, returns "ERROR: MAX RETRIES EXCEEDED".
    """
    if MOCK_MODE:
        return "MOCK_RESPONSE"

    max_retries = 5
    base_wait = 5 # Start waiting 5 seconds

    for attempt in range(max_retries):
        try:
            # Attempt the API Call
            response = client.chat.complete(
                model=MODEL,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ]
            )

            # Parse Response
            content = response.choices[0].message.content

            if isinstance(content, str):
                return content

            # Chunked content: keep only the visible "text" chunks.
            return "".join(
                getattr(chunk, "text", "")
                for chunk in content
                if getattr(chunk, "type", None) == "text"
            ).strip()

        except Exception as e:
            # Check if it's a Rate Limit (usually contains "429" or "Rate limit")
            error_msg = str(e).lower()
            is_rate_limit = "429" in error_msg or "rate limit" in error_msg

            print(f"\n[API Warning] Attempt {attempt+1}/{max_retries} failed.")
            print(f"Error: {e}")

            # No retry follows the last attempt, so do not waste time sleeping.
            if attempt == max_retries - 1:
                break

            wait_time = base_wait * (2 ** attempt) # 5s, 10s, 20s, 40s

            if is_rate_limit:
                print(f"Rate Limit Hit. Sleeping for {wait_time} seconds...")
            else:
                print(f"Generic Error. Retrying in {wait_time} seconds...")

            time.sleep(wait_time)

    # If we exit the loop, we failed max_retries times in a row.
    print("\n[CRITICAL FAIL] Max retries exceeded. Returning fallback.")
    return "ERROR: MAX RETRIES EXCEEDED"

In [31]:
def get_empty_structure():
    """Build a fresh per-agent record with three independent empty lists.

    A new dict (and new lists) is created on every call so that DataFrame
    cells never share the same mutable object.
    """
    return {field: [] for field in ('assigned_arms', 'chosen_arm', 'payoff')}

def initialize_experiment_log(num_agents, num_iterations):
    """Build the results table: one row per iteration, one column per agent.

    Each ``Agent_<i>`` cell holds its own empty record from
    ``get_empty_structure()``; the ``Total_Payoff`` column starts at 0.0.
    """
    def blank_round():
        # A fresh record per agent per row, so no two cells alias each other.
        round_record = {f"Agent_{idx}": get_empty_structure() for idx in range(num_agents)}
        round_record['Total_Payoff'] = 0.0
        return round_record

    return pd.DataFrame([blank_round() for _ in range(num_iterations)])

def initialize_message_log():
    """Return a new, empty list that will collect one entry per sent message."""
    return list()

# Build the per-round results table and the (initially empty) message history.
experiment_df = initialize_experiment_log(num_agents=N_AGENTS, num_iterations=N_ROUNDS)
message_log = initialize_message_log()

print("Data structures initialized.")

Data structures initialized.


In [32]:
class Agent:
    """LLM-backed player in the cooperative multi-armed bandit game.

    Each round has two phases: the agent first writes one message to another
    agent (``generate_message``), then picks an arm from its assigned arms
    (``make_choice``). The agent keeps its own pull history and a per-round
    inbox of received messages.
    """

    # An integer that is not part of a decimal or a longer digit run:
    # "0.8" yields no match, "Arm 4." yields "4".
    _STANDALONE_INT = re.compile(r"(?<![\d.])\d+(?!\.?\d)")

    def __init__(self, agent_id, total_agents, total_arms, total_rounds):
        self.agent_id = f"Agent_{agent_id}"
        self.id_num = agent_id
        self.total_agents = total_agents
        self.total_arms = total_arms
        self.total_rounds = total_rounds

        # Memory
        self.history = []  # List of dicts: {'round', 'arm', 'payoff'}
        self.inbox = []    # List of messages received in current round

    def get_system_prompt(self):
        """Return the persona/system prompt describing the game to the LLM."""
        return (
            f"You are {self.agent_id}, participating in a cooperative Multi-Armed Bandit game. "
            f"There are {self.total_agents} agents, {self.total_arms} total arms, "
            f"and the game lasts {self.total_rounds} rounds. "
            "Your goal is to maximize the team's average reward."
        )

    def _format_history(self):
        """Render the last 5 pulls as lines 'Round X: Pulled Arm Y, Reward: Z'."""
        recent = self.history[-5:] # Keep last 5 to save tokens
        if not recent:
            return "No history yet."

        return "\n".join(
            f"Round {record['round']}: Pulled Arm {record['arm']}, "
            f"Reward: {record['payoff']:.4f}"
            for record in recent
        )

    # --- PHASE 1: DECIDE WHO TO MESSAGE ---
    def generate_message(self, current_round, assigned_arms, assignment_map):
        """Ask the LLM whom to message and what to say.

        Args:
            current_round: Round number shown in the prompt.
            assigned_arms: Arms this agent may pull this round.
            assignment_map: Mapping of every agent id to its assigned arms.

        Returns:
            ``(target_agent, message_content)``. ``target_agent`` is a
            canonical ``"Agent_<n>"`` id, or ``None`` when the reply could not
            be parsed (``message_content`` is then ``"No message"``).
        """
        hist_str = self._format_history()

        user_prompt = (
            f"--- ROUND {current_round} ---\n"
            f"My Assigned Arms: {assigned_arms}\n"
            f"All Agents' Assignments: {assignment_map}\n"
            f"My Recent History:\n{hist_str}\n\n"
            "Task: Based on your history and the map, choose one agent to message. "
            "Tell them something useful.\n"
            "Response Format: 'TO: Agent_X | MSG: <content>'\n"
            "Example: 'TO: Agent_4 | MSG: I tried arm 2 and got 0.8 reward.'"
        )

        # Get raw text from LLM
        response_text = call_mistral(self.get_system_prompt(), user_prompt)

        # Parse the response
        target_agent = None
        message_content = "No message"

        if MOCK_MODE:
            # Random logic for testing
            target_id = random.randint(0, self.total_agents - 1)
            target_agent = f"Agent_{target_id}"
            message_content = f"Hello from {self.agent_id}"
        else:
            # Regex to find "TO: Agent_X" and "MSG: ..."
            match = re.search(r"TO:\s*Agent_(\d+).*?MSG:\s*(.*)", response_text, re.DOTALL | re.IGNORECASE)
            if match:
                # The match is case-insensitive, so rebuild the id in canonical
                # form; otherwise a reply such as "agent_3" would not match the
                # "Agent_3" keys used everywhere else and the message is lost.
                target_agent = f"Agent_{int(match.group(1))}"
                message_content = match.group(2).strip()
            else:
                # Fallback if format is wrong
                print(f"Format Warning {self.agent_id}: {response_text[:50]}...")

        return target_agent, message_content

    # --- INTERMEDIATE: RECEIVE ---
    def receive_message(self, sender_id, content):
        """Store an incoming message in this round's inbox."""
        self.inbox.append(f"From {sender_id}: {content}")

    def _parse_arm_choice(self, response_text, assigned_arms):
        """Extract a valid arm from the LLM reply, or return None if there is none."""
        # Prefer the first standalone integer that is actually an assigned arm,
        # skipping decimals ("0.8") and out-of-range numbers mentioned earlier.
        for token in self._STANDALONE_INT.findall(response_text):
            if int(token) in assigned_arms:
                return int(token)

        # Otherwise accept the very first digit run if it names an assigned arm
        # (e.g. a reply of "4.0").
        first_number = re.search(r'\d+', response_text)
        if first_number and int(first_number.group()) in assigned_arms:
            return int(first_number.group())
        return None

    # --- PHASE 2: DECIDE WHICH ARM TO PULL ---
    def make_choice(self, assigned_arms):
        """Ask the LLM which assigned arm to pull and return that arm.

        The prompt contains the agent's recent history and the messages in its
        inbox. If the reply has no valid arm (or in MOCK_MODE), a random
        assigned arm is returned. The inbox is cleared before returning.
        """
        inbox_text = "\n".join(self.inbox) if self.inbox else "No messages received."

        # The prompt asks the model to consider its history, so the history
        # has to be included here.
        user_prompt = (
            f"Your recent history:\n{self._format_history()}\n\n"
            f"You have received the following messages from other agents:\n{inbox_text}\n\n"
            f"Considering your history and this new advice, which arm will you pull from {assigned_arms}?\n"
            "Return ONLY the integer of the arm."
        )

        # We include the system prompt again to ensure persona is kept
        response_text = call_mistral(self.get_system_prompt(), user_prompt)

        choice = None
        if not MOCK_MODE:
            choice = self._parse_arm_choice(response_text, assigned_arms)
        if choice is None:
            choice = random.choice(assigned_arms) # Fallback (and mock behaviour)

        # Clear inbox for next round
        self.inbox = []

        return choice

    def update_history(self, round_num, arm, payoff):
        """Append one pull (round, arm, payoff) to this agent's history."""
        self.history.append({'round': round_num, 'arm': arm, 'payoff': payoff})

In [33]:
class RandomManager:
    """Hands out arms to agents at random each round.

    Every arm is dealt to some agent, and every agent ends up with at least
    ``arms_per_agent`` distinct arms (or as many as exist, if fewer).
    """

    def __init__(self, num_arms, num_agents, arms_per_agent):
        self.num_arms = num_arms
        self.num_agents = num_agents
        self.arms_per_agent = arms_per_agent
        self.all_arms = list(range(num_arms))

    def assign_arms(self):
        """Return a dict mapping each ``Agent_<i>`` to its list of arms.

        Both the arm order and the agent order are shuffled before dealing,
        so that no agent index is systematically favoured when the arms do
        not divide evenly.
        """
        # Shuffle the arms first, then the agents' seating order.
        shuffled_arms = list(self.all_arms)
        random.shuffle(shuffled_arms)

        seating = list(range(self.num_agents))
        random.shuffle(seating)

        hands = {f"Agent_{i}": [] for i in range(self.num_agents)}

        # Coverage pass: deal every arm round-robin over the shuffled seating.
        # The deal position (not the arm id) decides who receives the arm.
        for position, arm in enumerate(shuffled_arms):
            seat = seating[position % self.num_agents]
            hands[f"Agent_{seat}"].append(arm)

        # Top-up pass: agents below quota draw extra arms they do not hold yet.
        # The dict preserves insertion order, so agents are visited as 0, 1, 2, ...
        for hand in hands.values():
            shortfall = self.arms_per_agent - len(hand)
            if shortfall <= 0:
                continue

            candidates = [arm for arm in self.all_arms if arm not in hand]
            # Never ask for more arms than are left to draw from.
            extra_count = min(len(candidates), shortfall)
            if extra_count > 0:
                hand.extend(random.sample(candidates, extra_count))

        return hands

In [34]:
# Build one Agent per id and the manager that deals arms each round.
agents = [Agent(i, N_AGENTS, N_ARMS, N_ROUNDS) for i in range(N_AGENTS)]
manager = RandomManager(N_ARMS, N_AGENTS, ARMS_PER_AGENT)

print(f"Starting Simulation: {N_AGENTS} Agents, {N_ROUNDS} Rounds")

# --- MAIN LOOP WITH TQDM ---
# One iteration per round; `t` is the 0-based round index and is also used as
# the row label when writing into experiment_df.
# NOTE(review): this cell appends to message_log and overwrites experiment_df
# rows, both created in an earlier cell; re-running only this cell will append
# a second set of messages to the same log.
for t in tqdm(range(N_ROUNDS), desc="Simulation Progress"):
    
    # 1. Manager assigns arms (dict: agent id -> list of arms for this round)
    assignments = manager.assign_arms()
    
    # 2. Phase 1: Communication (Generate Messages)
    # Messages are collected first and delivered only after every agent has
    # written its own, so no agent sees a message before composing one.
    messages_this_round = []
    
    # Per-agent prints are avoided here to keep the progress bar clean.
    
    for agent in agents:
        my_arms = assignments[agent.agent_id]
        
        target_id, msg_content = agent.generate_message(t, my_arms, assignments)
        
        # Keep the message only if the target was parsed, is a known agent id,
        # and is not the sender itself; anything else is dropped without a log entry.
        if target_id and target_id in assignments and target_id != agent.agent_id:
            messages_this_round.append({
                'sender': agent.agent_id,
                'receiver': target_id,
                'message': msg_content
            })
            
            # Add to Global Log (same fields plus the round index)
            message_log.append({
                'iteration': t,
                'sender': agent.agent_id,
                'receiver': target_id,
                'message': msg_content
            })
    
    # 3. Deliver Messages
    # Lookup table from agent id to Agent object (rebuilt every round).
    agent_map = {a.agent_id: a for a in agents}
    
    for msg in messages_this_round:
        receiver_obj = agent_map[msg['receiver']]
        receiver_obj.receive_message(msg['sender'], msg['message'])
        
    # 4. Phase 2: Action (Pull Arms)
    round_payoffs = []
    
    for agent in agents:
        my_arms = assignments[agent.agent_id]
        
        # Agent chooses (make_choice also clears the agent's inbox)
        chosen_arm = agent.make_choice(my_arms)
        
        # Environment pays
        payoff = get_arm_reward(chosen_arm)
        
        # Agent learns
        agent.update_history(t, chosen_arm, payoff)
        
        # Update DataFrame: replace the placeholder cell for (round t, this agent)
        experiment_df.at[t, agent.agent_id] = {
            'assigned_arms': list(my_arms),
            'chosen_arm': [chosen_arm],
            'payoff': [payoff]
        }
        
        round_payoffs.append(payoff)
        
    # 5. Update Global Stats
    # Despite the column name 'Total_Payoff', the stored value is the MEAN
    # payoff across agents for this round.
    total_score = np.average(round_payoffs) # mean of this round's payoffs
    experiment_df.at[t, 'Total_Payoff'] = total_score
    
    # tqdm.write() prints the per-round summary without breaking the bar visual
    tqdm.write(f"Round {t} | Avg Payoff: {total_score:.4f} | Msgs Sent: {len(messages_this_round)}")

print("\nExperiment Complete!")

Starting Simulation: 5 Agents, 3 Rounds


Simulation Progress:  33%|█████████████████████                                          | 1/3 [00:59<01:59, 59.68s/it]

Round 0 | Avg Payoff: 0.4718 | Msgs Sent: 5

Error: API error occurred: Status 429. Body: {"object":"error","message":"Rate limit exceeded","type":"rate_limited","param":null,"code":"1300"}
Rate Limit Hit. Sleeping for 5 seconds...


Simulation Progress:  67%|██████████████████████████████████████████                     | 2/3 [03:03<01:37, 97.38s/it]

Round 1 | Avg Payoff: 0.4740 | Msgs Sent: 5


Simulation Progress: 100%|███████████████████████████████████████████████████████████████| 3/3 [04:52<00:00, 97.56s/it]

Round 2 | Avg Payoff: 0.5844 | Msgs Sent: 5

Experiment Complete!





In [40]:
# 1. Preview the results table (head() shows at most the first 5 rows)
print("Experiment Data (First 5 rounds):")
print(experiment_df.head())

# 2. Print the message log: every entry, not a truncated preview
print("\nMessage Log:")
for msg in list(message_log):
    print(msg)

# 3. Persist both tables as CSV files in the working directory
message_frame = pd.DataFrame(message_log)
experiment_df.to_csv("experiment_results.csv")
message_frame.to_csv("message_log.csv")
print("\nFiles saved: 'experiment_results.csv' and 'message_log.csv'")

Experiment Data (First 5 rounds):
                                                            Agent_0  \
0  {'assigned_arms': [4, 7], 'chosen_arm': [4], 'payoff': [0.4417]}   
1  {'assigned_arms': [0, 3], 'chosen_arm': [0], 'payoff': [0.5464]}   
2  {'assigned_arms': [4, 9], 'chosen_arm': [9], 'payoff': [0.6547]}   

                                                            Agent_1  \
0   {'assigned_arms': [2, 1], 'chosen_arm': [1], 'payoff': [0.456]}   
1  {'assigned_arms': [8, 9], 'chosen_arm': [8], 'payoff': [0.3789]}   
2  {'assigned_arms': [2, 8], 'chosen_arm': [2], 'payoff': [0.5859]}   

                                                            Agent_2  \
0  {'assigned_arms': [8, 0], 'chosen_arm': [8], 'payoff': [0.4417]}   
1  {'assigned_arms': [1, 7], 'chosen_arm': [7], 'payoff': [0.6308]}   
2  {'assigned_arms': [0, 7], 'chosen_arm': [0], 'payoff': [0.5779]}   

                                                            Agent_3  \
0  {'assigned_arms': [5, 9], 'chosen_arm

In [38]:
# Bare expression: the notebook renders the results table as the cell output.
experiment_df

Unnamed: 0,Agent_0,Agent_1,Agent_2,Agent_3,Agent_4,Total_Payoff
0,"{'assigned_arms': [4, 7], 'chosen_arm': [4], 'payoff': [0.4417]}","{'assigned_arms': [2, 1], 'chosen_arm': [1], 'payoff': [0.456]}","{'assigned_arms': [8, 0], 'chosen_arm': [8], 'payoff': [0.4417]}","{'assigned_arms': [5, 9], 'chosen_arm': [5], 'payoff': [0.3692]}","{'assigned_arms': [3, 6], 'chosen_arm': [6], 'payoff': [0.6506]}",0.47184
1,"{'assigned_arms': [0, 3], 'chosen_arm': [0], 'payoff': [0.5464]}","{'assigned_arms': [8, 9], 'chosen_arm': [8], 'payoff': [0.3789]}","{'assigned_arms': [1, 7], 'chosen_arm': [7], 'payoff': [0.6308]}","{'assigned_arms': [5, 2], 'chosen_arm': [5], 'payoff': [0.4195]}","{'assigned_arms': [6, 4], 'chosen_arm': [4], 'payoff': [0.3943]}",0.47398
2,"{'assigned_arms': [4, 9], 'chosen_arm': [9], 'payoff': [0.6547]}","{'assigned_arms': [2, 8], 'chosen_arm': [2], 'payoff': [0.5859]}","{'assigned_arms': [0, 7], 'chosen_arm': [0], 'payoff': [0.5779]}","{'assigned_arms': [1, 5], 'chosen_arm': [5], 'payoff': [0.3936]}","{'assigned_arms': [6, 3], 'chosen_arm': [6], 'payoff': [0.7097]}",0.58436


In [36]:
# Remove pandas' display limits (None = unlimited) so that every row, every
# column and the full content of wide, dict-valued cells are shown.
for option_name in ('display.max_rows', 'display.max_columns', 'display.max_colwidth'):
    pd.set_option(option_name, None)

In [37]:
# Bare expression: show the results table again as the cell output.
experiment_df

Unnamed: 0,Agent_0,Agent_1,Agent_2,Agent_3,Agent_4,Total_Payoff
0,"{'assigned_arms': [4, 7], 'chosen_arm': [4], 'payoff': [0.4417]}","{'assigned_arms': [2, 1], 'chosen_arm': [1], 'payoff': [0.456]}","{'assigned_arms': [8, 0], 'chosen_arm': [8], 'payoff': [0.4417]}","{'assigned_arms': [5, 9], 'chosen_arm': [5], 'payoff': [0.3692]}","{'assigned_arms': [3, 6], 'chosen_arm': [6], 'payoff': [0.6506]}",0.47184
1,"{'assigned_arms': [0, 3], 'chosen_arm': [0], 'payoff': [0.5464]}","{'assigned_arms': [8, 9], 'chosen_arm': [8], 'payoff': [0.3789]}","{'assigned_arms': [1, 7], 'chosen_arm': [7], 'payoff': [0.6308]}","{'assigned_arms': [5, 2], 'chosen_arm': [5], 'payoff': [0.4195]}","{'assigned_arms': [6, 4], 'chosen_arm': [4], 'payoff': [0.3943]}",0.47398
2,"{'assigned_arms': [4, 9], 'chosen_arm': [9], 'payoff': [0.6547]}","{'assigned_arms': [2, 8], 'chosen_arm': [2], 'payoff': [0.5859]}","{'assigned_arms': [0, 7], 'chosen_arm': [0], 'payoff': [0.5779]}","{'assigned_arms': [1, 5], 'chosen_arm': [5], 'payoff': [0.3936]}","{'assigned_arms': [6, 3], 'chosen_arm': [6], 'payoff': [0.7097]}",0.58436
