In [1]:
import pandas as pd
import numpy as np
import random
import re
import time
import os
import pickle
import sys
import requests
from tqdm import tqdm
from scipy.special import softmax

# --- CONFIGURATION ---
# SECURITY: the API key is read from the MISTRAL_API_KEY environment variable
# rather than being embedded in the source, so it never ends up in version
# control or in a shared notebook. Any key that was previously committed in
# this file should be treated as leaked and revoked.
API_KEY = os.environ.get("MISTRAL_API_KEY", "")
MODEL = "magistral-small-2509"  # model name sent in the chat-completions payload

# Experiment size: number of agents, number of bandit arms, number of rounds.
N_AGENTS = 30
N_ARMS = 50
N_ROUNDS = 50
# When True, no HTTP request is made and agents act randomly.
MOCK_MODE = False

# RL Parameters
RL_LEARNING_RATE = 0.5

# --- PATHS ---
# Checkpoints are written to the current working directory.
BASE_DIR = os.getcwd()
CHECKPOINT_DF = os.path.join(BASE_DIR, "real_experiment_df.pkl")
CHECKPOINT_LOG = os.path.join(BASE_DIR, "real_message_log.pkl")
CHECKPOINT_WEIGHTS = os.path.join(BASE_DIR, "real_weights_log.pkl")

# --- HELPER FUNCTIONS ---
def get_empty_structure():
    """Return a fresh per-agent record whose three fields are new empty lists."""
    field_names = ('assigned_arms', 'chosen_arm', 'payoff')
    return {name: [] for name in field_names}

def initialize_experiment_log(num_agents, num_iterations):
    """Build the empty results table for an experiment.

    The returned DataFrame has one row per iteration. Each row holds one
    ``Agent_<i>`` column per agent, initialised with an independent empty
    record from ``get_empty_structure()``, plus a ``Total_Payoff`` column
    initialised to 0.0.
    """
    agent_columns = [f"Agent_{i}" for i in range(num_agents)]
    rows = []
    for _ in range(num_iterations):
        record = {}
        for column in agent_columns:
            # A separate record per cell so later edits never alias.
            record[column] = get_empty_structure()
        record['Total_Payoff'] = 0.0
        rows.append(record)
    return pd.DataFrame(rows)

def save_checkpoint(df, msg_log, weights_log):
    """Persist the results table, message log and weights log to disk.

    Each object is first pickled to a ``.tmp`` sibling of its checkpoint
    path; only after all three temporary files are written are they moved
    over the real checkpoint files with ``os.replace``.
    """
    df_tmp = CHECKPOINT_DF + ".tmp"
    msg_tmp = CHECKPOINT_LOG + ".tmp"
    weights_tmp = CHECKPOINT_WEIGHTS + ".tmp"

    # Stage 1: write everything to temporary files.
    df.to_pickle(df_tmp)
    for payload, tmp_path in ((msg_log, msg_tmp), (weights_log, weights_tmp)):
        with open(tmp_path, "wb") as handle:
            pickle.dump(payload, handle)

    # Stage 2: swap the temporary files into place.
    swaps = (
        (df_tmp, CHECKPOINT_DF),
        (msg_tmp, CHECKPOINT_LOG),
        (weights_tmp, CHECKPOINT_WEIGHTS),
    )
    for tmp_path, final_path in swaps:
        os.replace(tmp_path, final_path)

def load_checkpoint(n_agents, n_rounds):
    """Load a saved experiment state, or create a fresh one.

    Args:
        n_agents: Number of agents; used only when a fresh log is created.
        n_rounds: Number of rounds; sizes a fresh log and bounds the scan
            for completed rounds in a loaded one.

    Returns:
        A tuple ``(start_round, df, msg_log, weights_log)``. ``start_round``
        is the index of the first round for which ``Agent_0`` has no
        recorded ``chosen_arm``. It is 0 for a fresh experiment.
    """
    if not os.path.exists(CHECKPOINT_DF):
        print("Starting fresh experiment.")
        return 0, initialize_experiment_log(n_agents, n_rounds), [], []

    print("Found checkpoint. Loading...")
    # NOTE(security): unpickling executes arbitrary code. Only resume from
    # checkpoint files that this script wrote itself.
    df = pd.read_pickle(CHECKPOINT_DF)
    msg_log = _load_pickled_log(CHECKPOINT_LOG)
    weights_log = _load_pickled_log(CHECKPOINT_WEIGHTS)

    # Count the leading run of completed rounds. Bounding by len(df) avoids a
    # KeyError when the saved table has fewer rows than n_rounds.
    start_round = 0
    for i in range(min(n_rounds, len(df))):
        if not df.at[i, "Agent_0"]['chosen_arm']:
            break
        start_round = i + 1
    print(f"Resuming from Round {start_round}...")
    return start_round, df, msg_log, weights_log


def _load_pickled_log(path):
    """Unpickle a companion log file, or return an empty list if it is missing."""
    # The companion file can be absent if a save was interrupted after the
    # results table was moved into place but before the logs were.
    try:
        with open(path, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        print(f"Warning: checkpoint file {path} is missing. Using an empty log.")
        return []

# --- ENVIRONMENT ---
# Seed NumPy's global RNG so the shuffled arm layout below is reproducible.
np.random.seed(42) 
# Evenly spaced mean rewards from 0 to 100, one value per arm.
_sorted_means = np.linspace(0, 100, N_ARMS)
# Shuffle a copy so that an arm's index says nothing about its mean reward;
# _sorted_means itself is left in ascending order.
TRUE_ARM_MEANS = _sorted_means.copy()
np.random.shuffle(TRUE_ARM_MEANS)
print(f"Environment Initialized (0-100). Best Arm Mean: {np.max(TRUE_ARM_MEANS):.2f}")

def get_arm_reward(arm_index):
    """Draw one reward for the given arm.

    The reward is sampled from a normal distribution centred on the arm's
    true mean with standard deviation 5.0, clipped to [0, 100] and rounded
    to two decimal places.
    """
    noisy = np.random.normal(loc=TRUE_ARM_MEANS[arm_index], scale=5.0)
    bounded = np.clip(noisy, 0.0, 100.0)
    return round(bounded, 2)

# --- API ---
def call_mistral(system_prompt, user_prompt):
    """Send one system+user prompt pair to the Mistral chat API.

    Args:
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.

    Returns:
        The stripped reply text. When the API returns the content as a list
        of chunks, only chunks of type ``text`` are concatenated. Returns
        ``"MOCK_RESPONSE"`` when ``MOCK_MODE`` is set, and ``"ERROR"`` when
        every attempt fails or the request is rejected with a client error
        that a retry cannot fix.
    """
    if MOCK_MODE:
        return "MOCK_RESPONSE"
    url = "https://api.mistral.ai/v1/chat/completions"
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    }

    max_attempts = 5
    # Client errors that are worth retrying: request timeout and rate limiting.
    retryable_client_errors = (408, 429)
    for attempt in range(max_attempts):
        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=60)
            resp.raise_for_status()
            content = resp.json()['choices'][0]['message']['content']
            if isinstance(content, list):
                return "".join(
                    c.get('text', '')
                    for c in content
                    if isinstance(c, dict) and c.get('type') == 'text'
                ).strip()
            return str(content).strip()
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
            # Response objects are falsy for error statuses, so read the
            # status via getattr rather than truth-testing the response.
            status = getattr(getattr(e, "response", None), "status_code", None)
            if status is not None and 400 <= status < 500 and status not in retryable_client_errors:
                # E.g. 401/403/400: the same request will fail again.
                print(f"API Error (Attempt {attempt+1}): {e}. Not retryable, giving up.")
                break
            if attempt == max_attempts - 1:
                # No point sleeping when there is no further attempt.
                print(f"API Error (Attempt {attempt+1}): {e}. Giving up.")
                break
            wait = 5 * (2 ** attempt)  # exponential backoff: 5, 10, 20, 40 s
            print(f"API Error (Attempt {attempt+1}): {e}. Waiting {wait}s...")
            time.sleep(wait)
    return "ERROR"

# --- CLASSES ---
class Agent:
    """One LLM-backed player in the multi-agent bandit game.

    An agent keeps a history of its own pulls and an inbox of messages
    received from other agents. Each round it can send one message and then
    pick one arm out of the arms assigned to it.
    """

    # "TO: Agent_<n> ... MSG: <text>", matched case-insensitively. Only the
    # digits are captured so the id can be rebuilt in canonical form.
    _MESSAGE_PATTERN = re.compile(r"TO:\s*Agent_(\d+).*?MSG:\s*(.*)", re.DOTALL | re.IGNORECASE)

    def __init__(self, agent_id, total_agents, total_arms, total_rounds):
        """Create agent number ``agent_id`` with empty history and inbox."""
        self.agent_id = f"Agent_{agent_id}"
        self.total_agents = total_agents
        self.total_arms = total_arms
        self.total_rounds = total_rounds
        self.history = []
        # NOTE(review): nothing in this class ever clears the inbox, so the
        # prompt built in make_choice grows every round. Confirm whether
        # carrying messages over between rounds is intended.
        self.inbox = []

    def get_system_prompt(self):
        """Return the system prompt that identifies this agent and the goal."""
        return f"You are {self.agent_id} in a {self.total_agents}-agent bandit game. Rewards 0-100. Maximize team reward."

    def _format_history(self):
        """Render the pull history one line per round, or "No history."."""
        if not self.history:
            return "No history."
        return "\n".join(
            f"Round {r['round']}: Pulled Arm {r['arm']}, Reward: {r['payoff']:.2f}"
            for r in self.history
        )

    @staticmethod
    def _parse_message(resp):
        """Extract ``(recipient_id, message)`` from a model reply.

        The recipient is normalised to the canonical ``Agent_<n>`` spelling,
        so replies such as ``agent_3`` or ``Agent_03`` still address
        ``Agent_3``. Returns ``(None, None)`` when the reply does not follow
        the requested format.
        """
        match = Agent._MESSAGE_PATTERN.search(resp)
        if match is None:
            return None, None
        return f"Agent_{int(match.group(1))}", match.group(2).strip()

    def generate_message(self, current_round, assigned_arms, assignment_map):
        """Ask the model for one message to another agent.

        Returns:
            ``(recipient_id, message)``, or ``(None, None)`` if the reply
            could not be parsed. In mock mode a random recipient and the
            text ``"Mock"`` are returned without calling the API.
        """
        if MOCK_MODE:
            return f"Agent_{random.randint(0, self.total_agents-1)}", "Mock"
        user_prompt = (f"--- ROUND {current_round} ---\nMy Arms: {assigned_arms}\nMap: {assignment_map}\nHistory:\n{self._format_history()}\n"
                       "Task: Message ONE agent. Keep it short.\nFormat: 'TO: Agent_X | MSG: <content>'")
        resp = call_mistral(self.get_system_prompt(), user_prompt)
        return self._parse_message(resp)

    def receive_message(self, sender, content):
        """Append a message from ``sender`` to the inbox."""
        self.inbox.append(f"From {sender}: {content}")

    def make_choice(self, assigned_arms):
        """Pick one arm from ``assigned_arms``.

        The model sees the inbox and the list of assigned arms. The first
        integer in its reply is used if it is one of the assigned arms;
        otherwise (and always in mock mode) a random assigned arm is returned.
        """
        if MOCK_MODE:
            return random.choice(assigned_arms)
        # The join is done outside the f-string because a backslash inside an
        # f-string expression is a syntax error on older Python versions.
        inbox_str = '\n'.join(self.inbox) if self.inbox else 'None'
        user_prompt = (f"Msgs:\n{inbox_str}\n\nPick arm from {assigned_arms}. Return integer ONLY.")

        resp = call_mistral(self.get_system_prompt(), user_prompt)
        nums = re.findall(r'\d+', resp)
        if nums and int(nums[0]) in assigned_arms:
            return int(nums[0])
        return random.choice(assigned_arms)

    def update_history(self, r, arm, payoff):
        """Record that ``arm`` was pulled in round ``r`` and paid ``payoff``."""
        self.history.append({'round': r, 'arm': arm, 'payoff': payoff})

class SharpRLManager:
    """Assigns arms to agents from a table of learned agent/arm weights."""

    def __init__(self, num_arms, num_agents, learning_rate):
        """Start with an all-zero weight table of shape (num_agents, num_arms)."""
        self.num_arms = num_arms
        self.num_agents = num_agents
        self.lr = learning_rate
        self.weights = np.zeros((num_agents, num_arms))

    def assign_arms(self):
        """Sample an owner for every arm and return ``{agent_id: [arms]}``.

        For each arm the owner is drawn with probabilities given by a softmax
        of that arm's weights across agents. Afterwards, agents left with no
        arm receive one taken from an agent holding more than one, for as
        long as such donors exist.
        """
        agent_ids = list(range(self.num_agents))
        assignment = {f"Agent_{i}": [] for i in agent_ids}
        arm_counts = np.zeros(self.num_agents, dtype=int)
        # Column `arm` of this matrix is a distribution over agents.
        owner_probs = softmax(self.weights, axis=0)

        for arm in range(self.num_arms):
            owner = np.random.choice(agent_ids, p=owner_probs[:, arm])
            assignment[f"Agent_{owner}"].append(arm)
            arm_counts[owner] += 1

        empty_handed = [i for i, n in enumerate(arm_counts) if n == 0]
        if not empty_handed:
            return assignment

        # Redistribute: donors are agents that can spare an arm.
        donors = [i for i, n in enumerate(arm_counts) if n > 1]
        random.shuffle(empty_handed)
        for receiver in empty_handed:
            if not donors:
                break
            donor = donors[0]
            moved_arm = assignment[f"Agent_{donor}"].pop()
            arm_counts[donor] -= 1
            assignment[f"Agent_{receiver}"].append(moved_arm)
            arm_counts[receiver] += 1
            # A donor down to a single arm has nothing left to give.
            if arm_counts[donor] <= 1:
                del donors[0]
        return assignment

    def update_weights(self, agent_id, arm_idx, reward):
        """Raise the weight of (agent, arm) by ``lr * reward / 5.0``."""
        row = int(agent_id.split("_")[1])
        # Sharp scaling /5.0
        increment = self.lr * (reward / 5.0)
        self.weights[row, arm_idx] += increment

# --- EXECUTION ---
if __name__ == "__main__":
    # Resume from a checkpoint if one exists, otherwise start at round 0.
    start, df, msg_log, w_log = load_checkpoint(N_AGENTS, N_ROUNDS)
    agents = [Agent(i, N_AGENTS, N_ARMS, N_ROUNDS) for i in range(N_AGENTS)]
    manager = SharpRLManager(N_ARMS, N_AGENTS, RL_LEARNING_RATE)
    # The set of agents never changes, so build the id -> Agent lookup once
    # instead of once per round.
    agent_map = {a.agent_id: a for a in agents}

    if start > 0:
        print("Restoring state...")
        # Replay the recorded pulls so each agent's history matches the log.
        for r in range(start):
            for a in agents:
                rec = df.at[r, a.agent_id]
                if rec['chosen_arm']:
                    a.update_history(r, rec['chosen_arm'][0], rec['payoff'][0])
        # The last logged weight matrix is the manager's state at the checkpoint.
        if w_log:
            manager.weights = w_log[-1].copy()

    print("Starting FINAL RL Experiment. Churn Target: ~0.18")
    # initial/total make the bar show overall progress when resuming mid-run.
    for t in tqdm(range(start, N_ROUNDS), initial=start, total=N_ROUNDS):
        assigns = manager.assign_arms()

        # Phase 1: every agent may send one message; all messages are
        # collected first and delivered together afterwards.
        msgs = []
        for a in agents:
            tid, c = a.generate_message(t, assigns[a.agent_id], assigns)
            # Drop unparsable replies, unknown recipients and self-messages.
            if tid and tid in assigns and tid != a.agent_id:
                rec = {'iteration': t, 'sender': a.agent_id, 'receiver': tid, 'message': c}
                msgs.append(rec)
                msg_log.append(rec)
        for m in msgs:
            agent_map[m['receiver']].receive_message(m['sender'], m['message'])

        # Phase 2: each agent pulls one of its assigned arms.
        rewards = []
        for a in agents:
            my_arms = assigns[a.agent_id]
            ch = a.make_choice(my_arms)
            pay = get_arm_reward(ch)

            a.update_history(t, ch, pay)
            manager.update_weights(a.agent_id, ch, pay)

            df.at[t, a.agent_id] = {'assigned_arms': list(my_arms), 'chosen_arm': [ch], 'payoff': [pay]}
            rewards.append(pay)

        # Despite the column name, this stores the mean payoff of the round.
        df.at[t, 'Total_Payoff'] = np.average(rewards)
        w_log.append(manager.weights.copy())
        # Checkpoint after every round so an interrupted run can resume.
        save_checkpoint(df, msg_log, w_log)

    print("Done. Saving final results...")
    df.to_csv("final_sharp_rl_results.csv")
    pd.DataFrame(msg_log).to_csv("final_sharp_messages.csv")

  from pandas.core import (


Environment Initialized (0-100). Best Arm Mean: 100.00
Starting fresh experiment.
Starting FINAL RL Experiment. Churn Target: ~0.18


  0%|                                                                                           | 0/50 [00:00<?, ?it/s]

API Error (Attempt 1): 429 Client Error: Too Many Requests for url: https://api.mistral.ai/v1/chat/completions. Waiting 5s...


  4%|███▏                                                                            | 2/50 [15:44<6:18:45, 473.44s/it]

API Error (Attempt 1): 429 Client Error: Too Many Requests for url: https://api.mistral.ai/v1/chat/completions. Waiting 5s...


  8%|██████▍                                                                         | 4/50 [34:50<6:52:11, 537.64s/it]

API Error (Attempt 1): 429 Client Error: Too Many Requests for url: https://api.mistral.ai/v1/chat/completions. Waiting 5s...


 32%|████████████████████████▋                                                    | 16/50 [2:42:36<6:22:18, 674.65s/it]

API Error (Attempt 1): 502 Server Error: Bad Gateway for url: https://api.mistral.ai/v1/chat/completions. Waiting 5s...


 36%|███████████████████████████▋                                                 | 18/50 [3:07:26<6:18:39, 709.97s/it]

API Error (Attempt 1): HTTPSConnectionPool(host='api.mistral.ai', port=443): Read timed out. (read timeout=60). Waiting 5s...


 72%|███████████████████████████████████████████████████████▍                     | 36/50 [7:00:54<2:56:47, 757.67s/it]

API Error (Attempt 1): 502 Server Error: Bad Gateway for url: https://api.mistral.ai/v1/chat/completions. Waiting 5s...


100%|██████████████████████████████████████████████████████████████████████████████| 50/50 [10:11:08<00:00, 733.38s/it]

Done. Saving final results...



