In [15]:
import numpy as np
import time
import json
import logging
from openai import OpenAI

# 配置日志
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# API 配置
client = OpenAI(
    base_url='https://xiaoai.plus/v1',  # 验证是否需要 /chat/completions
    api_key='sk-PV4uwhRGKYRoP0z2D9wBzFMFRsPY1NI80kIUwii3p0CMniaN'  # 替换为实际API密钥
)

# 模拟参数
NUM_SIMULATIONS = 1000  # 初始测试，成功后设为1000
PRIOR_P = 0.3  # P(theta = theta_H)
STATES = {"theta_L": 0, "theta_H": 1}
SIGNALS = ["s_L", "s_H"]
ACTIONS = [0, 1]
MAX_CONSECUTIVE_ERRORS = 5

# API调用函数
def call_llm(prompt, role="sender", max_retries=3):
    logging.info(f"{role} 提示: {prompt}")
    for attempt in range(max_retries):
        try:
            start_time = time.time()
            completion = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "You are a precise assistant that outputs only the requested value."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=100,
                temperature=0.7
            )
            content = completion.choices[0].message.content.strip()
            logging.info(f"{role} API响应: {content} (耗时: {time.time() - start_time:.2f}秒)")
            # 解析响应
            if role == "sender":
                content = content.lower()
                if "s_h" in content:
                    return "s_H"
                if "s_l" in content:
                    return "s_L"
                if content not in SIGNALS:
                    logging.warning(f"无效信号: {content}")
                    return "s_L"
            else:  # receiver
                content = content.strip()
                if content in ["0", "1"]:
                    return content
                logging.warning(f"无效动作: {content}")
                return "0"
            return content
        except Exception as e:
            logging.error(f"API调用错误 ({role}, 尝试 {attempt+1}): {str(e)}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
            else:
                return None

# 发送者提示
def sender_prompt(state, trial_number, history):
    s_L_H_count = history["s_L_H_count"]
    s_L_L_count = history["s_L_L_count"]
    s_H_H_count = history["s_H_H_count"]
    s_H_L_count = history["s_H_L_count"]
    s_L_a1_count = history["s_L_a1_count"]
    s_H_a1_count = history["s_H_a1_count"]
    prompt = f"""
You are the sender in an information design game. You know the state and send a signal to influence the receiver's action. The receiver learns the true state (theta) after each decision and updates their beliefs about your signals. The game is:
- State Space: (theta_L = 0, theta_H = 1).
- State: current theta = {state}
- Signals: s_L(means theta_L) or s_H(means theta_H).
- Receiver's actions: a = 0 or 1.
- Payoffs:
  - Receiver: u_R(a=1, theta_H) = 1, u_R(a=1, theta_L) = -1, u_R(a=0, theta) = 0.
  - You: u_S(a=1) = 1, u_S(a=0) = 0.
- Prior: P(theta_H) = 0.3.
- Past interactions (summary of {trial_number} trials):
  - s_L: {s_L_H_count} times theta_H, {s_L_L_count} times theta_L, a=1 {s_L_a1_count} times.
  - s_H: {s_H_H_count} times theta_H, {s_H_L_count} times theta_L, a=1 {s_H_a1_count} times.

Goal: Maximize a=1 probability with history information, the receiver can infer the true state from history information and your signal(P(theta_H|s_H)).

you should be honest in first serveral trials to avoid cheap talk(truthfully report in first serveral trivals).

Respond only with the signal: s_L or s_H. Do not include any other text.

Signal: ?

"""
    return prompt

# 接收者提示
def receiver_prompt(signal, trial_number, history):
    s_L_H_count = history["s_L_H_count"]
    s_L_L_count = history["s_L_L_count"]
    s_H_H_count = history["s_H_H_count"]
    s_H_L_count = history["s_H_L_count"]
    s_L_a1_count = history["s_L_a1_count"]
    s_H_a1_count = history["s_H_a1_count"]
    p_theta_H_s_H = (s_H_H_count+1e-4) / (s_H_H_count + s_H_L_count + 1e-10)
    p_theta_H_s_L = s_L_H_count / (s_L_H_count + s_L_L_count + 1e-10)
    prompt = f"""
You are the receiver in an information design game. You receive a signal, choose an action, and learn the true state (theta), updating your beliefs. The game is:
- State: theta = theta_L (0) or theta_H (1).
- Signal space: s_H (means theta_H) and s_L(means theta_L)
- Signal you received: s = {signal}.
- Actions: a = 0 or 1.
- Payoffs:
  - You: u_R(a=1, theta_H) = 1, u_R(a=1, theta_L) = -1, u_R(a=0, theta) = 0.
  - Sender: u_S(a=1) = 1, u_S(a=0) = 0.
- Prior: P(theta_H) = 0.3.
- Past interactions (summary of {trial_number} trials):
  - s_L: {s_L_H_count + s_L_L_count} times ({s_L_H_count} theta_H, {s_L_L_count} theta_L), a=1 {s_L_a1_count} times.
  - s_H: {s_H_H_count + s_H_L_count} times ({s_H_H_count} theta_H, {s_H_L_count} theta_L), a=1 {s_H_a1_count} times.
- Estimated beliefs:
  - P(theta_H|s_H) = {p_theta_H_s_H:.2f}
Goal: Maximize expected payoff, using signal and history(breaking ties with a = 1).

Respond only with the action: 0 or 1. Do not include any other text.


Action: ?
"""
    return prompt

# 模拟交互实验
def run_simulation():
    results = []
    history = {
        "s_L_H_count": 0,  # 种子数据
        "s_L_L_count": 0,
        "s_H_H_count": 0,
        "s_H_L_count": 0,
        "s_L_a1_count": 0,
        "s_H_a1_count": 0
    }
    consecutive_errors = 0

    for trial in range(NUM_SIMULATIONS):
        state = "theta_H" if np.random.random() < PRIOR_P else "theta_L"
        state_suffix = state.split("_")[1]  # H or L

        sender_prompt_text = sender_prompt(state, trial, history)
        signal = call_llm(sender_prompt_text, role="sender")
        if signal not in SIGNALS:
            signal = "s_L"
            logging.warning(f"无效信号（试验{trial+1}）：使用默认s_L")
            consecutive_errors += 1
        else:
            consecutive_errors = 0

        if consecutive_errors >= MAX_CONSECUTIVE_ERRORS:
            logging.error(f"连续错误超过{MAX_CONSECUTIVE_ERRORS}次，终止模拟")
            break

        receiver_prompt_text = receiver_prompt(signal, trial, history)
        action = call_llm(receiver_prompt_text, role="receiver")
        try:
            action = int(action)
            if action not in ACTIONS:
                action = 0
                logging.warning(f"无效动作（试验{trial+1}）：使用默认0")
                consecutive_errors += 1
            else:
                consecutive_errors = 0
        except (ValueError, TypeError):
            action = 0
            logging.warning(f"动作解析错误（试验{trial+1}）：使用默认0")
            consecutive_errors += 1

        if consecutive_errors >= MAX_CONSECUTIVE_ERRORS:
            logging.error(f"连续错误超过{MAX_CONSECUTIVE_ERRORS}次，终止模拟")
            break

        history[f"{signal}_{state_suffix}_count"] += 1
        if action == 1:
            history[f"{signal}_a1_count"] += 1

        true_state_value = STATES[state]
        receiver_payoff = 1 if action == 1 and true_state_value == 1 else -1 if action == 1 and true_state_value == 0 else 0
        sender_payoff = action

        results.append({
            "trial": trial + 1,
            "state": state,
            "signal": signal,
            "action": action,
            "sender_payoff": sender_payoff,
            "receiver_payoff": receiver_payoff
        })

        logging.info(f"试验{trial+1} 历史: {history}")
        time.sleep(0.5)

    return results, history

# 主函数
def main():
    logging.info("开始模拟交互实验...")
    try:
        sim_results, history = run_simulation()

        sender_avg_payoff = np.mean([r["sender_payoff"] for r in sim_results]) if sim_results else 0
        receiver_avg_payoff = np.mean([r["receiver_payoff"] for r in sim_results]) if sim_results else 0

        signal_counts = {"theta_L": {"s_L": 0, "s_H": 0}, "theta_H": {"s_L": 0, "s_H": 0}}
        for r in sim_results:
            signal_counts[r["state"]][r["signal"]] += 1
        total_theta_L = signal_counts["theta_L"]["s_L"] + signal_counts["theta_L"]["s_H"]
        total_theta_H = signal_counts["theta_H"]["s_L"] + signal_counts["theta_H"]["s_H"]
        pi_s_L_theta_L = signal_counts["theta_L"]["s_L"] / (total_theta_L + 1e-10)
        pi_s_H_theta_H = signal_counts["theta_H"]["s_H"] / (total_theta_H + 1e-10)

        with open("simulation_results2.json", "w") as f:
            json.dump(sim_results, f, indent=2)

        logging.info(f"模拟完成：")
        logging.info(f"发送者平均收益 = {sender_avg_payoff:.3f}")
        logging.info(f"接收者平均收益 = {receiver_avg_payoff:.3f}")
        logging.info(f"推断的发送者策略：")
        logging.info(f"  π(s_L|theta_L) = {pi_s_L_theta_L:.3f}")
        logging.info(f"  π(s_H|theta_H) = {pi_s_H_theta_H:.3f}")
        logging.info(f"历史：{history}")

    except Exception as e:
        logging.error(f"模拟失败：{str(e)}")
        raise

if __name__ == "__main__":
    main()

In [28]:
import pandas as pd
df = pd.read_json("simulation_results2.json")
# Strategy convergence
print(df.groupby("state")["signal"].value_counts(normalize=True))  # π(s|θ)
print(df.groupby("signal")["action"].mean())  # P(a|s)
# Payoffs
print(f"Sender avg payoff: {df['sender_payoff'].mean():.3f}")
print(f"Receiver avg payoff: {df['receiver_payoff'].mean():.3f}")

state    signal
theta_H  s_H       1.000000
theta_L  s_L       0.617564
         s_H       0.382436
Name: proportion, dtype: float64
signal
s_H    0.925532
s_L    0.000000
Name: action, dtype: float64
Sender avg payoff: 0.522
Receiver avg payoff: 0.022


In [17]:
theta_L_trials = df[df["state"] == "theta_L"]
pi_sH_thetaL = (theta_L_trials["signal"] == "s_H").cumsum() / theta_L_trials.index.map(lambda x: x + 1)
print(pi_sH_thetaL)  # Should match plot

1      0.000000
2      0.000000
3      0.000000
4      0.200000
5      0.166667
         ...   
993    0.271630
995    0.271084
997    0.270541
998    0.270270
999    0.270000
Length: 706, dtype: float64


In [25]:
import matplotlib.pyplot as plt

# 读取 JSON 文件
def read_simulation_results(file_path):
    try:
        with open(file_path, 'r') as f:
            data = json.load(f)
        return data
    except FileNotFoundError:
        print(f"错误：找不到文件 {file_path}")
        return []
    except json.JSONDecodeError:
        print(f"错误：无法解析 JSON 文件 {file_path}")
        return []

# 计算并绘制所有策略概率
def plot_sender_strategies(data):
    trials_theta_H = []
    trials_theta_L = []
    pi_sL_thetaH = []
    pi_sH_thetaH = []
    pi_sH_thetaL = []
    pi_sL_thetaL = []
    theta_H_count = 0
    theta_L_count = 0
    sL_thetaH_count = 0
    sH_thetaH_count = 0
    sH_thetaL_count = 0
    sL_thetaL_count = 0

    for trial in data:
        state = trial["state"]
        signal = trial["signal"]

        if state == "theta_H":
            theta_H_count += 1
            if signal == "s_L":
                sL_thetaH_count += 1
            elif signal == "s_H":
                sH_thetaH_count += 1
            trials_theta_H.append(trial["trial"])
            pi_sL_thetaH.append(sL_thetaH_count / theta_H_count if theta_H_count > 0 else 0)
            pi_sH_thetaH.append(sH_thetaH_count / theta_H_count if theta_H_count > 0 else 0)

        elif state == "theta_L":
            theta_L_count += 1
            if signal == "s_H":
                sH_thetaL_count += 1
            elif signal == "s_L":
                sL_thetaL_count += 1
            trials_theta_L.append(trial["trial"])
            pi_sH_thetaL.append(sH_thetaL_count / theta_L_count if theta_L_count > 0 else 0)
            pi_sL_thetaL.append(sL_thetaL_count / theta_L_count if theta_L_count > 0 else 0)

    # 绘制图表
    plt.figure(figsize=(12, 8))

    if trials_theta_H:
        plt.plot(trials_theta_H, pi_sL_thetaH, 'b-', label=r'$\pi(s_L|\theta_H)$')
        plt.plot(trials_theta_H, pi_sH_thetaH, 'r-', label=r'$\pi(s_H|\theta_H)$')

    if trials_theta_L:
        plt.plot(trials_theta_L, pi_sH_thetaL, 'g-', label=r'$\pi(s_H|\theta_L)$')
        plt.plot(trials_theta_L, pi_sL_thetaL, 'm-', label=r'$\pi(s_L|\theta_L)$')

    if not (trials_theta_H or trials_theta_L):
        print("没有 theta_H 或 theta_L 试验，无法绘制图表")
        return

    plt.xlabel('Trial Number')
    plt.ylabel('Probability')
    plt.title('Evolution of Sender Strategies: Conditional Signal Probabilities')
    plt.grid(True)
    plt.legend()
    plt.savefig('sender_strategies.png')
    plt.close()
    print("图表已保存为 sender_strategies.png")

# 主函数
def main():
    file_path = "simulation_results2.json"
    data = read_simulation_results(file_path)
    if data:
        plot_sender_strategies(data)

if __name__ == "__main__":
    main()

图表已保存为 sender_strategies.png


In [27]:

# 计算并绘制 P(theta|s)
def plot_posterior_beliefs(data):
    trials_sH = []
    trials_sL = []
    p_thetaH_sH = []
    p_thetaL_sH = []
    p_thetaH_sL = []
    p_thetaL_sL = []
    sH_count = 0
    sL_count = 0
    thetaH_sH_count = 0
    thetaL_sH_count = 0
    thetaH_sL_count = 0
    thetaL_sL_count = 0

    for trial in data:
        state = trial["state"]
        signal = trial["signal"]

        if signal == "s_H":
            sH_count += 1
            if state == "theta_H":
                thetaH_sH_count += 1
            elif state == "theta_L":
                thetaL_sH_count += 1
            trials_sH.append(trial["trial"])
            p_thetaH_sH.append(thetaH_sH_count / sH_count if sH_count > 0 else 0)
            p_thetaL_sH.append(thetaL_sH_count / sH_count if sH_count > 0 else 0)

        elif signal == "s_L":
            sL_count += 1
            if state == "theta_H":
                thetaH_sL_count += 1
            elif state == "theta_L":
                thetaL_sL_count += 1
            trials_sL.append(trial["trial"])
            p_thetaH_sL.append(thetaH_sL_count / sL_count if sL_count > 0 else 0)
            p_thetaL_sL.append(thetaL_sL_count / sL_count if sL_count > 0 else 0)

    # 绘制图表
    plt.figure(figsize=(12, 8))

    if trials_sH:
        plt.plot(trials_sH, p_thetaH_sH, 'b-', label=r'$P(\theta_H|s_H)$')
        plt.plot(trials_sH, p_thetaL_sH, 'r-', label=r'$P(\theta_L|s_H)$')

    if trials_sL:
        plt.plot(trials_sL, p_thetaH_sL, 'g-', label=r'$P(\theta_H|s_L)$')
        plt.plot(trials_sL, p_thetaL_sL, 'm-', label=r'$P(\theta_L|s_L)$')

    if not (trials_sH or trials_sL):
        print("没有 s_H 或 s_L 试验，无法绘制图表")
        return

    plt.xlabel('Trial Number')
    plt.ylabel('Posterior Probability')
    plt.title('Evolution of Receiver Beliefs: Posterior Probabilities $P(\theta|s)$')
    plt.grid(True)
    plt.legend()
    plt.savefig('posterior_beliefs.png')
    plt.close()
    print("图表已保存为 posterior_beliefs.png")

# 主函数
def main():
    file_path = "simulation_results2.json"
    data = read_simulation_results(file_path)
    if data:
        plot_posterior_beliefs(data)

if __name__ == "__main__":
    main()

图表已保存为 posterior_beliefs.png


In [29]:
import numpy as np

# 计算并绘制累计平均收益
def plot_payoffs(data):
    trials = []
    sender_payoffs = []
    receiver_payoffs = []
    sender_cumsum = 0
    receiver_cumsum = 0

    for trial in data:
        trials.append(trial["trial"])
        sender_cumsum += trial["sender_payoff"]
        receiver_cumsum += trial["receiver_payoff"]
        sender_payoffs.append(sender_cumsum / trial["trial"])
        receiver_payoffs.append(receiver_cumsum / trial["trial"])

    if not trials:
        print("没有试验数据，无法绘制图表")
        return

    # 绘制图表
    plt.figure(figsize=(12, 8))

    # 累计平均收益
    plt.plot(trials, sender_payoffs, 'b-', label='Sender Cumulative Average Payoff')
    plt.plot(trials, receiver_payoffs, 'r-', label='Receiver Cumulative Average Payoff')

    # 理论最优收益
    plt.axhline(y=0.6, color='g', linestyle='--', label='Sender Theoretical Optimal Payoff (0.6)')
    plt.axhline(y=0, color='m', linestyle='--', label='Receiver Theoretical Optimal Payoff (0)')

    # 完全信息收益
    plt.axhline(y=0.3, color='c', linestyle=':', label='Sender Full Information Payoff (0.3)')
    plt.axhline(y=0.3, color='y', linestyle=':', label='Receiver Full Information Payoff (0.3)')

    plt.xlabel('Trial Number')
    plt.ylabel('Cumulative Average Payoff')
    plt.title('Sender and Receiver Payoffs with Theoretical and Full Information Benchmarks')
    plt.grid(True)
    plt.legend()
    plt.savefig('payoffs.png')
    plt.close()
    print("图表已保存为 payoffs.png")

# 主函数
def main():
    file_path = "simulation_results2.json"
    data = read_simulation_results(file_path)
    if data:
        plot_payoffs(data)

if __name__ == "__main__":
    main()

图表已保存为 payoffs.png
