In [3]:
import numpy as np
import matplotlib.pyplot as plt

from mech_env import SingleItemAuctionMechEnv
from mech_myerson_rl import ReservePolicy  # 你训练好的策略

# Theoretical Myerson mechanism for a single item with U[0,1] valuations
def myerson_alloc_pay(valuations, reserve=0.5):
    """Run a batch of second-price auctions with a reserve price.

    In each auction (row) the highest-valuation bidder wins if their valuation
    is at least ``reserve`` and pays ``max(reserve, second-highest valuation)``.
    Otherwise the item stays unsold and nobody pays.

    Args:
        valuations: array-like of shape (B, n) -- B auctions, n bidders.
        reserve: reserve price applied to every auction.

    Returns:
        (alloc, pay): two float arrays of shape (B, n). ``alloc`` holds 1.0 at
        the winner's position (all zeros when there is no sale); ``pay`` holds
        the winner's payment at the same position and 0 elsewhere.
    """
    valuations = np.asarray(valuations, dtype=float)
    B, n = valuations.shape
    alloc = np.zeros((B, n))
    pay = np.zeros((B, n))
    if n == 0:
        # No bidders: nothing can be allocated or charged.
        return alloc, pay

    rows = np.arange(B)
    order = np.argsort(-valuations, axis=1)  # bidders sorted by descending valuation
    top1_idx = order[:, 0]
    top1_val = valuations[rows, top1_idx]

    # With a single bidder there is no competing bid, so the runner-up value is 0.
    # This must be an explicit branch: np.where evaluates both arguments eagerly,
    # so indexing order[:, 1] inside it raises IndexError when n == 1.
    if n >= 2:
        top2_val = valuations[rows, order[:, 1]]
    else:
        top2_val = np.zeros(B)

    sale_mask = top1_val >= reserve
    alloc[rows, top1_idx] = sale_mask.astype(float)
    prices = np.maximum(reserve, top2_val) * sale_mask.astype(float)
    pay[rows, top1_idx] = prices
    return alloc, pay

def revenue(alloc, pay):
    """Return the average total payment collected per auction.

    Args:
        alloc: allocation array; not used in the computation.
        pay: payment array with one row per auction and one column per bidder.

    Returns:
        Mean over auctions of the row-wise payment sum.
    """
    collected_per_auction = pay.sum(axis=1)
    return collected_per_auction.mean()

def efficiency(valuations, alloc):
    """Return the fraction of auctions where the item goes to the top bidder.

    An auction counts as efficient only if a sale happened (row sum of
    ``alloc`` above 0.5) and the allocated bidder is the argmax of that row
    of ``valuations``.

    Args:
        valuations: 2-D array, one row per auction and one column per bidder.
        alloc: allocation array with the same layout.

    Returns:
        Mean of the per-auction "sold to the highest-valuation bidder" indicator.
    """
    # Unpacking is kept so non-2-D input is rejected up front.
    _num_auctions, _num_bidders = valuations.shape
    best_bidder = valuations.argmax(axis=1)
    chosen_bidder = alloc.argmax(axis=1)
    item_sold = alloc.sum(axis=1) > 0.5
    sold_to_best = item_sold & (chosen_bidder == best_bidder)
    return sold_to_best.mean()


In [1]:
# Generate samples and evaluate the theoretical Myerson mechanism.
# B auctions with n bidders each; valuations come from a seeded RandomState.
B, n = 100000, 5
rng = np.random.RandomState(123)
vals = rng.random_sample((B, n))

# Reserve 0.5 is the value used as the theoretical benchmark in this notebook.
alloc_m, pay_m = myerson_alloc_pay(vals, reserve=0.5)
rev_m, eff_m = revenue(alloc_m, pay_m), efficiency(vals, alloc_m)

print("Myerson:")
print("  revenue =", rev_m)
print("  efficiency =", eff_m)


NameError: name 'np' is not defined

加载训练好的机制 RL 策略并评估(前提:训练完成后已将 ReservePolicy 的参数保存为 policy.pt)

In [None]:
import torch
from mech_myerson_rl import ReservePolicy

# Load the trained reserve policy and evaluate the reserve price it produces.
policy = ReservePolicy()
# map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only machine;
# without it torch.load tries to restore tensors onto their original device.
# NOTE(security): torch.load unpickles the file -- only load checkpoints you trust
# (recent PyTorch versions also offer weights_only=True for plain state dicts).
state_dict = torch.load("policy.pt", map_location="cpu")
policy.load_state_dict(state_dict)
policy.eval()

with torch.no_grad():
    # The policy is queried with an all-zero observation batch of shape (B, 1);
    # its output is averaged into a single scalar reserve price.
    obs = torch.zeros((B, 1), dtype=torch.float32)
    r_tensor = policy(obs)
    r_hat = float(r_tensor.mean().item())
print("RL learned reserve ≈", r_hat)

# Re-run the auction on the same valuations using the reserve learned by RL
alloc_rl, pay_rl = myerson_alloc_pay(vals, reserve=r_hat)
rev_rl = revenue(alloc_rl, pay_rl)
eff_rl = efficiency(vals, alloc_rl)

print("RL mechanism (plug-in reserve):")
print("  revenue =", rev_rl)
print("  efficiency =", eff_rl)


可视化价格分布

In [None]:
def prices_from_pay(pay):
    """Collapse a per-bidder payment array into one price per auction.

    Args:
        pay: 2-D payment array, one row per auction and one column per bidder.

    Returns:
        1-D array holding the largest payment in each row (an all-zero row
        yields 0).
    """
    price_per_auction = np.max(pay, axis=1)
    return price_per_auction

p_m = prices_from_pay(pay_m)
p_rl = prices_from_pay(pay_rl)

# One figure per mechanism: a density histogram of per-auction prices with a
# dashed vertical line marking that mechanism's reserve price.
for _prices, _reserve, _title in (
    (p_m, 0.5, "Price distribution — Myerson (reserve=0.5)"),
    (p_rl, r_hat, f"Price distribution — RL mechanism (reserve≈{r_hat:.2f})"),
):
    plt.figure()
    plt.hist(_prices, bins=50, alpha=0.7, density=True)
    plt.axvline(_reserve, linestyle='--')
    plt.title(_title)
    plt.show()
