# IG Toy Slot — KL-bounded updates under RTP/Hit guardrails

這份 notebook 展示：
1. 基線 paytable 與三項 KPI：RTP（單次期望回報）、Hit（中獎率，即賠付大於 0 的機率）、Var（單次回報的變異數）
2. 以 KL 有界小步（鏡像下降風格的乘法更新）調整機率分佈，在 RTP/Hit 護欄內提升 Var
3. Monte Carlo 模擬交叉驗證
4. 生成 Excel 鏡像與圖表


In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
# Fix the global NumPy seed so the random draws made later in the notebook
# are reproducible.
np.random.seed(42)

# Payout of each outcome; index 0 (payout 0.0) is the losing outcome.
payouts = np.array([0.0, 0.5, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0], dtype=float)

# Baseline outcome probabilities, aligned index-by-index with `payouts` and
# rescaled so that they sum to exactly 1.
p0 = np.array(
    [0.68, 0.0896, 0.1024, 0.064, 0.0384, 0.016, 0.0064, 0.0032],
    dtype=float,
)
p0 /= p0.sum()

# Guardrail targets and the tolerated deviation around each of them.
rtp_target = 0.95
rtp_tol = 0.005
hit_target = 0.32
hit_tol = 0.02

def metrics(p):
    """Return ``(rtp, hit, var)`` for the outcome distribution ``p``.

    ``p`` is a probability vector aligned with the module-level ``payouts``.

    * ``rtp`` -- expected payout, ``sum(p * payouts)``.
    * ``hit`` -- total probability of the outcomes whose payout is > 0.
    * ``var`` -- variance of the payout, ``E[X^2] - E[X]^2``.

    All three values are returned as plain Python floats.
    """
    win_mask = (payouts > 0).astype(float)
    mean_payout = float(p @ payouts)
    hit_rate = float(win_mask @ p)
    second_moment = float(p @ (payouts**2))
    return mean_payout, hit_rate, second_moment - mean_payout**2


In [None]:
# KPIs of the untouched baseline distribution; the bare expression on the last
# line makes the notebook display them as a (rtp, hit, var) tuple.
baseline_kpis = metrics(p0)
rtp0, hit0, var0 = baseline_kpis
baseline_kpis

In [None]:
# Weights of the three squared-error terms in the objective.
w_mu = 1.0
w_hit = 1.0
w_var = 0.25

# Aim for a variance 20% above the baseline value.
var_target = 1.2 * var0

# 1.0 for outcomes that pay out, 0.0 for the losing outcome.
ind_win = np.where(payouts > 0, 1.0, 0.0)

def objective_and_grad(p):
    """Evaluate the weighted squared-error objective and its gradient at ``p``.

    The objective is the weighted sum of the squared deviations of RTP, hit
    rate and variance from ``rtp_target``, ``hit_target`` and ``var_target``.
    The gradient is taken with respect to each component of ``p``
    independently; nothing here enforces that ``p`` stays a probability
    vector.

    Returns:
        ``(obj, grad, rtp, hit, var)`` where ``grad`` has the same length as
        ``payouts`` and the last three entries are the KPIs from ``metrics``.
    """
    rtp, hit, var = metrics(p)

    # Signed distances of each KPI from its target.
    err_mu = rtp - rtp_target
    err_hit = hit - hit_target
    err_var = var - var_target

    # d(var)/dp_i = x_i^2 - 2 * E[X] * x_i, since var = E[X^2] - E[X]^2.
    var_sensitivity = payouts**2 - 2 * rtp * payouts

    obj = w_mu*err_mu**2 + w_hit*err_hit**2 + w_var*err_var**2
    grad = (2*w_mu*err_mu*payouts +
            2*w_hit*err_hit*ind_win +
            2*w_var*err_var*var_sensitivity)
    return obj, grad, rtp, hit, var

def md_step(p, grad, lr):
    """Take one multiplicative (softmax-style) update step from ``p``.

    The step subtracts ``lr * grad`` from ``log(p)``, exponentiates and
    renormalises, so the result is strictly positive and sums to 1.

    Args:
        p: current probability vector; entries are clipped to
            ``[1e-12, 1.0]`` before the logarithm so zeros do not yield -inf.
        grad: gradient of the objective, same shape as ``p``.
        lr: step size.

    Returns:
        A new array with the updated distribution; ``p`` is not modified.
    """
    log_weights = np.log(np.clip(p, 1e-12, 1.0)) - lr*grad
    # Shift by the maximum so the largest exponent is 0 and exp() cannot overflow.
    shifted = log_weights - log_weights.max()
    weights = np.exp(shifted)
    return weights / weights.sum()

def kl_div(p_new, p_old):
    """Return the KL divergence ``KL(p_new || p_old)`` as a Python float.

    Both inputs are clipped to ``[1e-12, 1.0]`` first so that zero entries do
    not produce ``log(0)``; the clipped vectors are not renormalised.
    """
    floor = 1e-12
    new_clipped = np.clip(p_new, floor, 1.0)
    old_clipped = np.clip(p_old, floor, 1.0)
    log_ratio = np.log(new_clipped) - np.log(old_clipped)
    return float(np.sum(new_clipped * log_ratio))

# Start from a copy of the baseline so `p0` stays available for comparison.
p = p0.copy()
history = {"iter": [], "obj": [], "rtp": [], "hit": [], "var": [], "kl": []}
lr = 0.4

# Outcome indices used by the guardrail nudges. They depend only on `payouts`,
# so they are looked up once instead of on every iteration.
miss_idx = 0
small_win_idx = int(np.where(payouts > 0)[0][0])
low_idx = int(np.where(payouts == 1)[0][0])
high_idx = int(np.where(payouts == 10)[0][0])

# Edges of the accepted RTP and hit-rate bands.
rtp_lo, rtp_hi = rtp_target - rtp_tol, rtp_target + rtp_tol
hit_lo, hit_hi = hit_target - hit_tol, hit_target + hit_tol

for t in range(600):
    obj, grad, rtp, hit, var = objective_and_grad(p)
    p_new = md_step(p, grad, lr)

    # Log the state *before* the step together with the KL size of the step.
    snapshot = {
        "iter": t,
        "obj": obj,
        "rtp": rtp,
        "hit": hit,
        "var": var,
        "kl": kl_div(p_new, p),
    }
    for key, value in snapshot.items():
        history[key].append(value)
    p = p_new

    # Guardrail check: nothing more to do while both KPIs are inside their band.
    rtp, hit, var = metrics(p)
    outside_band = rtp < rtp_lo or rtp > rtp_hi or hit < hit_lo or hit > hit_hi
    if not outside_band:
        continue

    # Quick guardrail nudges: move a small, fixed amount of probability mass
    # between two outcomes to pull each KPI back toward its target.
    p = np.maximum(p, 1e-12)
    p = p / p.sum()

    # Hit rate: trade mass between the losing outcome and the smallest win.
    # The clip bounds keep both probabilities at or above 1e-12.
    if abs(hit - hit_target) > 1e-4:
        step = np.clip(
            0.0005*np.sign(hit_target-hit),
            -p[small_win_idx]+1e-12,
            p[miss_idx]-1e-12,
        )
        p[miss_idx] -= step
        p[small_win_idx] += step

    # RTP: trade mass between the payout-1 and payout-10 outcomes; the hit rate
    # is unaffected because both outcomes are wins.
    rtp, hit, var = metrics(p)
    if abs(rtp - rtp_target) > 1e-4:
        step = np.clip(
            0.0005*np.sign(rtp_target-rtp),
            -p[high_idx]+1e-12,
            p[low_idx]-1e-12,
        )
        p[low_idx] -= step
        p[high_idx] += step

    p = np.maximum(p, 1e-12)
    p = p / p.sum()

# Final KPIs, displayed next to the baseline ones for comparison.
rtp_f, hit_f, var_f = metrics(p)
rtp0, hit0, var0, rtp_f, hit_f, var_f

In [None]:
import pandas as pd, matplotlib.pyplot as plt
# Tabulate the per-iteration log so each series can be plotted by column name.
hist_df = pd.DataFrame(history)
iterations = hist_df["iter"]

# Figure 1: objective value per iteration.
plt.figure()
plt.plot(iterations, hist_df["obj"])
plt.title("Objective")
plt.xlabel("iter")
plt.ylabel("obj")
plt.show()

# Figure 2: the three KPIs on shared axes.
plt.figure()
for column, legend_label in (("rtp", "RTP"), ("hit", "Hit"), ("var", "Var")):
    plt.plot(iterations, hist_df[column], label=legend_label)
plt.legend()
plt.title("Metrics")
plt.xlabel("iter")
plt.ylabel("value")
plt.show()

In [None]:
# Monte Carlo cross-check: draw N outcomes from the final distribution `p` and
# compute the same three KPIs empirically.
N = 1_000_000
samples = np.random.choice(len(payouts), size=N, p=p)
returns = payouts[samples]

mc_rtp = float(returns.mean())        # empirical mean payout
mc_hit = float((returns > 0).mean())  # fraction of draws with a payout > 0
mc_var = float(returns.var())         # variance with NumPy's default ddof=0
mc_rtp, mc_hit, mc_var

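**結果對齊**：若解析值（`rtp_f`、`hit_f`、`var_f`）與模擬值（`mc_rtp`、`mc_hit`、`mc_var`）的差距落在 Monte Carlo 抽樣誤差之內（例如 RTP 的標準誤約為 $\sqrt{\mathrm{Var}/N}$），即代表公式與實作一致。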