In [1]:
import pandas as pd
import numpy as np

# ============================================================
# CONFIG
# ============================================================
# CSV of prelim entrants loaded with pd.read_csv. The script reads the
# "SeedTime" and "SimTime" columns and prints "Athlete ID", "Name", "School".
INPUT_FILE = "prelims32.csv"
# Destination CSV for the final-race prediction table.
OUTPUT_FILE = "CH14_Final_Race_Prediction.csv"

# Standard deviations of the normal noise used when a time is sampled in each
# round. Units follow the input times (presumably seconds -- TODO confirm).
SIGMA_PRELIM = 0.06
SIGMA_SEMI = 0.05
POST_STD_FINAL = 0.05     # posterior uncertainty for final

# Number of simulated finals used to estimate gold/medal probabilities.
N_FINAL_SIMS = 20000

# Seed NumPy's global RNG so every sampled time is reproducible across runs.
np.random.seed(2025)

# ============================================================
# Load data
# ============================================================
df = pd.read_csv(INPUT_FILE)

# Header cells may carry stray whitespace; normalise the labels before any
# column is looked up by name.
df.columns = df.columns.map(str.strip)

# Both time columns must be numeric for the weighted means computed later.
df = df.astype({"SeedTime": float, "SimTime": float})

# ============================================================
# Likelihood model (Chapter 14)
# ============================================================
def sample_time(mu, sigma):
    """Draw a single time from a normal distribution.

    Uses NumPy's global random state, so results follow whatever seed was
    set with ``np.random.seed``.

    Args:
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        One sample from Normal(mu, sigma).
    """
    draw = np.random.normal(loc=mu, scale=sigma)
    return draw

# ============================================================
# 1) PRELIMS
# ============================================================
# One draw per athlete, in file order, centred on the midpoint of the seed
# time and the simulated time.
prelim_draws = []
for seed_time, sim_time in zip(df["SeedTime"], df["SimTime"]):
    prelim_mu = 0.5 * seed_time + 0.5 * sim_time
    prelim_draws.append(sample_time(mu=prelim_mu, sigma=SIGMA_PRELIM))
df["prelimresult"] = prelim_draws

# Fastest first; the fresh 0..n-1 index doubles as the prelim ranking.
df = df.sort_values(by="prelimresult")
df = df.reset_index(drop=True)
df.to_csv("CH14_BN_Inference_Prelims.csv", index=False)

prelim_columns = ["Athlete ID", "Name", "prelimresult"]
print("\n=== PRELIM RESULTS ===")
print(df[prelim_columns])

# ============================================================
# 2) SEMIFINALS — top 16
# ============================================================
# Advance the first 16 rows of the prelim table (all rows if the field is
# smaller than 16).
semi_df = df.head(16).copy()

# The first eight qualifiers are labelled Semi1 and the remainder Semi2.
# Labels are generated per row so that a field of fewer than 16 athletes no
# longer fails with a length-mismatch ValueError.
semi_df["Heat"] = [
    "Semi1" if position < 8 else "Semi2"
    for position in range(len(semi_df))
]

# Semifinal mean: 60% observed prelim time, 40% simulated time.
semi_df["semifinalresult"] = [
    sample_time(
        mu=0.6 * r["prelimresult"] + 0.4 * r["SimTime"],
        sigma=SIGMA_SEMI
    )
    for _, r in semi_df.iterrows()
]

# Rank all semifinalists together, fastest first, regardless of heat.
semi_df = semi_df.sort_values("semifinalresult").reset_index(drop=True)

print("\n=== SEMIFINAL RESULTS ===")
print(semi_df[["Athlete ID", "Name", "Heat", "semifinalresult"]])

# ============================================================
# 3) FINALS — top 8
# ============================================================
# The first eight rows of the ranked semifinal table form the final field;
# work on an independent copy with a clean 0..n-1 index.
df_planned = semi_df.head(8).copy().reset_index(drop=True)

# ============================================================
# 4) CHAPTER 14 — Posterior Predictive Simulation for Final
# ============================================================

def run_final_sim(df_final, n_sims):
    """Estimate final-race outcomes by repeated simulation.

    In every simulated final each athlete's time is drawn with
    ``sample_time`` from a normal distribution whose mean is the
    equal-weight average of ``SeedTime``, ``SimTime``, ``prelimresult`` and
    ``semifinalresult`` and whose standard deviation is ``POST_STD_FINAL``.

    Args:
        df_final: DataFrame with one row per finalist and the columns
            "Athlete ID", "SeedTime", "SimTime", "prelimresult" and
            "semifinalresult".
        n_sims: Number of finals to simulate; must be at least 1.

    Returns:
        A tuple ``(win_prob, medal_prob, pred_time)`` of dicts keyed by
        Athlete ID: the fraction of simulations the athlete won, the
        fraction in which the athlete placed in the top three, and the mean
        of the athlete's simulated times.

    Raises:
        ValueError: If ``n_sims`` is below 1 or ``df_final`` has no rows.
    """
    # Fail early with a clear message instead of a ZeroDivisionError /
    # IndexError further down.
    if n_sims < 1:
        raise ValueError("n_sims must be at least 1")
    if len(df_final) == 0:
        raise ValueError("df_final must contain at least one finalist")

    athlete_ids = list(df_final["Athlete ID"])

    # Posterior mean incorporates ALL previous evidence. It does not change
    # between simulations, so compute it once rather than re-deriving it
    # from iterrows() inside every iteration.
    post_means = (
        0.25 * df_final["SeedTime"]
        + 0.25 * df_final["SimTime"]
        + 0.25 * df_final["prelimresult"]
        + 0.25 * df_final["semifinalresult"]
    ).tolist()
    finalists = list(zip(athlete_ids, post_means))

    win_counts = dict.fromkeys(athlete_ids, 0)
    medal_counts = dict.fromkeys(athlete_ids, 0)
    time_accumulator = {aid: [] for aid in athlete_ids}

    for _ in range(n_sims):
        sim_times = {}

        # Draw in row order so the random stream is consumed exactly as
        # before (one draw per athlete per simulation).
        for aid, post_mean in finalists:
            t = sample_time(post_mean, POST_STD_FINAL)
            sim_times[aid] = t
            time_accumulator[aid].append(t)

        # Athlete IDs ordered from fastest to slowest simulated time.
        ranked = sorted(sim_times, key=sim_times.get)

        win_counts[ranked[0]] += 1
        for aid in ranked[:3]:
            medal_counts[aid] += 1

    win_prob = {aid: count / n_sims for aid, count in win_counts.items()}
    medal_prob = {aid: count / n_sims for aid, count in medal_counts.items()}
    pred_time = {
        aid: float(np.mean(times))
        for aid, times in time_accumulator.items()
    }

    return win_prob, medal_prob, pred_time


# Keep the final's predictive mean and spread on the table for reference.
# Each of the four evidence sources carries the same 0.25 weight.
seed_part = 0.25 * df_planned["SeedTime"]
sim_part = 0.25 * df_planned["SimTime"]
prelim_part = 0.25 * df_planned["prelimresult"]
semi_part = 0.25 * df_planned["semifinalresult"]
df_planned["PostMean"] = seed_part + sim_part + prelim_part + semi_part
df_planned["PostStd"] = POST_STD_FINAL

win_prob, medal_prob, pred_time = run_final_sim(df_planned, N_FINAL_SIMS)

# Attach each per-athlete result dict as a column, looked up by Athlete ID.
result_columns = (
    ("ProbGold", win_prob),
    ("ProbMedal", medal_prob),
    ("PredictedFinalTime", pred_time),
)
for column_name, by_athlete in result_columns:
    df_planned[column_name] = df_planned["Athlete ID"].map(by_athlete)

# ============================================================
# Output final prediction
# ============================================================
# Most likely gold medallist first.
df_out = df_planned.sort_values(by="ProbGold", ascending=False).reset_index(drop=True)

df_out.to_csv(OUTPUT_FILE, index=False)

print("\n=== FINAL RACE PREDICTION ===")
print(df_out[[
    "Athlete ID", "Name", "School",
    "ProbGold", "ProbMedal", "PredictedFinalTime"
]])

# The banner previously contained mis-decoded bytes ("üèÜ") where the trophy
# emoji was intended; the literal is restored so the output is readable.
print("\n🏆 CHAPTER 14 PREDICTED WINNER 🏆")
winner = df_out.iloc[0]
print(f"{winner['Name']} ({winner['School']})")



=== PRELIM RESULTS ===
    Athlete ID              Name  prelimresult
0            2  Lucas Anderson       10.269667
1            7  Noah Smith           10.319010
2            3  Carlos Smith         10.351456
3            1  Jacob Martinez       10.364457
4            5  Owen Martinez        10.393303
5            6  Ryan Garcia          10.408492
6            4  Noah Brown           10.412558
7           10  Lucas Garcia         10.416175
8           12  Brandon Garcia       10.457597
9            9  Carlos Johnson       10.459809
10          20  Owen Rivera          10.468272
11          22  John Davis           10.470141
12          18  Mateo Lee            10.471815
13          15  Brandon Rivera       10.480195
14          21  Lucas Lopez          10.502308
15          28  Alex Anderson        10.512458
16          14  Brandon Johnson      10.526719
17          23  Noah Rivera          10.532445
18          33  Carlos Rivera        10.536815
19          13  Ryan Davis          