# Predictions

In [36]:
# === Recompute Best Probability & Best Value (with LA/NY name fix + diagnostics) ===
import numpy as np
import pandas as pd
from pathlib import Path
from IPython.display import display
import re

# -------- Params --------
YEAR = 2025
WEEK = 4                 # <- change as needed
USED_TEAMS = {"Arizona", "LA Rams", "Seattle"}     # <- e.g. {"Kansas City","Philadelphia"}
TOP_N = 5              # show many rows if you want
DATA = Path("../data")

# -------- Canonical mapping (same everywhere) --------
TEAM_KEY = {
    # NFC
    "Arizona Cardinals":"Arizona","Arizona":"Arizona",
    "Atlanta Falcons":"Atlanta","Atlanta":"Atlanta",
    "Carolina Panthers":"Carolina","Carolina":"Carolina",
    "Chicago Bears":"Chicago","Chicago":"Chicago",
    "Dallas Cowboys":"Dallas","Dallas":"Dallas",
    "Detroit Lions":"Detroit","Detroit":"Detroit",
    "Green Bay Packers":"Green Bay","Green Bay":"Green Bay",
    "Los Angeles Rams":"LA Rams","L.A. Rams":"LA Rams","LA Rams":"LA Rams","Rams":"LA Rams",
    "Minnesota Vikings":"Minnesota","Minnesota":"Minnesota",
    "New Orleans Saints":"New Orleans","New Orleans":"New Orleans",
    "New York Giants":"NY Giants","NY Giants":"NY Giants","Giants":"NY Giants",
    "Philadelphia Eagles":"Philadelphia","Philadelphia":"Philadelphia",
    "San Francisco 49ers":"San Francisco","San Francisco":"San Francisco","49ers":"San Francisco",
    "Seattle Seahawks":"Seattle","Seattle":"Seattle",
    "Tampa Bay Buccaneers":"Tampa Bay","Tampa Bay":"Tampa Bay","Buccaneers":"Tampa Bay","Bucs":"Tampa Bay",
    "Washington Commanders":"Washington","Washington":"Washington",
    # AFC
    "Baltimore Ravens":"Baltimore","Baltimore":"Baltimore",
    "Buffalo Bills":"Buffalo","Buffalo":"Buffalo",
    "Cincinnati Bengals":"Cincinnati","Cincinnati":"Cincinnati",
    "Cleveland Browns":"Cleveland","Cleveland":"Cleveland",
    "Denver Broncos":"Denver","Denver":"Denver",
    "Houston Texans":"Houston","Houston":"Houston",
    "Indianapolis Colts":"Indianapolis","Indianapolis":"Indianapolis",
    "Jacksonville Jaguars":"Jacksonville","Jacksonville":"Jacksonville","Jaguars":"Jacksonville","Jags":"Jacksonville",
    "Kansas City Chiefs":"Kansas City","Kansas City":"Kansas City",
    "Las Vegas Raiders":"Las Vegas","Las Vegas":"Las Vegas","Raiders":"Las Vegas",
    "Los Angeles Chargers":"LA Chargers","L.A. Chargers":"LA Chargers","LA Chargers":"LA Chargers","Chargers":"LA Chargers",
    "Miami Dolphins":"Miami","Miami":"Miami",
    "New England Patriots":"New England","New England":"New England",
    "New York Jets":"NY Jets","NY Jets":"NY Jets","Jets":"NY Jets",
    "Pittsburgh Steelers":"Pittsburgh","Pittsburgh":"Pittsburgh",
    "Tennessee Titans":"Tennessee","Tennessee":"Tennessee",
}

# -------- Helpers --------
def clean(s): 
    return re.sub(r"\s+"," ", str(s or "")).strip()

def to_key_name(s):
    # Normalize punctuation then map to city-style key
    s = clean(s).replace("N.Y.","NY").replace("L.A.","LA")
    return TEAM_KEY.get(s, s)

def ml_to_prob(ml):
    try:
        ml = float(ml)
    except (TypeError, ValueError):
        return np.nan
    return (-ml)/((-ml)+100.0) if ml < 0 else 100.0/(ml+100.0)

def rank_to_score_series(s):
    """rank 1(best)..32(worst) -> score 32(best)..1(worst)"""
    s = pd.to_numeric(s, errors="coerce")
    return 33 - s

def logistic_expected_prob(delta, k=0.085):
    """Map rank-score differential (team - opp) -> expected win prob via logistic."""
    x = pd.to_numeric(delta, errors="coerce").fillna(0.0)
    x = np.clip(x, -50, 50)
    return 1.0 / (1.0 + np.exp(-k * x))

def safe_read_csv(path):
    try:
        return pd.read_csv(path)
    except Exception:
        return pd.DataFrame()

# -------- Load base team-week (and normalize names just in case) --------
tw = pd.read_csv(DATA / f"results_teamweek_{YEAR}.csv")
for col in ["team","opponent"]:
    if col in tw.columns:
        tw[col] = tw[col].map(to_key_name)

# Ensure is_home / is_away exist (new results may not have them)
if "is_home" not in tw.columns:
    tw["is_home"] = (tw["home_away"] == "H").astype(int)
if "is_away" not in tw.columns:
    tw["is_away"] = (tw["home_away"] == "A").astype(int)

keep_cols = ["week","team","opponent","home_away","is_home","is_away","completed",
             "game_id","game_id_api","start_time","margin","team_score","opp_score","win","loss"]
tw = tw[[c for c in keep_cols if c in tw.columns]].copy()

# -------- Load odds (short/preferred) and normalize names --------
odds = pd.read_csv(DATA / f"odds_long_{YEAR}.csv")
if "win_prob" not in odds.columns:
    if "win_prob_book" in odds.columns:
        odds = odds.rename(columns={"win_prob_book":"win_prob"})
    else:
        odds["win_prob"] = odds["ml"].map(ml_to_prob)

for col in ["team","opponent"]:
    if col in odds.columns:
        odds[col] = odds[col].map(to_key_name)

odds = odds[[c for c in ["week","team","opponent","home_away","ml","win_prob","book","commence_time"]
             if c in odds.columns]].copy()

# -------- Merge odds -> team-week --------
keys = ["week","team","opponent","home_away"]
cand = tw.merge(odds, on=keys, how="left")

# Fill missing with complement from opponent row (P = 1 - P(opp))
tmp = cand[keys + ["win_prob"]].rename(columns={
    "team":"opponent","opponent":"team","home_away":"home_away_opp","win_prob":"opp_win_prob"})
tmp["home_away"] = np.where(tmp["home_away_opp"]=="H","A","H")
tmp = tmp.drop(columns=["home_away_opp"])
cand = cand.merge(tmp, on=keys, how="left")
cand["win_prob"] = np.where(cand["win_prob"].isna() & cand["opp_win_prob"].notna(),
                            1.0 - cand["opp_win_prob"], cand["win_prob"])
cand = cand.drop(columns=["opp_win_prob"])

# Backstop: compute from ml when present
if "ml" in cand.columns:
    need = cand["win_prob"].isna() & cand["ml"].notna()
    cand.loc[need, "win_prob"] = cand.loc[need, "ml"].map(ml_to_prob)

# -------- Load rankings and compute expected prob from ranks --------
cbs = safe_read_csv(DATA / f"cbs_rank_{YEAR}.csv")
fox = safe_read_csv(DATA / f"fox_rank_{YEAR}.csv")
nfl = safe_read_csv(DATA / f"nfl_rank_{YEAR}.csv")

def attach_rank(df, src, col):
    if src.empty:
        return pd.Series(np.nan, index=df.index), pd.Series(np.nan, index=df.index)
    s_team = df[["team"]].merge(src.rename(columns={"team_key":"team"})[["team",col]],
                                on="team", how="left")[col]
    s_opp  = df[["opponent"]].merge(src.rename(columns={"team_key":"opponent"})[["opponent",col]],
                                    on="opponent", how="left")[col]
    return pd.to_numeric(s_team, errors="coerce"), pd.to_numeric(s_opp, errors="coerce")

cand["cbs_rank"], cand["cbs_rank_opp"] = attach_rank(cand, cbs, "cbs_rank")
cand["fox_rank"], cand["fox_rank_opp"] = attach_rank(cand, fox, "fox_rank")
cand["nfl_rank"], cand["nfl_rank_opp"] = attach_rank(cand, nfl, "nfl_rank")

team_scores = pd.concat([
    rank_to_score_series(cand["cbs_rank"]),
    rank_to_score_series(cand["fox_rank"]),
    rank_to_score_series(cand["nfl_rank"]),
], axis=1)

opp_scores = pd.concat([
    rank_to_score_series(cand["cbs_rank_opp"]),
    rank_to_score_series(cand["fox_rank_opp"]),
    rank_to_score_series(cand["nfl_rank_opp"]),
], axis=1)

cand["rank_score_avg"]     = team_scores.mean(axis=1, skipna=True)
cand["rank_score_avg_opp"] = opp_scores.mean(axis=1, skipna=True)
cand["rank_score_diff"]    = cand["rank_score_avg"] - cand["rank_score_avg_opp"]
cand["expected_prob_rank"] = logistic_expected_prob(cand["rank_score_diff"])

# -------- Filter for the target week & Survivor constraints --------
pool = cand[(cand["week"]==WEEK) & (~cand["team"].isin(USED_TEAMS))].copy()
# We DO NOT drop NaN win_prob so all teams remain visible

# -------- Outputs --------
# Best Probability: only rows that have win_prob
cols_prob = [c for c in ["week","team","opponent","home_away","is_home","win_prob","ml","book",
                         "rank_score_avg","rank_score_avg_opp","rank_score_diff","expected_prob_rank"]
             if c in pool.columns]
best_prob = (pool[pool["win_prob"].notna()]
             .sort_values(["win_prob","rank_score_diff"], ascending=[False, False])
             .loc[:, cols_prob]
             .head(TOP_N))

# Best Value: market vs rank expected (NaN value_score sinks to bottom)
pool["value_score"] = 0.7*pool["win_prob"] - 0.3*pool["expected_prob_rank"]
cols_val = [c for c in ["week","team","opponent","home_away","is_home","win_prob","expected_prob_rank",
                        "value_score","rank_score_avg","rank_score_avg_opp","rank_score_diff","ml","book"]
            if c in pool.columns]
best_value = (pool
              .sort_values(["value_score","win_prob"], ascending=[False, False], na_position="last")
              .loc[:, cols_val]
              .head(TOP_N))

print(f"Week {WEEK} — Best Probability (top {TOP_N})")
display(best_prob)

print(f"Week {WEEK} — Best Value (top {TOP_N})")
display(best_value)

# -------- Diagnostics (to catch any remaining name/merge issues) --------
missing_odds = pool[pool["win_prob"].isna()][["team","opponent","home_away"]].drop_duplicates()
print(f"Teams this week without odds rows: {len(missing_odds)}")
display(missing_odds.head(20))

anti = pool.merge(
    odds[["week","team","opponent","home_away"]],
    on=["week","team","opponent","home_away"],
    how="left", indicator=True
)
not_found = anti[anti["_merge"]=="left_only"][["team","opponent","home_away"]].drop_duplicates()
print("Rows in results with no matching odds (after normalization):")
display(not_found.head(20))

Week 4 — Best Probability (top 5)


Unnamed: 0,week,team,opponent,home_away,is_home,win_prob,ml,book,rank_score_avg,rank_score_avg_opp,rank_score_diff,expected_prob_rank
99,4,Buffalo,New Orleans,H,1,0.935484,-1450.0,draftkings,31.0,1.333333,29.666667,0.925647
106,4,Detroit,Cleveland,H,1,0.84252,-535.0,draftkings,29.5,9.0,20.5,0.851004
105,4,Denver,Cincinnati,H,1,0.82906,-485.0,draftkings,19.0,14.0,5.0,0.604679
108,4,Houston,Tennessee,H,1,0.803922,-410.0,draftkings,9.0,2.333333,6.666667,0.637994
107,4,Green Bay,Dallas,A,0,0.782609,-360.0,draftkings,26.0,10.0,16.0,0.79576


Week 4 — Best Value (top 5)


Unnamed: 0,week,team,opponent,home_away,is_home,win_prob,expected_prob_rank,value_score,rank_score_avg,rank_score_avg_opp,rank_score_diff,ml,book
105,4,Denver,Cincinnati,H,1,0.82906,0.604679,0.398938,19.0,14.0,5.0,-485.0,draftkings
99,4,Buffalo,New Orleans,H,1,0.935484,0.925647,0.377145,31.0,1.333333,29.666667,-1450.0,draftkings
108,4,Houston,Tennessee,H,1,0.803922,0.637994,0.371347,9.0,2.333333,6.666667,-410.0,draftkings
119,4,New England,Carolina,H,1,0.714286,0.492917,0.352125,8.666667,9.0,-0.333333,-250.0,draftkings
106,4,Detroit,Cleveland,H,1,0.84252,0.851004,0.334462,29.5,9.0,20.5,-535.0,draftkings


Teams this week without odds rows: 0


Unnamed: 0,team,opponent,home_away


Rows in results with no matching odds (after normalization):


Unnamed: 0,team,opponent,home_away


# Another prediction

In [38]:
# === Survivor Planner: future-aware pick with matchup edges & lookahead ===
import numpy as np
import pandas as pd
from pathlib import Path

# ---------------------------- CONFIG ---------------------------------------
YEAR       = 2025
THIS_WEEK  = 4                          # <- set current week
LOOKAHEAD  = 4                          # weeks to look ahead (3-6 is a good range)
BEAM_SIZE  = 80                         # search width (40-150 reasonable)
USED_TEAMS = set()                      # e.g., {"Kansas City", "Philadelphia"}

DATA = Path("../data")

# Weights for the single-week model score (tweak as you like)
W_MARKET   = 0.55   # market win prob (from moneyline)
W_MATCHUP  = 0.25   # pass/rush matchup edges
W_POWER    = 0.10   # average media power ranks (if available)
W_WL       = 0.06   # W/L to date (if available)
W_SOS      = 0.04   # strength of schedule to date (if available)

# Opportunity-cost penalty when using a team now instead of saving for later
OPP_PENALTY = 0.35  # higher -> more conservative (preserve future great spots)

# ---------------------- LOAD WHAT WE HAVE ----------------------------------
def safe_read_csv(p):
    try:
        return pd.read_csv(p)
    except Exception:
        return pd.DataFrame()

results = safe_read_csv(DATA / f"results_teamweek_{YEAR}.csv")  # from your ESPN results scraper
odds    = safe_read_csv(DATA / f"odds_long_{YEAR}.csv")         # short odds set you said we can use
off     = safe_read_csv(DATA / f"offense_power_{YEAR}.csv")     # offense ranks we built (rank_pass, rank_rush)
def_    = safe_read_csv(DATA / f"defense_power_{YEAR}.csv")     # defense ranks we built (rank_pass_def, rank_rush_def)

# Optional enrichments if present in your repo
power_avg = safe_read_csv(DATA / f"power_avg_{YEAR}.csv")       # columns: team, avg_power_rank (1 best)
tw_strength = safe_read_csv(DATA / f"team_strength_weekly_{YEAR}.csv")  # if you have it: wpct_to_date, sos_to_date, etc.

# ---------------------- NORMALIZE & PREP -----------------------------------
def to_prob_from_ml(ml):
    try:
        ml = float(ml)
    except Exception:
        return np.nan
    return (-ml)/((-ml)+100.0) if ml < 0 else 100.0/(ml+100.0)

# keep/lighten results
keep_cols = ["week","team","opponent","home_away"]
for c in ["is_home","is_away","completed","team_score","opp_score","win","loss","game_id","game_id_api","start_time"]:
    if c in results.columns:
        keep_cols.append(c)
results = results[keep_cols].copy()

# odds -> pick win_prob and keep minimal columns
if "win_prob" not in odds.columns:
    odds["win_prob"] = odds["ml"].map(to_prob_from_ml)
odds = odds[[c for c in ["week","team","opponent","home_away","ml","win_prob","book","commence_time"] if c in odds.columns]]

# offense/defense rank columns (flexible pickers)
def pick_col(df, includes, must_contain=None):
    cols = [c for c in df.columns if all(x in c.lower() for x in includes)]
    if must_contain:
        cols = [c for c in cols if must_contain in c.lower()]
    if not cols:
        return None
    cols.sort(key=len)
    return cols[0]

# Ensure team key
if "team_key" in off.columns and "team" not in off.columns:
    off = off.rename(columns={"team_key":"team"})
if "team_key" in def_.columns and "team" not in def_.columns:
    def_ = def_.rename(columns={"team_key":"team"})

# Expected column names from the power files we built earlier:
# offense: team, rank_pass, rank_rush
# defense: team, rank_pass_def, rank_rush_def
col_off_pass  = pick_col(off, ["pass"], must_contain="rank")  or "rank_pass"
col_off_rush  = pick_col(off, ["rush"], must_contain="rank")  or "rank_rush"
col_def_ppass = pick_col(def_,["pass","def"], must_contain="rank") or "rank_pass_def"
col_def_prush = pick_col(def_,["rush","def"], must_contain="rank") or "rank_rush_def"

for need, df_, name in [
    (col_off_pass,  off,  "offense pass rank"),
    (col_off_rush,  off,  "offense rush rank"),
    (col_def_ppass, def_, "defense pass rank"),
    (col_def_prush, def_, "defense rush rank"),
]:
    if need not in df_.columns:
        raise KeyError(f"Missing column for {name}. Available: {list(df_.columns)}")

# Optional average media power rank
if not power_avg.empty:
    if "team_key" in power_avg.columns and "team" not in power_avg.columns:
        power_avg = power_avg.rename(columns={"team_key":"team"})
    col_power = pick_col(power_avg, ["avg","power","rank"]) or "avg_power_rank"
    if col_power not in power_avg.columns:
        power_avg[col_power] = np.nan
    power_avg = power_avg[["team", col_power]].rename(columns={col_power:"avg_power_rank"})

# Optional W/L & SoS by team-week
if not tw_strength.empty:
    # try to detect useful fields; fallback to None
    c_wpct = pick_col(tw_strength, ["wpct","to","date"]) or pick_col(tw_strength, ["wpct"])
    c_sos  = pick_col(tw_strength, ["sos"])
    keep_s = ["week","team"]
    if c_wpct: keep_s.append(c_wpct)
    if c_sos: keep_s.append(c_sos)
    tw_strength = tw_strength[keep_s].rename(columns={(c_wpct or "wpct"): "wpct_to_date",
                                                      (c_sos or "sos"): "sos_to_date"})
else:
    tw_strength = pd.DataFrame()

# ---------------------- BUILD WEEKLY CANDIDATES ----------------------------
# Merge odds into results for alignment
key_cols = ["week","team","opponent","home_away"]
cand = results.merge(odds, on=key_cols, how="left", suffixes=("","_odds"))

# Pull team + opp offense/defense ranks
cand = cand.merge(off[["team", col_off_pass, col_off_rush]].rename(
    columns={col_off_pass:"rank_pass_off", col_off_rush:"rank_rush_off"}),
    on="team", how="left")

cand = cand.merge(def_[["team", col_def_ppass, col_def_prush]].rename(
    columns={col_def_ppass:"rank_pass_def", col_def_prush:"rank_rush_def"}),
    left_on="opponent", right_on="team", how="left", suffixes=("","_opp")).drop(columns=["team_opp"])

# Optional power ranks
if not power_avg.empty:
    cand = cand.merge(power_avg, on="team", how="left")
    cand = cand.merge(power_avg.rename(columns={"team":"opponent","avg_power_rank":"avg_power_rank_opp"}),
                      on="opponent", how="left")

# Optional W/L & SoS
if not tw_strength.empty:
    cand = cand.merge(tw_strength, on=["week","team"], how="left")
    cand = cand.merge(tw_strength.rename(columns={
        "team":"opponent","wpct_to_date":"wpct_to_date_opp","sos_to_date":"sos_to_date_opp"
    }), on=["week","opponent"], how="left")

# Compute matchup edges: lower rank number = stronger unit, so invert to “score”
def inv_rank(r):
    r = pd.to_numeric(r, errors="coerce")
    return (33 - r)  # 1->32 (best), 32->1 (worst)

cand["pass_off_score"]  = inv_rank(cand["rank_pass_off"])
cand["rush_off_score"]  = inv_rank(cand["rank_rush_off"])
cand["pass_def_opp_sc"] = inv_rank(cand["rank_pass_def"])
cand["rush_def_opp_sc"] = inv_rank(cand["rank_rush_def"])

# Edge (higher = better for team): offense score - opponent defense score
cand["edge_pass"] = cand["pass_off_score"] - cand["pass_def_opp_sc"]
cand["edge_rush"] = cand["rush_off_score"] - cand["rush_def_opp_sc"]

# Z-normalize helper
def z(s):
    s = pd.to_numeric(s, errors="coerce")
    m = s.mean(); sd = s.std(ddof=0)
    if sd == 0 or pd.isna(sd): return pd.Series(np.zeros(len(s)), index=s.index)
    return (s - m)/sd

# Build features to combine
cand["z_market"] = z(cand["win_prob"])
cand["z_epass"]  = z(cand["edge_pass"])
cand["z_erush"]  = z(cand["edge_rush"])

if "avg_power_rank" in cand.columns:
    cand["z_power"] = z(33 - pd.to_numeric(cand["avg_power_rank"], errors="coerce"))
else:
    cand["z_power"] = 0.0

if "wpct_to_date" in cand.columns:
    cand["z_wl"] = z(cand["wpct_to_date"])
else:
    cand["z_wl"] = 0.0

if "sos_to_date" in cand.columns:
    # tougher SOS -> credit (we standardize, so direction can be tuned; here harder SOS -> +)
    cand["z_sos"] = z(cand["sos_to_date"])
else:
    cand["z_sos"] = 0.0

# Single-week model score and a calibrated probability (sigmoid over z-mixture)
cand["model_score"] = (W_MARKET*cand["z_market"] + W_MATCHUP*(0.6*cand["z_epass"] + 0.4*cand["z_erush"])
                       + W_POWER*cand["z_power"] + W_WL*cand["z_wl"] + W_SOS*cand["z_sos"])

# map to [0,1] prob via logistic; center at 0 with moderate slope
cand["model_win_prob"] = 1.0/(1.0 + np.exp(-0.95*cand["model_score"]))

# If market exists, carry a pure-market probability for “value”
cand["market_win_prob"] = cand["win_prob"]
cand["value_score"]     = cand["model_win_prob"] - cand["market_win_prob"]

# Keep one row per team/week (home vs away duplicate shouldn’t exist, but be safe)
cand = cand.drop_duplicates(subset=["week","team"]).reset_index(drop=True)

# Filter to remaining schedule weeks
remaining = sorted(cand["week"].dropna().unique())
remaining = [w for w in remaining if w >= THIS_WEEK]

# ---------------------- LOOK-AHEAD BEAM SEARCH -----------------------------
# Goal: choose one pick per week, no team reuse, maximize product of model_win_prob.
# Beam search keeps the top BEAM_SIZE partial plans at each step.

def beam_plan(cand, start_week, horizon, beam_size, used_teams):
    weeks = [w for w in sorted(cand["week"].unique()) if w >= start_week][:horizon]
    # Pre-index candidates by week
    by_week = {w: cand[(cand["week"]==w) & (~cand["team"].isin(used_teams))].copy() for w in weeks}
    # start
    beams = [({"used": set(used_teams), "seq": [], "logp": 0.0})]
    for w in weeks:
        next_beams = []
        wk = by_week[w].sort_values("model_win_prob", ascending=False)
        for b in beams:
            # viable options: not yet used
            wk_ok = wk[~wk["team"].isin(b["used"])].head(50)  # cap per-branch breadth
            for _, r in wk_ok.iterrows():
                nb = {
                    "used": b["used"] | {r["team"]},
                    "seq":  b["seq"] + [(w, r["team"], r["opponent"], r["home_away"], float(r["model_win_prob"]), float(r.get("market_win_prob", np.nan)))],
                    "logp": b["logp"] + np.log(max(1e-6, float(r["model_win_prob"])))
                }
                next_beams.append(nb)
        if not next_beams:
            break
        # keep top-K
        next_beams.sort(key=lambda x: x["logp"], reverse=True)
        beams = next_beams[:beam_size]
    # best beam
    beams.sort(key=lambda x: x["logp"], reverse=True)
    return beams

beams = beam_plan(cand, THIS_WEEK, LOOKAHEAD, BEAM_SIZE, USED_TEAMS)

# Aggregate how often a team is chosen THIS_WEEK among the best sequences,
# but account for opportunity cost: subtract penalty proportional to that team's
# best future week prob if picked now.
if beams:
    best = beams[:min(20, len(beams))]  # inspect top N sequences
else:
    best = []

# Precompute each team's best future prob after THIS_WEEK
future_best = (cand[cand["week"]>THIS_WEEK]
               .groupby("team")["model_win_prob"].max().rename("best_future_prob").to_dict())

def opp_cost(team):
    return OPP_PENALTY * future_best.get(team, 0.0)

# Build recommendation table for THIS_WEEK only
wk = cand[(cand["week"]==THIS_WEEK) & (~cand["team"].isin(USED_TEAMS))].copy()

# how often in top sequences
from collections import Counter
appear = Counter()
for b in best:
    for (w,t,opp,ha,p_m,p_q) in b["seq"]:
        if w == THIS_WEEK:
            appear[t] += 1

wk["seq_hits"] = wk["team"].map(lambda t: appear.get(t, 0))

# final future-aware pick score = log(model_prob) - opportunity cost + small tie-breaks
wk["pick_score"] = np.log(wk["model_win_prob"].clip(1e-6, 1.0)) - wk["team"].map(opp_cost) \
                   + 0.02*wk["seq_hits"] + 0.01*wk["value_score"].fillna(0)

rec = wk.sort_values(["pick_score","model_win_prob","value_score"], ascending=False).reset_index(drop=True)

# ---------------------- OUTPUTS --------------------------------------------
def pct(x): 
    return f"{100*float(x):.1f}%" if pd.notna(x) else ""

cols_show = [c for c in [
    "team","opponent","home_away",
    "model_win_prob","market_win_prob","value_score",
    "edge_pass","edge_rush","rank_pass_off","rank_rush_off","rank_pass_def","rank_rush_def",
    "avg_power_rank","wpct_to_date","sos_to_date",
    "seq_hits","pick_score","ml","book"
] if c in rec.columns]

print(f"=== Survivor recommendation for Week {THIS_WEEK} (lookahead {LOOKAHEAD}, beam {BEAM_SIZE}) ===")
display(rec[cols_show].head(15))

# Helper to explain a team row
def explain(team_name, week=THIS_WEEK, table=rec):
    row = table[table["team"].str.lower().eq(team_name.lower()) & (table["model_win_prob"].notna())]
    if row.empty:
        print(f"No row for {team_name} in week {week}.")
        return
    r = row.iloc[0]
    print(f"Week {week}: {r['team']} vs {r['opponent']} ({r['home_away']})")
    print(f"  Model prob: {pct(r['model_win_prob'])} | Market: {pct(r.get('market_win_prob'))} | Value: {r.get('value_score'):+.3f}")
    print(f"  Pass edge: {r.get('edge_pass'):+.1f}  (off rank {r.get('rank_pass_off')}, opp pass-D rank {r.get('rank_pass_def')})")
    print(f"  Rush edge: {r.get('edge_rush'):+.1f}  (off rank {r.get('rank_rush_off')}, opp rush-D rank {r.get('rank_rush_def')})")
    if 'avg_power_rank' in r: print(f"  Avg power rank: {r['avg_power_rank']}")
    if 'wpct_to_date' in r:   print(f"  W/L to date: {r['wpct_to_date']:.3f}")
    if 'sos_to_date' in r:    print(f"  SOS to date: {r['sos_to_date']:.3f}")
    bf = future_best.get(r['team'], np.nan)
    print(f"  Best future prob (opp-cost reference): {pct(bf)} ; Seq hits: {int(r.get('seq_hits',0))}")
    print(f"  Pick score (future-aware): {r['pick_score']:+.3f}")

# Example: explain top pick
if not rec.empty:
    explain(rec.iloc[0]["team"])

=== Survivor recommendation for Week 4 (lookahead 4, beam 80) ===


Unnamed: 0,team,opponent,home_away,model_win_prob,market_win_prob,value_score,edge_pass,edge_rush,rank_pass_off,rank_rush_off,rank_pass_def,rank_rush_def,wpct_to_date,sos_to_date,seq_hits,pick_score,ml,book
0,Buffalo,New Orleans,H,0.827415,0.935484,-0.108069,22.0,13.0,6.0,1.0,28.0,14.0,1.0,3.148955,20,-0.059502,-1450.0,draftkings
1,Denver,Cincinnati,H,0.717293,0.82906,-0.111767,7.0,10.0,22.0,10.0,29.0,20.0,0.333333,0.329905,0,-0.460443,-485.0,draftkings
2,LA Chargers,NY Giants,A,0.716167,0.73545,-0.019283,17.0,4.0,3.0,26.0,20.0,30.0,1.0,3.089031,0,-0.542587,-278.0,draftkings
3,Houston,Tennessee,H,0.63793,0.814815,-0.176885,-14.0,6.0,30.0,25.0,16.0,31.0,0.0,-0.333782,0,-0.554133,-440.0,draftkings
4,Detroit,Cleveland,H,0.730425,0.84252,-0.112095,9.0,-2.0,4.0,3.0,13.0,1.0,0.666667,2.075655,0,-0.579738,-535.0,draftkings
5,New England,Carolina,H,0.608911,0.714286,-0.105375,-2.0,-3.0,7.0,24.0,5.0,21.0,0.333333,0.422482,0,-0.600958,-250.0,draftkings
6,San Francisco,Jacksonville,H,0.611345,0.657534,-0.046189,16.0,-24.0,2.0,29.0,18.0,5.0,1.0,0.556317,0,-0.651602,-192.0,draftkings
7,LA Rams,Indianapolis,H,0.607804,0.657534,-0.04973,6.0,-1.0,8.0,11.0,14.0,10.0,0.666667,-0.127227,0,-0.694884,-192.0,draftkings
8,Las Vegas,Chicago,H,0.556602,0.545455,0.011147,21.0,-4.0,10.0,30.0,31.0,26.0,0.333333,-0.409265,0,-0.701074,-120.0,draftkings
9,Minnesota,Pittsburgh,A,0.564189,0.586777,-0.022588,-4.0,13.0,27.0,14.0,23.0,27.0,0.666667,0.468163,0,-0.770237,-142.0,draftkings


Week 4: Buffalo vs New Orleans (H)
  Model prob: 82.7% | Market: 93.5% | Value: -0.108
  Pass edge: +22.0  (off rank 6.0, opp pass-D rank 28.0)
  Rush edge: +13.0  (off rank 1.0, opp rush-D rank 14.0)
  W/L to date: 1.000
  SOS to date: 3.149
  Best future prob (opp-cost reference): 76.8% ; Seq hits: 20
  Pick score (future-aware): -0.060


# Pred #3

In [44]:
# ================== CURRENT-WEEK PICK WITH FUTURE-AWARE LOOKAHEAD ==================
# Assumes you already have: cand with columns:
#   week, team, opponent, home_away, model_win_prob, value_score, edge_pass, edge_rush, ...
# And variables: YEAR, THIS_WEEK, LOOKAHEAD, BEAM_SIZE, USED_TEAMS (set())

import numpy as np
import pandas as pd

# Keep a full copy for lookahead, but lock presentation to THIS_WEEK
cand_all = cand.copy()

# Sanity: ensure needed cols
need = {"week","team","opponent","home_away","model_win_prob"}
missing = need - set(cand_all.columns)
if missing:
    raise KeyError(f"cand is missing columns: {missing}")

# Helper: beam over weeks AFTER a given week, starting with a used-team set
def best_future_logp(cand_full: pd.DataFrame, start_week: int, horizon: int, beam_size: int, used_teams: set) -> float:
    weeks = [w for w in sorted(cand_full["week"].unique()) if w > start_week][:horizon]
    if not weeks:
        return 0.0
    # index by week
    by_week = {w: cand_full[cand_full["week"]==w].copy() for w in weeks}
    # beams hold (used_set, logp)
    beams = [(set(used_teams), 0.0)]
    for w in weeks:
        nxt = []
        wk_cand = by_week[w].sort_values("model_win_prob", ascending=False)
        for used, lp in beams:
            # viable (not yet used)
            viable = wk_cand[~wk_cand["team"].isin(used)]
            # cap breadth per branch
            for _, r in viable.head(50).iterrows():
                p = float(r["model_win_prob"])
                if not np.isfinite(p) or p <= 0:
                    continue
                nxt.append((used | {r["team"]}, lp + np.log(max(1e-6, p))))
        if not nxt:
            break
        # keep best K partial plans
        nxt.sort(key=lambda x: x[1], reverse=True)
        beams = nxt[:beam_size]
    if not beams:
        return 0.0
    # best continuation logp
    return max(lp for _, lp in beams)

# Build the current-week table
wk = (cand_all[(cand_all["week"]==THIS_WEEK) & (~cand_all["team"].isin(USED_TEAMS))].copy()
        .reset_index(drop=True))

if wk.empty:
    raise RuntimeError(f"No candidate games found for week {THIS_WEEK} after filtering.")

# Compute continuation value for each possible pick this week
cont_vals = []
for _, r in wk.iterrows():
    team = r["team"]
    used_now = set(USED_TEAMS) | {team}
    # best future plan if we pick this team now
    cont_logp = best_future_logp(
        cand_full=cand_all,
        start_week=THIS_WEEK,
        horizon=LOOKAHEAD-1,   # remaining weeks in the window
        beam_size=BEAM_SIZE,
        used_teams=used_now
    )
    cur_logp = np.log(max(1e-6, float(r["model_win_prob"])))
    total_logp = cur_logp + cont_logp
    cont_vals.append(total_logp)

wk["plan_total_logp"] = cont_vals

# Optional tie-breaks: incorporate slight boosts for value/edges (keeps focus on win prob)
wk["pick_score"] = wk["plan_total_logp"] \
                   + 0.01 * wk.get("value_score", 0).fillna(0) \
                   + 0.005 * wk.get("edge_pass", 0).fillna(0) \
                   + 0.005 * wk.get("edge_rush", 0).fillna(0)

# Present the CURRENT WEEK ONLY recommendations (best first)
cols_show = [c for c in [
    "team","opponent","home_away",
    "model_win_prob","market_win_prob","value_score",
    "edge_pass","edge_rush","rank_pass_off","rank_rush_off","rank_pass_def","rank_rush_def",
    "avg_power_rank","wpct_to_date","sos_to_date","ml","book",
    "pick_score"
] if c in wk.columns]

wk_rec = wk.sort_values(["pick_score","model_win_prob","value_score"], ascending=False).reset_index(drop=True)
print(f"=== Survivor picks for Week {THIS_WEEK} (lookahead {LOOKAHEAD}, beam {BEAM_SIZE}) ===")
display(wk_rec[cols_show].head(15))

# (Optional) Explain top N with their future continuation quality
def explain_pick(row):
    def pct(x): return f"{100*float(x):.1f}%"
    print(f"- {row['team']} vs {row['opponent']} ({row['home_away']}): "
          f"model {pct(row['model_win_prob'])}, pick_score {row['pick_score']:+.3f}")

for i in range(min(5, len(wk_rec))):
    explain_pick(wk_rec.iloc[i])

=== Survivor picks for Week 4 (lookahead 14, beam 100) ===


Unnamed: 0,team,opponent,home_away,model_win_prob,market_win_prob,value_score,edge_pass,edge_rush,rank_pass_off,rank_rush_off,rank_pass_def,rank_rush_def,wpct_to_date,sos_to_date,ml,book,pick_score
0,Buffalo,New Orleans,H,0.803267,0.935484,-0.132217,22.0,13.0,6.0,1.0,28.0,14.0,1.0,0.111111,-1450.0,draftkings,-0.341151
1,LA Chargers,NY Giants,A,0.772315,0.73545,0.036865,16.0,4.0,4.0,26.0,20.0,30.0,1.0,0.333333,-278.0,draftkings,-0.398824
2,Green Bay,Dallas,A,0.692254,0.782609,-0.090355,19.0,5.0,13.0,18.0,32.0,23.0,0.666667,0.555556,-360.0,draftkings,-0.489536
3,Detroit,Cleveland,H,0.693679,0.84252,-0.14884,11.0,-2.0,2.0,3.0,13.0,1.0,0.666667,0.444444,-535.0,draftkings,-0.563063
4,Tampa Bay,Philadelphia,H,0.690787,0.384615,0.306171,-2.0,8.0,10.0,17.0,8.0,25.0,1.0,0.111111,160.0,draftkings,-0.577692
5,Philadelphia,Tampa Bay,A,0.724038,0.657534,0.066504,-14.0,2.0,29.0,5.0,15.0,7.0,1.0,0.444444,-192.0,draftkings,-0.623075
6,LA Rams,Indianapolis,H,0.664056,0.657534,0.006521,5.0,-1.0,9.0,11.0,14.0,10.0,0.666667,0.333333,-192.0,draftkings,-0.630154
7,Indianapolis,LA Rams,A,0.66651,0.384615,0.281895,-5.0,6.0,16.0,2.0,11.0,8.0,1.0,0.111111,160.0,draftkings,-0.638711
8,San Francisco,Jacksonville,H,0.684104,0.657534,0.02657,15.0,-26.0,3.0,31.0,18.0,5.0,1.0,0.416667,-192.0,draftkings,-0.67521
9,Minnesota,Pittsburgh,A,0.594908,0.586777,0.008131,-4.0,14.0,27.0,13.0,23.0,27.0,0.666667,0.444444,-142.0,draftkings,-0.710097


- Buffalo vs New Orleans (H): model 80.3%, pick_score -0.341
- LA Chargers vs NY Giants (A): model 77.2%, pick_score -0.399
- Green Bay vs Dallas (A): model 69.2%, pick_score -0.490
- Detroit vs Cleveland (H): model 69.4%, pick_score -0.563
- Tampa Bay vs Philadelphia (H): model 69.1%, pick_score -0.578


# Pt3

In [45]:
# === Survivor: current-week ranking with matchup edges + future-aware planning ===
import numpy as np
import pandas as pd
from pathlib import Path

# ----------------------- CONFIG -----------------------
YEAR       = 2025
THIS_WEEK  = 4                # set to the current NFL week
LOOKAHEAD  = 4                # how many future weeks to consider in planning
BEAM_SIZE  = 80               # search width (40-150 sensible)
USED_TEAMS = set()            # e.g., {"Kansas City", "Philadelphia"}
DATA       = Path("../data")

# Weights for the single-week model (feel free to tweak)
W_MARKET     = 0.45   # market/odds baseline
W_MATCHUP    = 0.30   # pass vs pass-D & rush vs rush-D (offense-centric)
W_DEF_MATCH  = 0.10   # defense vs opponent offense (optional, if you want D to matter too)
W_POWER      = 0.10   # consensus power (avg media/consensus rank)
W_WL         = 0.03   # season W/L to date (team_consensus_strength or weekly)
W_SOS        = 0.02   # SOS to date (harder schedule -> slight credit)

# Within MATCHUP: relative weight of pass vs rush edges
MATCHUP_PASS_WEIGHT = 0.6
MATCHUP_RUSH_WEIGHT = 0.4

# Turn one-week greed down by reserving elite future spots (higher = more conservative)
OPP_COST_PENALTY = 0.35

# ----------------------- HELPERS -----------------------
def safe_read_csv(p):
    try:
        return pd.read_csv(p)
    except Exception:
        return pd.DataFrame()

def pick_col(df, includes, must_contain=None, default=None):
    cols = [c for c in df.columns if all(x in c.lower() for x in includes)]
    if must_contain:
        cols = [c for c in cols if must_contain in c.lower()]
    if cols:
        cols.sort(key=len)  # prefer shorter, cleaner names
        return cols[0]
    return default

def inv_rank(r):
    # convert rank (1 best..32 worst) to a score (32..1 best->largest)
    r = pd.to_numeric(r, errors="coerce")
    return 33 - r

def z(s):
    s = pd.to_numeric(s, errors="coerce")
    m = s.mean(); sd = s.std(ddof=0)
    if not np.isfinite(sd) or sd == 0:
        return pd.Series(np.zeros(len(s)), index=s.index)
    return (s - m) / sd

def ml_to_prob(ml):
    try:
        ml = float(ml)
    except (TypeError, ValueError):
        return np.nan
    if ml < 0:  # favorite
        return (-ml)/((-ml)+100.0)
    return 100.0/(ml+100.0)

# ----------------------- LOAD --------------------------
# schedule/results
results = safe_read_csv(DATA / f"results_teamweek_{YEAR}.csv")

# odds (preferred: odds_long_{YEAR}.csv that you wrote)
odds = safe_read_csv(DATA / f"odds_long_{YEAR}.csv")
if odds.empty:
    # fallback: the “all books” file; take DraftKings or best
    ob = safe_read_csv(DATA / f"odds_long_allbooks_{YEAR}.csv")
    if not ob.empty:
        # choose preferred line per team-week (prefer DraftKings else highest win prob)
        ob["win_prob_book"] = ob["ml"].map(ml_to_prob)
        def best_book(g):
            dk = g[g["book"].str.lower()=="draftkings"]
            if not dk.empty:
                return dk.sort_values("win_prob_book", ascending=False).iloc[0]
            return g.sort_values("win_prob_book", ascending=False).iloc[0]
        odds = (ob.groupby(["week","team","opponent","home_away"], group_keys=False)
                  .apply(best_book)
                  .reset_index(drop=True))

# offense and defense power files
off = safe_read_csv(DATA / f"offense_power_{YEAR}.csv")
def_ = safe_read_csv(DATA / f"defense_power_{YEAR}.csv")

# consensus team strength (your weighted 32-row table) – used for power, wins, sos
cons_strength = safe_read_csv(DATA / f"team_consensus_strength_{YEAR}.csv")

# consensus rank file (optional; if you prefer directly)
consensus_rank = safe_read_csv(DATA / f"consensus_rank_{YEAR}.csv")

# ----------------------- NORMALIZE COLUMN NAMES -----------------------------
# team col
for df in (off, def_, cons_strength, consensus_rank):
    if "team" not in df.columns and "team_key" in df.columns:
        df.rename(columns={"team_key":"team"}, inplace=True)

# detect offense rank columns
col_off_pass = pick_col(off, ["pass"], must_contain="rank", default="rank_pass")
col_off_rush = pick_col(off, ["rush"], must_contain="rank", default="rank_rush")

# detect defense rank columns
col_def_pass = pick_col(def_, ["pass","def"], must_contain="rank", default="rank_pass_def")
col_def_rush = pick_col(def_, ["rush","def"], must_contain="rank", default="rank_rush_def")

# detect power rank
if not consensus_rank.empty:
    col_cons_rank = pick_col(consensus_rank, ["consensus","rank"], default=pick_col(consensus_rank, ["avg","power","rank"], default=None))
else:
    col_cons_rank = None

# ----------------------- BUILD CANDIDATES (ALL WEEKS) ----------------------
# base from results + odds
key_cols = ["week","team","opponent","home_away"]
keep_cols = key_cols + [c for c in ["completed","win","loss","team_score","opp_score"] if c in results.columns]
base = results[keep_cols].drop_duplicates(subset=key_cols).copy()

if "win_prob" not in odds.columns and "win_prob_book" in odds.columns:
    odds["win_prob"] = odds["win_prob_book"]
if "win_prob" not in odds.columns and "ml" in odds.columns:
    odds["win_prob"] = odds["ml"].map(ml_to_prob)

odds_keep = [c for c in ["week","team","opponent","home_away","ml","win_prob","book","commence_time"] if c in odds.columns]
base = base.merge(odds[odds_keep], on=key_cols, how="left", suffixes=("","_odds"))

# attach offense ranks (team)
base = base.merge(off[["team", col_off_pass, col_off_rush]].rename(columns={
    col_off_pass:"rank_pass_off", col_off_rush:"rank_rush_off"
}), on="team", how="left")

# attach opponent defense ranks
base = base.merge(def_[["team", col_def_pass, col_def_rush]].rename(columns={
    "team":"opponent",
    col_def_pass:"rank_pass_def_opp",
    col_def_rush:"rank_rush_def_opp"
}), on="opponent", how="left")

# (Optional) also consider team defense vs opp offense (defense-centric edge)
base = base.merge(def_[["team", col_def_pass, col_def_rush]].rename(columns={
    col_def_pass:"rank_pass_def",
    col_def_rush:"rank_rush_def"
}), on="team", how="left")

base = base.merge(off[["team", col_off_pass, col_off_rush]].rename(columns={
    "team":"opponent",
    col_off_pass:"rank_pass_off_opp",
    col_off_rush:"rank_rush_off_opp"
}), on="opponent", how="left")

# consensus strength (wins, sos, power)
if not cons_strength.empty:
    # prefer your already-assembled consensus table
    cs = cons_strength.copy()
    # detect columns
    col_wpct = pick_col(cs, ["wpct"], default=None)
    col_sos  = pick_col(cs, ["sos"], default=None)
    col_power_rank = pick_col(cs, ["overall","rank"], default=None)
    col_power_score= pick_col(cs, ["total","score"], default=None)
    keep_cs = ["team"]
    if col_wpct:       keep_cs.append(col_wpct)
    if col_sos:        keep_cs.append(col_sos)
    if col_power_rank: keep_cs.append(col_power_rank)
    if col_power_score:keep_cs.append(col_power_score)

    cs = cs[keep_cs].copy()
    if col_wpct:        cs.rename(columns={col_wpct:"wpct_to_date"}, inplace=True)
    if col_sos:         cs.rename(columns={col_sos:"sos_to_date"}, inplace=True)
    if col_power_rank:  cs.rename(columns={col_power_rank:"avg_power_rank"}, inplace=True)
    if col_power_score: cs.rename(columns={col_power_score:"power_total_score"}, inplace=True)

    base = base.merge(cs, on="team", how="left")
else:
    # fallback: use consensus_rank if provided
    if col_cons_rank and not consensus_rank.empty:
        cr = consensus_rank[["team", col_cons_rank]].rename(columns={col_cons_rank:"avg_power_rank"})
        base = base.merge(cr, on="team", how="left")

# ----------------------- FEATURES -----------------------
# Market
base["z_market"] = z(base["win_prob"])

# Matchup edges (offense vs opp-defense)
base["pass_off_sc"]  = inv_rank(base["rank_pass_off"])
base["rush_off_sc"]  = inv_rank(base["rank_rush_off"])
base["pass_def_opp"] = inv_rank(base["rank_pass_def_opp"])
base["rush_def_opp"] = inv_rank(base["rank_rush_def_opp"])

base["edge_pass"] = base["pass_off_sc"] - base["pass_def_opp"]
base["edge_rush"] = base["rush_off_sc"] - base["rush_def_opp"]
base["z_epass"]   = z(base["edge_pass"])
base["z_erush"]   = z(base["edge_rush"])

# Optional defense-centric edge (your D vs their O)
base["pass_def_sc"]  = inv_rank(base["rank_pass_def"])
base["rush_def_sc"]  = inv_rank(base["rank_rush_def"])
base["pass_off_opp"] = inv_rank(base["rank_pass_off_opp"])
base["rush_off_opp"] = inv_rank(base["rank_rush_off_opp"])
base["edge_def"]     = ((base["pass_def_sc"] + base["rush_def_sc"])/2.0
                        - (base["pass_off_opp"] + base["rush_off_opp"])/2.0)
base["z_edef"]       = z(base["edge_def"])

# Consensus power (convert rank to score, then z) – prefer power_total_score if present
if "power_total_score" in base.columns:
    base["z_power"] = z(base["power_total_score"])
elif "avg_power_rank" in base.columns:
    base["z_power"] = z(inv_rank(base["avg_power_rank"]))
else:
    base["z_power"] = 0.0

# W/L and SOS (if present)
base["z_wl"]  = z(base.get("wpct_to_date", pd.Series(index=base.index)))
base["z_sos"] = z(base.get("sos_to_date", pd.Series(index=base.index)))

# Single-week model score
base["model_score"] = (
    W_MARKET  * base["z_market"]
  + W_MATCHUP * (MATCHUP_PASS_WEIGHT*base["z_epass"] + MATCHUP_RUSH_WEIGHT*base["z_erush"])
  + W_DEF_MATCH * base["z_edef"]
  + W_POWER   * base["z_power"]
  + W_WL      * base["z_wl"]
  + W_SOS     * base["z_sos"]
)

# Map to probability via logistic
base["model_win_prob"] = 1.0/(1.0 + np.exp(-0.95*base["model_score"]))
base["market_win_prob"] = base["win_prob"]
base["value_score"]     = base["model_win_prob"] - base["market_win_prob"]

# Remove accidental duplicates
base = base.drop_duplicates(subset=key_cols).reset_index(drop=True)

# ----------------------- FUTURE-AWARE PLANNING -----------------------------
# Beam search: choose one team per week (no reuse), maximize product of model_win_prob
def beam_plan(cand, start_week, horizon, beam_size, used_teams):
    weeks = [w for w in sorted(cand["week"].dropna().unique()) if w >= start_week][:horizon]
    by_week = {w: cand[(cand["week"]==w) & (~cand["team"].isin(used_teams))].copy() for w in weeks}
    beams = [({"used": set(used_teams), "seq": [], "logp": 0.0})]
    for w in weeks:
        next_beams = []
        wk = by_week[w].sort_values("model_win_prob", ascending=False)
        for b in beams:
            wk_ok = wk[~wk["team"].isin(b["used"])].head(50)  # prune per week
            for _, r in wk_ok.iterrows():
                nb = {
                    "used": b["used"] | {r["team"]},
                    "seq":  b["seq"] + [(w, r["team"], r["opponent"], r["home_away"],
                                         float(r["model_win_prob"]), float(r.get("market_win_prob", np.nan)))],
                    "logp": b["logp"] + np.log(max(1e-6, float(r["model_win_prob"])))
                }
                next_beams.append(nb)
        if not next_beams:
            break
        next_beams.sort(key=lambda x: x["logp"], reverse=True)
        beams = next_beams[:beam_size]
    beams.sort(key=lambda x: x["logp"], reverse=True)
    return beams

beams = beam_plan(base, THIS_WEEK, LOOKAHEAD, BEAM_SIZE, USED_TEAMS)

# Opportunity cost: if we use team now, we forfeit its best future week
future_best = (base[base["week"]>THIS_WEEK]
               .groupby("team")["model_win_prob"].max()
               .rename("best_future_prob").to_dict())

def opp_cost(team):
    return OPP_COST_PENALTY * future_best.get(team, 0.0)

# Current-week table
wk = base[(base["week"]==THIS_WEEK) & (~base["team"].isin(USED_TEAMS))].copy()

# How often team appears in top sequences this week (tie-break)
from collections import Counter
appear = Counter()
for b in beams[:min(20, len(beams))]:
    for (w,t,opp,ha,p_m,p_q) in b["seq"]:
        if w == THIS_WEEK:
            appear[t] += 1
wk["seq_hits"] = wk["team"].map(lambda t: appear.get(t, 0))

# Final pick score: log(model prob) - opp cost + small boosts for sequence frequency & value
wk["pick_score"] = (np.log(wk["model_win_prob"].clip(1e-6,1.0))
                    - wk["team"].map(opp_cost)
                    + 0.02*wk["seq_hits"]
                    + 0.01*wk["value_score"].fillna(0))

# ----------------------- OUTPUTS -----------------------
cols_show = [c for c in [
    "team","opponent","home_away",
    "model_win_prob","market_win_prob","value_score",
    "edge_pass","edge_rush","edge_def",
    "rank_pass_off","rank_rush_off","rank_pass_def_opp","rank_rush_def_opp",
    "avg_power_rank","wpct_to_date","sos_to_date",
    "seq_hits","pick_score","ml","book"
] if c in wk.columns]

print(f"=== Survivor (Week {THIS_WEEK}) with matchup edges & future-aware planning ===")
display(wk.sort_values(["pick_score","model_win_prob","value_score"], ascending=False)[cols_show].head(15))

=== Survivor (Week 4) with matchup edges & future-aware planning ===


Unnamed: 0,team,opponent,home_away,model_win_prob,market_win_prob,value_score,edge_pass,edge_rush,edge_def,rank_pass_off,rank_rush_off,rank_pass_def_opp,rank_rush_def_opp,avg_power_rank,wpct_to_date,sos_to_date,seq_hits,pick_score,ml,book
99,Buffalo,New Orleans,H,0.815819,0.935484,-0.119664,22.0,13.0,5.0,6.0,1.0,28.0,14.0,1.0,1.0,0.111111,20,-0.06786,-1450.0,draftkings
105,Denver,Cincinnati,H,0.730536,0.82906,-0.098524,9.0,10.0,13.0,20.0,10.0,29.0,20.0,16.0,0.333333,0.666667,0,-0.458684,-485.0,draftkings
112,LA Chargers,NY Giants,A,0.727004,0.73545,-0.008446,16.0,4.0,12.0,4.0,26.0,20.0,30.0,2.0,1.0,0.333333,0,-0.53366,-278.0,draftkings
108,Houston,Tennessee,H,0.619854,0.814815,-0.194961,-14.0,6.0,13.0,30.0,25.0,16.0,31.0,27.0,0.0,0.777778,0,-0.584394,-440.0,draftkings
106,Detroit,Cleveland,H,0.718193,0.84252,-0.124327,11.0,-2.0,2.0,2.0,3.0,13.0,1.0,7.0,0.666667,0.444444,0,-0.601133,-535.0,draftkings
119,New England,Carolina,H,0.583781,0.714286,-0.130505,-3.0,-3.0,7.0,8.0,24.0,5.0,21.0,20.0,0.333333,0.333333,0,-0.644595,-250.0,draftkings
123,San Francisco,Jacksonville,H,0.60693,0.657534,-0.050604,15.0,-26.0,4.5,3.0,31.0,18.0,5.0,9.0,1.0,0.416667,0,-0.662602,-192.0,draftkings
113,LA Rams,Indianapolis,H,0.60736,0.657534,-0.050174,5.0,-1.0,-0.5,9.0,11.0,14.0,10.0,6.0,0.666667,0.333333,0,-0.703028,-192.0,draftkings
116,Minnesota,Pittsburgh,A,0.5885,0.586777,0.001723,-4.0,14.0,10.5,27.0,13.0,23.0,27.0,12.0,0.666667,0.444444,0,-0.728522,-142.0,draftkings
114,Las Vegas,Chicago,H,0.530552,0.545455,-0.014902,19.0,-3.0,-8.0,12.0,29.0,31.0,26.0,26.0,0.333333,0.666667,0,-0.747255,-120.0,draftkings


# Taking everything (I think) into account

In [58]:
# === Survivor Picks with full feature set + lookahead ======================
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter

# ---------------------------- CONFIG ---------------------------------------
YEAR          = 2025
WEEK_OVERRIDE = 4          # <-- set this each week
LOOKAHEAD     = max(1, 18 - WEEK_OVERRIDE)   # through Week 18 by default
BEAM_SIZE     = 80
USED_TEAMS    = set()      # e.g., {"Kansas City", "Philadelphia"}

# Single-week model weights (tune freely; they’re z-normalized inside)
W_MARKET  = 0.40   # market-implied win prob (from ML)
W_MATCHUP = 0.25   # pass/rush matchup edges (60/40 split)
W_POWER   = 0.15   # consensus media power (inverted rank)
W_WL      = 0.05   # W/L to date
W_SOS     = 0.15   # SOS to date (opponents’ W/L to date)

# Future-aware: penalty for using a team now if they have easy weeks or big win spots later
OPP_PENALTY_BY_PROB = 0.35   # scale vs future best model prob
OPP_PENALTY_BY_EASE = 0.15   # scale vs projected SOS "ease" (lower SOS → easier → bigger penalty)

DATA = Path("../data")

# ---------------------------- IO helpers -----------------------------------
def safe_read_csv(p):
    try:
        return pd.read_csv(p)
    except Exception:
        return pd.DataFrame()

def to_prob_from_ml(ml):
    try:
        ml = float(ml)
    except Exception:
        return np.nan
    return (-ml)/((-ml)+100.0) if ml < 0 else 100.0/(ml+100.0)

def z(s):
    s = pd.to_numeric(s, errors="coerce")
    m, sd = s.mean(), s.std(ddof=0)
    if sd == 0 or pd.isna(sd):
        return pd.Series(np.zeros(len(s)), index=s.index)
    return (s - m) / sd

def inv_rank(r):             # 1 best .. 32 worst  → 32 .. 1 score
    r = pd.to_numeric(r, errors="coerce")
    return 33 - r

# ---------------------------- Load data ------------------------------------
results = safe_read_csv(DATA / f"results_teamweek_{YEAR}.csv")        # schedule + final scores
odds    = safe_read_csv(DATA / f"odds_long_{YEAR}.csv")               # preferred book per team-week
off     = safe_read_csv(DATA / f"offense_power_{YEAR}.csv")           # offense ranks
def_    = safe_read_csv(DATA / f"defense_power_{YEAR}.csv")           # defense ranks
cons    = safe_read_csv(DATA / f"consensus_rank_{YEAR}.csv")          # average/consensus power ranks
projSOS = safe_read_csv(DATA / f"projected_sos_{YEAR}.csv")           # from the “projected SOS” step

# normalize team column to 'team'
for df in (off, def_, cons):
    if "team" not in df.columns and "team_key" in df.columns:
        df.rename(columns={"team_key":"team"}, inplace=True)

# Flexible column pickers
def pick_col(df, includes, must=None, default=None):
    cols = [c for c in df.columns if all(k in c.lower() for k in includes)]
    if must: cols = [c for c in cols if must in c.lower()]
    if cols:
        cols.sort(key=len)
        return cols[0]
    return default

col_off_pass  = pick_col(off,  ["pass"], must="rank", default="rank_pass")
col_off_rush  = pick_col(off,  ["rush"], must="rank", default="rank_rush")
col_def_ppass = pick_col(def_, ["pass","def"], must="rank", default="rank_pass_def")
col_def_prush = pick_col(def_, ["rush","def"], must="rank", default="rank_rush_def")
col_cons_rank = pick_col(cons, ["consensus","rank"], default=pick_col(cons,["avg","power","rank"], default=None))
if col_cons_rank is None:
    col_cons_rank = "rank" if "rank" in cons.columns else cons.columns[-1]

# ------------------ Build W/L-to-date and SOS-to-date from results ---------
res = results.copy()
if "completed" in res.columns:
    res = res[res["completed"] == True]

need = {"week","team","opponent","win"}
missing = [c for c in need if c not in res.columns]
if missing:
    # build win boolean from scores if needed
    if {"team_score","opp_score"}.issubset(res.columns):
        res["win"] = (res["team_score"] > res["opp_score"]).astype(int)
    else:
        raise KeyError(f"results_teamweek_{YEAR}.csv missing columns {missing} and scores to derive them.")

# W/L to date through WEEK_OVERRIDE-1
to_date = res[res["week"] < WEEK_OVERRIDE].copy()
wl = (to_date.groupby("team", as_index=False)
              .agg(wins=("win","sum"), games=("win","size")))
wl["losses"] = wl["games"] - wl["wins"]
wl["wpct_to_date"] = np.where(wl["games"]>0, wl["wins"]/wl["games"], np.nan)

# SOS to date = mean opponent wpct_to_date (computed consistently)
opp_wpct = wl.set_index("team")["wpct_to_date"].to_dict()
to_date["opp_wpct_to_date"] = to_date["opponent"].map(opp_wpct)
sos = (to_date.groupby("team", as_index=False)
              .agg(sos_to_date=("opp_wpct_to_date","mean")))

wl_sos = wl.merge(sos, on="team", how="outer")

# ------------------ Build current and future-week candidate rows -----------
key_cols = ["week","team","opponent","home_away"]
cands = results[key_cols].drop_duplicates().copy()
# Current and future weeks only
cands = cands[cands["week"] >= WEEK_OVERRIDE].reset_index(drop=True)

# attach odds → pick win_prob and moneyline
if not {"ml","win_prob"}.issubset(odds.columns):
    odds["win_prob"] = odds["ml"].map(to_prob_from_ml)
odds_keep = [c for c in ["week","team","opponent","home_away","ml","win_prob","book","commence_time"] if c in odds.columns]
cands = cands.merge(odds[odds_keep], on=key_cols, how="left")

# attach offense ranks for team & defense ranks for opponent
cands = cands.merge(off[["team",col_off_pass,col_off_rush]].rename(
    columns={col_off_pass:"rank_pass_off", col_off_rush:"rank_rush_off"}), on="team", how="left")

cands = cands.merge(def_[["team",col_def_ppass,col_def_prush]].rename(
    columns={col_def_ppass:"rank_pass_def", col_def_prush:"rank_rush_def"}), left_on="opponent", right_on="team",
    how="left", suffixes=("","_opp")).drop(columns=["team_opp"])

# consensus power for team / opponent
power = cons[["team", col_cons_rank]].rename(columns={col_cons_rank:"consensus_rank"})
cands = cands.merge(power.rename(columns={"team":"team", "consensus_rank":"team_consensus_rank"}), on="team", how="left")
cands = cands.merge(power.rename(columns={"team":"opponent", "consensus_rank":"opp_consensus_rank"}), on="opponent", how="left")

# W/L & SOS to date (team rows only)
cands = cands.merge(wl_sos, on="team", how="left")

# Projected SOS (power + matchup) from current week forward
if not projSOS.empty:
    cands = cands.merge(
        projSOS.rename(columns={
            "future_sos_power":   "future_sos_power_from_week",
            "future_sos_matchup": "future_sos_matchup_from_week",
            "future_sos_proj":    "future_sos_proj_from_week"
        }),
        on=["team","week"], how="left"
    )

# ------------------ Build single-week model features -----------------------
# matchup edges (higher is better):  inverse ranks → scores, compare to opp defense
cands["pass_off_sc"] = inv_rank(cands["rank_pass_off"])
cands["rush_off_sc"] = inv_rank(cands["rank_rush_off"])
cands["pass_def_opp_sc"] = inv_rank(cands["rank_pass_def"])
cands["rush_def_opp_sc"] = inv_rank(cands["rank_rush_def"])

cands["edge_pass"] = cands["pass_off_sc"] - cands["pass_def_opp_sc"]
cands["edge_rush"] = cands["rush_off_sc"] - cands["rush_def_opp_sc"]

# z-normed features
cands["z_market"] = z(cands["win_prob"])
cands["z_epass"]  = z(cands["edge_pass"])
cands["z_erush"]  = z(cands["edge_rush"])

# power: invert rank (1 best → 32 → high score)
cands["z_power_team"] = z(inv_rank(cands["team_consensus_rank"]))
cands["z_power_opp"]  = z(inv_rank(cands["opp_consensus_rank"]))  # (optional, not used in score)

# to-date W/L and SOS
cands["z_wl"]  = z(cands["wpct_to_date"])
cands["z_sos"] = z(cands["sos_to_date"])

# single-week score → calibrated probability
cands["model_score"] = (
    W_MARKET * cands["z_market"] +
    W_MATCHUP * (0.60*cands["z_epass"] + 0.40*cands["z_erush"]) +
    W_POWER   * cands["z_power_team"] +
    W_WL      * cands["z_wl"] +
    W_SOS     * cands["z_sos"]
)
cands["model_win_prob"]  = 1/(1 + np.exp(-0.95*cands["model_score"]))
cands["market_win_prob"] = cands["win_prob"]
cands["value_score"]     = cands["model_win_prob"] - cands["market_win_prob"]

# one row per (week, team)
cands = cands.drop_duplicates(subset=["week","team"]).reset_index(drop=True)

# ------------------ Lookahead beam search (do not burn future value) -------
weeks = sorted(cands["week"].unique())
weeks = [w for w in weeks if WEEK_OVERRIDE <= w <= WEEK_OVERRIDE + LOOKAHEAD - 1]

# Precompute best future prob & future ease for opportunity-cost
future_table = cands[cands["week"] > WEEK_OVERRIDE].copy()
best_future_prob = future_table.groupby("team")["model_win_prob"].max().rename("best_future_prob")
future_ease = (-future_table["future_sos_proj_from_week"]).groupby(future_table["team"]).max().rename("best_future_ease")

best_future = pd.concat([best_future_prob, future_ease], axis=1)

def opp_cost(team):
    r = best_future.loc[team] if team in best_future.index else None
    p = float(r["best_future_prob"]) if r is not None and pd.notna(r["best_future_prob"]) else 0.0
    e = float(r["best_future_ease"]) if r is not None and pd.notna(r["best_future_ease"]) else 0.0
    return OPP_PENALTY_BY_PROB * p + OPP_PENALTY_BY_EASE * e

# Beam search
by_week = {w: cands[cands["week"]==w].sort_values("model_win_prob", ascending=False) for w in weeks}
beams = [({"used": set(USED_TEAMS), "seq": [], "logp": 0.0})]

for w in weeks:
    nxt = []
    frame = by_week[w]
    for b in beams:
        picks = frame[~frame["team"].isin(b["used"])].head(50)
        for _, r in picks.iterrows():
            lp = np.log(max(1e-6, float(r["model_win_prob"])))
            # subtract opportunity cost for using this team now
            lp -= opp_cost(r["team"])
            nxt.append({
                "used": b["used"] | {r["team"]},
                "seq":  b["seq"] + [(w, r["team"], r["opponent"], r["home_away"], float(r["model_win_prob"]))],
                "logp": b["logp"] + lp
            })
    if not nxt: break
    nxt.sort(key=lambda x: x["logp"], reverse=True)
    beams = nxt[:BEAM_SIZE]

beams.sort(key=lambda x: x["logp"], reverse=True)
top_beams = beams[:min(20, len(beams))]

# ------------------ THIS WEEK recommendation table -------------------------
wk = cands[(cands["week"]==WEEK_OVERRIDE) & (~cands["team"].isin(USED_TEAMS))].copy()

# count how often a team appears at WEEK_OVERRIDE among best sequences
hits = Counter()
for b in top_beams:
    for (w,t,opp,ha,p) in b["seq"]:
        if w == WEEK_OVERRIDE:
            hits[t] += 1
wk["seq_hits"] = wk["team"].map(lambda t: hits.get(t, 0))

# final pick score (log model prob – opp-cost + small ties on seq_hits/value)
wk["pick_score"] = (
    np.log(wk["model_win_prob"].clip(1e-6, 1.0))
    - wk["team"].map(opp_cost)
    + 0.02*wk["seq_hits"]
    + 0.01*wk["value_score"].fillna(0)
)

# nice columns
show_cols = [c for c in [
    "team","opponent","home_away",
    "model_win_prob","market_win_prob","value_score",
    "edge_pass","edge_rush",
    "rank_pass_off","rank_rush_off","rank_pass_def","rank_rush_def",
    "team_consensus_rank","wpct_to_date","sos_to_date",
    "future_sos_proj_from_week","seq_hits","pick_score","ml","book"
] if c in wk.columns]

wk = wk.sort_values(["pick_score","model_win_prob","value_score"], ascending=False).reset_index(drop=True)

print(f"=== Survivor recommendation for Week {WEEK_OVERRIDE} (lookahead {LOOKAHEAD}, beam {BEAM_SIZE}) ===")
display(wk[show_cols].head(15))

# quick explainer helper
def explain(team_name, week=WEEK_OVERRIDE, table=wk):
    row = table[(table["team"].str.lower()==team_name.lower())]
    if row.empty:
        print(f"No row for {team_name} in week {week}.")
        return
    r = row.iloc[0]
    pct = lambda x: f"{100*float(x):.1f}%" if pd.notna(x) else "—"
    print(f"Week {week}: {r['team']} vs {r['opponent']} ({r['home_away']})")
    print(f"  Model prob: {pct(r['model_win_prob'])} | Market: {pct(r.get('market_win_prob'))} | Value: {r.get('value_score'):+.3f}")
    print(f"  Pass edge: {r.get('edge_pass'):+.1f}  (off rank {r.get('rank_pass_off')}, opp pass-D rank {r.get('rank_pass_def')})")
    print(f"  Rush edge: {r.get('edge_rush'):+.1f}  (off rank {r.get('rank_rush_off')}, opp rush-D rank {r.get('rank_rush_def')})")
    print(f"  Consensus power rank: {r.get('team_consensus_rank')},  W/L to date: {r.get('wpct_to_date'):.3f}  |  SOS to date: {r.get('sos_to_date'):.3f}")
    if 'future_sos_proj_from_week' in r:
        print(f"  Projected SOS (from week {week}): {r['future_sos_proj_from_week']:.3f} (lower= easier)")
    bf = best_future.loc[r['team']]["best_future_prob"] if r['team'] in best_future.index else np.nan
    print(f"  Best future win-spot prob: {pct(bf)} ; Seq hits (top plans): {int(r.get('seq_hits',0))}")
    print(f"  Final pick score: {r['pick_score']:+.3f}")

# Example: explain the #1 pick
if not wk.empty:
    explain(wk.iloc[0]["team"])

=== Survivor recommendation for Week 4 (lookahead 14, beam 80) ===


Unnamed: 0,team,opponent,home_away,model_win_prob,market_win_prob,value_score,edge_pass,edge_rush,rank_pass_off,rank_rush_off,rank_pass_def,rank_rush_def,team_consensus_rank,wpct_to_date,sos_to_date,future_sos_proj_from_week,seq_hits,pick_score,ml,book
0,Green Bay,Dallas,A,0.724841,0.782609,-0.057767,19.0,5.0,13.0,18.0,32.0,23.0,6.25,0.666667,0.555556,0.055106,20,0.062445,-360.0,draftkings
1,Denver,Cincinnati,H,0.708396,0.82906,-0.120664,9.0,10.0,20.0,10.0,29.0,20.0,14.5,0.333333,0.666667,0.302386,0,-0.450369,-485.0,draftkings
2,Houston,Tennessee,H,0.613139,0.814815,-0.201676,-14.0,6.0,30.0,25.0,16.0,31.0,24.0,0.0,0.777778,0.771762,0,-0.508975,-440.0,draftkings
3,Buffalo,New Orleans,H,0.757593,0.935484,-0.177891,22.0,13.0,6.0,1.0,28.0,14.0,2.0,1.0,0.111111,-0.136651,0,-0.564298,-1450.0,draftkings
4,LA Chargers,NY Giants,A,0.682658,0.73545,-0.052792,16.0,4.0,4.0,26.0,20.0,30.0,3.5,1.0,0.333333,0.130938,0,-0.572696,-278.0,draftkings
5,Atlanta,Washington,H,0.483687,0.574468,-0.090781,-10.0,-8.0,31.0,14.0,21.0,6.0,22.5,0.333333,0.666667,0.683593,0,-0.628887,-135.0,draftkings
6,Las Vegas,Chicago,H,0.552805,0.545455,0.00735,19.0,-3.0,12.0,29.0,31.0,26.0,24.25,0.333333,0.666667,0.422092,0,-0.667533,-120.0,draftkings
7,Detroit,Cleveland,H,0.708303,0.84252,-0.134216,11.0,-2.0,2.0,3.0,13.0,1.0,4.5,0.666667,0.444444,-0.281714,0,-0.691107,-535.0,draftkings
8,Philadelphia,Tampa Bay,A,0.59635,0.657534,-0.061184,-14.0,2.0,29.0,5.0,15.0,7.0,1.0,1.0,0.444444,0.379217,0,-0.697517,-192.0,draftkings
9,San Francisco,Jacksonville,H,0.589073,0.657534,-0.068461,15.0,-26.0,3.0,31.0,18.0,5.0,10.25,1.0,0.444444,0.317946,0,-0.698869,-192.0,draftkings


Week 4: Green Bay vs Dallas (A)
  Model prob: 72.5% | Market: 78.3% | Value: -0.058
  Pass edge: +19.0  (off rank 13.0, opp pass-D rank 32.0)
  Rush edge: +5.0  (off rank 18.0, opp rush-D rank 23.0)
  Consensus power rank: 6.25,  W/L to date: 0.667  |  SOS to date: 0.556
  Projected SOS (from week 4): 0.055 (lower= easier)
  Best future win-spot prob: — ; Seq hits (top plans): 20
  Final pick score: +0.062
