We import the Python dependencies and open a DuckDB connection that every later cell reuses. We also load the preferred specs and the modeling frame, then confirm that seasons are present and sorted so the split is chronological.

In [1]:
import warnings
from pathlib import Path

import duckdb
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats

# Silence RuntimeWarning for the rest of the session.
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Locate nflpa.duckdb by walking from the working directory up through its
# parents. Every level is checked for db/nflpa.duckdb first; only when no
# level has it is every level checked for a bare nflpa.duckdb.
CWD = Path().resolve()
DB_FILE = None
search_dirs = [CWD, *CWD.parents]
for rel_path in (Path("db") / "nflpa.duckdb", Path("nflpa.duckdb")):
    DB_FILE = next(
        (d / rel_path for d in search_dirs if (d / rel_path).exists()),
        None,
    )
    if DB_FILE is not None:
        break
if DB_FILE is None:
    raise RuntimeError("Could not find nflpa.duckdb")

# Opened writable: the export cell later creates tables on this connection.
con = duckdb.connect(str(DB_FILE), read_only=False)

# Both step-18 tables must already exist before anything is loaded.
need = ["step18_model_frame", "step18_preferred_model_specs"]
table_names = con.execute("SHOW TABLES").df()["name"]
existing = set(table_names.astype(str))
missing = [name for name in need if name not in existing]
if missing:
    raise RuntimeError(f"Missing tables for step19, {missing}, run notebook 18 first")

df = con.execute("SELECT * FROM step18_model_frame").df()
pref = con.execute("SELECT * FROM step18_preferred_model_specs").df()

print("rows in step18_model_frame", len(df))
print("preferred specs")
print(pref)

if not {"season", "week"}.issubset(df.columns):
    raise RuntimeError("Missing season or week in step18_model_frame")

# Distinct seasons as sorted Python ints; the fold construction slices this list.
season_values = df["season"].dropna().astype(int)
seasons = sorted(season_values.unique().tolist())
print("seasons", seasons[:10], "to", seasons[-10:])
if len(seasons) < 3:
    raise RuntimeError("Need at least 3 seasons for chronological cross validation")

rows in step18_model_frame 5950
preferred specs
  side         outcome   family                 spec_id  \
0  def  Inj_Def_Next_w  poisson  nonscore_roll4_no_lags   
1  off  Inj_Off_Next_w  poisson  nonscore_roll4_no_lags   

                                             formula           aic  \
0  Inj_Def_Next_w ~ shock_nonscore + shock_x_blow...  20805.188499   
1  Inj_Off_Next_w ~ shock_nonscore + shock_x_blow...  20178.233429   

            bic  
0  22551.577735  
1  21924.622665  
seasons [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021] to [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]


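We define season-based forward splits that train on earlier seasons and test on later seasons. To avoid the common pitfall where season-week fixed effects block prediction in unseen seasons, the cross-validation formulas replace those fixed effects with week fixed effects plus a linear season trend.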

In [2]:
# Team identifier column: use "team" when present, otherwise "team_key".
if "team" in df.columns:
    TEAM_COL = "team"
else:
    TEAM_COL = "team_key"

# Build season_trend only when the frame does not already carry it:
# the number of seasons elapsed since the earliest season in the frame.
if "season_trend" not in df.columns:
    season_as_int = df["season"].astype(int)
    df["season_trend"] = season_as_int - int(season_as_int.min())

if df["season_trend"].isnull().any():
    raise RuntimeError("season_trend has missing values, check season column types")

def make_cv_formula(formula: str) -> str:
    """Rewrite a model formula so it can predict in seasons unseen in training.

    Season-level categorical fixed effects are replaced with week fixed
    effects plus the numeric ``season_trend`` column:

    * ``C(season_week)``        -> ``C(week) + season_trend``
    * ``C(season) + C(week)``   -> ``C(week) + season_trend``
    * ``C(week) + C(season)``   -> ``C(week) + season_trend``

    Matching tolerates any whitespace around ``+`` and inside the
    parentheses, so ``C(season)+C(week)`` is rewritten too. An exact-string
    match would leave such a formula untouched and keep the season fixed
    effect in the model.

    Parameters
    ----------
    formula : str
        Formula text; it is passed through ``str()`` first.

    Returns
    -------
    str
        The rewritten formula, or the input text unchanged when none of the
        patterns occur.
    """
    import re  # local import so the function does not depend on the import cell

    replacement = "C(week) + season_trend"
    f = str(formula)

    # Applied in a fixed order: season-week term first, then the two
    # orderings of separate season and week terms.
    f = re.sub(r"C\(\s*season_week\s*\)", replacement, f)
    f = re.sub(r"C\(\s*season\s*\)\s*\+\s*C\(\s*week\s*\)", replacement, f)
    f = re.sub(r"C\(\s*week\s*\)\s*\+\s*C\(\s*season\s*\)", replacement, f)

    # Collapse a doubled space left next to the substituted terms.
    f = f.replace("+  C(week) + season_trend", "+ C(week) + season_trend")
    f = f.replace("+ C(week)  + season_trend", "+ C(week) + season_trend")

    return f

# First preferred-spec row for each side, as a plain dict.
pref_def, pref_off = (
    pref.loc[pref["side"] == side].iloc[0].to_dict() for side in ("def", "off")
)

cv_formula_def = make_cv_formula(pref_def["formula"])
cv_formula_off = make_cv_formula(pref_off["formula"])

# Show only the first 240 characters of each rewritten formula.
for header, formula_text in (
    ("cv formula defense", cv_formula_def),
    ("cv formula offense", cv_formula_off),
):
    print(header)
    print(formula_text[:240], "...")

# Forward-chaining folds: each of the last N_TEST_SEASONS seasons is held out
# in turn, and the model trains on every strictly earlier season.
N_TEST_SEASONS = 2
test_seasons = seasons[-N_TEST_SEASONS:]
folds = [
    {
        "train_seasons": [x for x in seasons if x < held_out],
        "test_seasons": [held_out],
    }
    for held_out in test_seasons
]

print("folds")
for i, f in enumerate(folds, start=1):
    print(i, "train up to", max(f["train_seasons"]), "test", f["test_seasons"])

cv formula defense
Inj_Def_Next_w ~ shock_nonscore + shock_x_blowout + vol_nonscore_roll4_prior + cum_shocks_nonscore_prior + short_week_flag_w + bye_last_week_flag_w + home_flag_w + blowout_flag_w + points_for + points_against + offensive_snaps_w + defensive ...
cv formula offense
Inj_Off_Next_w ~ shock_nonscore + shock_x_blowout + vol_nonscore_roll4_prior + cum_shocks_nonscore_prior + short_week_flag_w + bye_last_week_flag_w + home_flag_w + blowout_flag_w + points_for + points_against + offensive_snaps_w + defensive ...
folds
1 train up to 2022 test [2023]
2 train up to 2023 test [2024]


We refit the main Models A and B on each training set and produce out-of-sample predictions on the test seasons.

In [3]:
# Outcome column and model family for each side, taken from the preferred specs.
OUTCOME_DEF, fam_def = str(pref_def["outcome"]), str(pref_def["family"])
OUTCOME_OFF, fam_off = str(pref_off["outcome"]), str(pref_off["family"])

def fit_poisson(formula: str, data: pd.DataFrame):
    """Fit a Poisson GLM for `formula` on `data` and return the fit results.

    The fit is capped at 200 iterations with console output disabled.
    """
    poisson_glm = smf.glm(formula=formula, data=data, family=sm.families.Poisson())
    return poisson_glm.fit(maxiter=200, disp=0)

def fit_nb_discrete(formula: str, data: pd.DataFrame):
    """Fit a negative binomial model via `smf.negativebinomial` and return the results.

    The fit is capped at 200 iterations with console output disabled.
    """
    nb_model = smf.negativebinomial(formula=formula, data=data)
    return nb_model.fit(disp=False, maxiter=200)

def get_alpha_nb(res) -> float:
    """Return the fitted parameter labelled "alpha" as a float, else NaN.

    This is best effort: if `res` has no usable `params`, carries no "alpha"
    entry, or the lookup or conversion fails for any reason, NaN is returned
    instead of an exception being raised.
    """
    alpha = np.nan
    try:
        coef = res.params
        if "alpha" in coef.index:
            alpha = float(coef.loc["alpha"])
    except Exception:
        # Deliberate fallback: callers treat NaN as "no dispersion available".
        alpha = np.nan
    return alpha

def predict_mean(res, data: pd.DataFrame) -> np.ndarray:
    """Predict with `res` on `data` and return the result as a bounded float array.

    Whatever `res.predict(data)` returns is converted to a float ndarray and
    clipped to the interval [1e-10, 1e12].
    """
    raw = np.asarray(res.predict(data), dtype=float)
    return np.clip(raw, 1e-10, 1e12)

def loglik_poisson(y: np.ndarray, mu: np.ndarray) -> float:
    """Return the total Poisson log-likelihood of counts `y` given means `mu`."""
    pointwise = stats.poisson.logpmf(y, mu)
    return float(pointwise.sum())

def loglik_nb2(y: np.ndarray, mu: np.ndarray, alpha: float) -> float:
    """Return the total negative binomial log-likelihood of `y` given `mu` and `alpha`.

    The likelihood is evaluated with `scipy.stats.nbinom` using
    ``n = 1 / alpha`` and ``p = n / (n + mu)``. NaN is returned when `alpha`
    is not finite or not strictly positive.
    """
    if np.isfinite(alpha) and alpha > 0:
        size = 1.0 / alpha
        prob = size / (size + mu)
        return float(stats.nbinom.logpmf(y, size, prob).sum())
    return np.nan

def eval_metrics(y: np.ndarray, mu: np.ndarray) -> dict:
    """Return ``{"mae": ..., "rmse": ...}`` for observed `y` against predicted `mu`."""
    residual = y - mu
    return {
        "mae": float(np.abs(residual).mean()),
        "rmse": float(np.sqrt(np.mean(np.square(residual)))),
    }

# Per-fold accumulators: one metrics row per (fold, side) and one coefficient
# row per (fold, side, tracked term found in the fitted parameters).
fold_metrics = []
fold_coefs = []

# Terms whose coefficients are collected from each fold's fit; names that are
# absent from a fitted model's parameters are simply skipped.
key_terms_hint = [
    "shock_nonscore",
    "shock_x_blowout",
    "vol_nonscore_s2d_prior",
    "vol_nonscore_roll4_prior",
    "cum_shocks_nonscore_prior",
    "ST_Shock_NonScore_w_minus_1",
    "ST_Shock_NonScore_w_minus_2",
    "ST_Shock_NonScore_w_minus_3",
]

for fold_id, f in enumerate(folds, start=1):
    # Cast the season column once and reuse it for both row masks.
    season_of_row = df["season"].astype(int)
    train_df = df.loc[season_of_row.isin(f["train_seasons"])].copy()
    test_df = df.loc[season_of_row.isin(f["test_seasons"])].copy()

    if not len(train_df) or not len(test_df):
        raise RuntimeError("Empty train or test fold, check season filtering")

    print("fold", fold_id, "train rows", len(train_df), "test rows", len(test_df))

    # A failed fit is reported and leaves the result as None, so the fold is
    # still recorded (with fit_ok = 0) instead of aborting the loop.
    try:
        fit_def = fit_poisson if fam_def == "poisson" else fit_nb_discrete
        res_def = fit_def(cv_formula_def, train_df)
    except Exception as e:
        res_def = None
        print("defense fit failed fold", fold_id, str(e))

    try:
        fit_off = fit_poisson if fam_off == "poisson" else fit_nb_discrete
        res_off = fit_off(cv_formula_off, train_df)
    except Exception as e:
        res_off = None
        print("offense fit failed fold", fold_id, str(e))

    def record(side: str, outcome: str, res, fam: str, formula: str):
        """Append this fold's metrics row and key coefficients for one side.

        `res` is the fitted result, or None when fitting failed. `fam` picks
        the log-likelihood: "poisson" uses the Poisson one, anything else the
        NB2 one. `formula` is accepted but not used.
        """
        last_train = int(max(f["train_seasons"]))
        held_out = int(f["test_seasons"][0])

        # Columns shared by the failure and success rows; key order here
        # fixes the column order of the resulting DataFrame.
        row = {
            "fold_id": fold_id,
            "side": side,
            "outcome": outcome,
            "family": fam,
            "train_seasons_max": last_train,
            "test_season": held_out,
            "n_train": int(len(train_df)),
            "n_test": int(len(test_df)),
        }

        if res is None:
            row.update({
                "loglik": np.nan,
                "loglik_per_obs": np.nan,
                "mae": np.nan,
                "rmse": np.nan,
                "fit_ok": 0,
            })
            fold_metrics.append(row)
            return

        observed = test_df[outcome].astype(int).to_numpy()
        predicted = predict_mean(res, test_df)

        if fam == "poisson":
            total_ll = loglik_poisson(observed, predicted)
        else:
            total_ll = loglik_nb2(observed, predicted, get_alpha_nb(res))

        errors = eval_metrics(observed.astype(float), predicted)
        ll_usable = np.isfinite(total_ll)

        row.update({
            "loglik": float(total_ll) if ll_usable else np.nan,
            "loglik_per_obs": float(total_ll) / float(len(observed)) if ll_usable else np.nan,
            "mae": errors["mae"],
            "rmse": errors["rmse"],
            "fit_ok": 1,
        })
        fold_metrics.append(row)

        fitted = res.params.copy()
        fold_coefs.extend([
            {
                "fold_id": fold_id,
                "side": side,
                "outcome": outcome,
                "term": term,
                "beta": float(fitted.loc[term]),
                "train_seasons_max": last_train,
                "test_season": held_out,
            }
            for term in key_terms_hint
            if term in fitted.index
        ])

    record("def", OUTCOME_DEF, res_def, fam_def, cv_formula_def)
    record("off", OUTCOME_OFF, res_off, fam_off, cv_formula_off)

metrics_df = pd.DataFrame(fold_metrics)
coefs_df = pd.DataFrame(fold_coefs)

print("fold metrics")
print(metrics_df)
print("coef rows", len(coefs_df))

fold 1 train rows 4990 test rows 480
fold 2 train rows 5470 test rows 480
fold metrics
   fold_id side         outcome   family  train_seasons_max  test_season  \
0        1  def  Inj_Def_Next_w  poisson               2022         2023   
1        1  off  Inj_Off_Next_w  poisson               2022         2023   
2        2  def  Inj_Def_Next_w  poisson               2023         2024   
3        2  off  Inj_Off_Next_w  poisson               2023         2024   

   n_train  n_test      loglik  loglik_per_obs       mae      rmse  fit_ok  
0     4990     480 -889.030679       -1.852147  1.291938  1.616624       1  
1     4990     480 -869.802797       -1.812089  1.266719  1.572176       1  
2     5470     480 -902.416572       -1.880035  1.340095  1.673221       1  
3     5470     480 -875.806401       -1.824597  1.263455  1.581088       1  
coef rows 16


We compute stability checks on the key 'NonScore' exposure terms across time and then summarize sign consistency and magnitude drift

In [4]:
# Work on copies of the fold tables.
metrics_df = metrics_df.copy()
coefs_df = coefs_df.copy()

print("mean loglik per obs by side")
mean_ll_by_side = (
    metrics_df.groupby("side", dropna=False)["loglik_per_obs"].mean().reset_index()
)
print(mean_ll_by_side)

if not len(coefs_df):
    raise RuntimeError("No coefficient rows captured, this usually means key terms are missing from the fitted formulas")


def _share_matching_first_sign(betas):
    """Fraction of values whose sign equals the sign of the first value."""
    if len(betas) == 0:
        return np.nan
    return float((np.sign(betas) == np.sign(betas.iloc[0])).mean())


# One row per (side, term): spread of the coefficient across folds plus the
# share of folds agreeing in sign with the first fold.
stability = (
    coefs_df
    .groupby(["side", "term"], dropna=False)
    .agg(
        n_folds=("beta", "count"),
        mean_beta=("beta", "mean"),
        sd_beta=("beta", "std"),
        min_beta=("beta", "min"),
        max_beta=("beta", "max"),
        sign_consistency=("beta", _share_matching_first_sign),
    )
    .reset_index()
)

print("coefficient stability summary")
stability_sorted = stability.sort_values(["side", "term"]).reset_index(drop=True)
print(stability_sorted)

mean loglik per obs by side
  side  loglik_per_obs
0  def       -1.866091
1  off       -1.818343
coefficient stability summary
  side                       term  n_folds  mean_beta   sd_beta  min_beta  \
0  def  cum_shocks_nonscore_prior        2   0.010980  0.002695  0.009074   
1  def             shock_nonscore        2   0.019110  0.012595  0.010204   
2  def            shock_x_blowout        2   0.027231  0.004986  0.023706   
3  def   vol_nonscore_roll4_prior        2  -0.013457  0.000761 -0.013995   
4  off  cum_shocks_nonscore_prior        2  -0.009058  0.000099 -0.009128   
5  off             shock_nonscore        2   0.019672  0.013710  0.009978   
6  off            shock_x_blowout        2   0.098328  0.007478  0.093040   
7  off   vol_nonscore_roll4_prior        2   0.018144  0.003301  0.015810   

   max_beta  sign_consistency  
0  0.012886               1.0  
1  0.028015               1.0  
2  0.030757               1.0  
3 -0.012919               1.0  
4 -0.008988        

Quick sanity check to confirm that the exposure signs match the full sample direction and that predictive performance does not collapse in later seasons

In [5]:
# Re-read the full-sample preferred specs for side-by-side comparison.
pref_full = con.execute("SELECT * FROM step18_preferred_model_specs").df()
print("preferred full sample specs")
print(pref_full[["side", "spec_id", "family", "aic", "bic"]])

# A fold is "bad" when its fit failed or its per-observation log-likelihood is missing.
fit_failed = metrics_df["fit_ok"].eq(0)
ll_missing = metrics_df["loglik_per_obs"].isna()
bad_folds = metrics_df.loc[fit_failed | ll_missing]
print("bad folds count", len(bad_folds))
if len(bad_folds):
    print(bad_folds)

print("loglik per obs range by side")
ll_range_by_side = metrics_df.groupby("side")["loglik_per_obs"].agg(["min", "max", "mean"])
print(ll_range_by_side.reset_index())

is_shock_term = coefs_df["term"].isin(["shock_nonscore", "shock_x_blowout"])
shock_terms = coefs_df.loc[is_shock_term].copy()
print("shock term estimates by fold")
print(shock_terms.sort_values(["side", "fold_id", "term"]).reset_index(drop=True))

preferred full sample specs
  side                 spec_id   family           aic           bic
0  def  nonscore_roll4_no_lags  poisson  20805.188499  22551.577735
1  off  nonscore_roll4_no_lags  poisson  20178.233429  21924.622665
bad folds count 0
loglik per obs range by side
  side       min       max      mean
0  def -1.880035 -1.852147 -1.866091
1  off -1.824597 -1.812089 -1.818343
shock term estimates by fold
   fold_id side         outcome             term      beta  train_seasons_max  \
0        1  def  Inj_Def_Next_w   shock_nonscore  0.010204               2022   
1        1  def  Inj_Def_Next_w  shock_x_blowout  0.030757               2022   
2        2  def  Inj_Def_Next_w   shock_nonscore  0.028015               2023   
3        2  def  Inj_Def_Next_w  shock_x_blowout  0.023706               2023   
4        1  off  Inj_Off_Next_w   shock_nonscore  0.029366               2022   
5        1  off  Inj_Off_Next_w  shock_x_blowout  0.093040               2022   
6        2  of

We export cross validation metrics and coefficient stability tables to DuckDB and to outputs csv files

In [6]:
# Persist each frame as a DuckDB table: register the frame under a temporary
# view name, materialise it with CREATE OR REPLACE, then drop the view.
for tmp_view, create_sql, frame in (
    (
        "step19_metrics_tmp",
        "CREATE OR REPLACE TABLE step19_time_cv_metrics AS SELECT * FROM step19_metrics_tmp",
        metrics_df,
    ),
    (
        "step19_coefs_tmp",
        "CREATE OR REPLACE TABLE step19_time_cv_coefficients AS SELECT * FROM step19_coefs_tmp",
        coefs_df,
    ),
):
    con.register(tmp_view, frame)
    con.execute(create_sql)
    con.unregister(tmp_view)

# CSV copies go to ../outputs, resolved against the current working directory.
out_dir = Path("../outputs")
out_dir.mkdir(parents=True, exist_ok=True)

m_csv = out_dir / "step19_time_cv_metrics.csv"
c_csv = out_dir / "step19_time_cv_coefficients.csv"
s_csv = out_dir / "step19_time_cv_stability_summary.csv"

for frame, csv_path in ((metrics_df, m_csv), (coefs_df, c_csv), (stability, s_csv)):
    frame.to_csv(csv_path, index=False)

print("wrote duckdb table step19_time_cv_metrics")
print("wrote duckdb table step19_time_cv_coefficients")
for csv_path in (m_csv, c_csv, s_csv):
    print("wrote csv", csv_path.resolve())

wrote duckdb table step19_time_cv_metrics
wrote duckdb table step19_time_cv_coefficients
wrote csv /Users/ramko/Desktop/2025-26-NFLPA-Data-Analytics-Case-Competition/outputs/step19_time_cv_metrics.csv
wrote csv /Users/ramko/Desktop/2025-26-NFLPA-Data-Analytics-Case-Competition/outputs/step19_time_cv_coefficients.csv
wrote csv /Users/ramko/Desktop/2025-26-NFLPA-Data-Analytics-Case-Competition/outputs/step19_time_cv_stability_summary.csv
