We set up the Python imports and open a DuckDB connection that every later cell reuses. We also load the preferred specs and the modeling frame, and define the bootstrap parameters as constants so the iteration count (500 here) can be raised, for example to 1000, without rewriting code.

In [1]:
import warnings
from pathlib import Path

import duckdb
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Install a process-wide filter that ignores every warning of category
# RuntimeWarning (presumably to keep noisy numeric warnings out of the cell
# output -- confirm this is still wanted before relying on silent fits).
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Locate the DuckDB file by walking from the working directory up through its
# ancestors. The "db/" sub-directory layout is tried across *all* ancestors
# first; only if that fails is a bare "nflpa.duckdb" looked for.
CWD = Path().resolve()
search_dirs = [CWD, *CWD.parents]

DB_FILE = None
for rel in (Path("db") / "nflpa.duckdb", Path("nflpa.duckdb")):
    # First directory (nearest to CWD) that contains this layout, if any.
    DB_FILE = next(
        (d / rel for d in search_dirs if (d / rel).exists()),
        None,
    )
    if DB_FILE is not None:
        break

if DB_FILE is None:
    raise RuntimeError("Could not find nflpa.duckdb")

# Open the connection in read-write mode (read_only=False).
con = duckdb.connect(str(DB_FILE), read_only=False)

# Tables this notebook reads; stop immediately if any of them is absent from
# the database instead of failing later on a SELECT.
need = ["step18_model_frame", "step18_preferred_model_specs"]
existing = {str(name) for name in con.execute("SHOW TABLES").df()["name"]}
missing = [table for table in need if table not in existing]
if missing:
    raise RuntimeError(f"Missing tables for step20, {missing}, run notebook 18 first")

# Pull both tables fully into pandas DataFrames.
frame_query = "SELECT * FROM step18_model_frame"
specs_query = "SELECT * FROM step18_preferred_model_specs"
df = con.execute(frame_query).df()
pref = con.execute(specs_query).df()

# Name of the team identifier column: "team" when the model frame has it,
# otherwise fall back to "team_key".
TEAM_COL = "team" if "team" in df.columns else "team_key"


def _preferred_spec(specs, side):
    """Return the first row of *specs* whose ``side`` equals *side*, as a dict.

    Raises:
        RuntimeError: if *specs* has no row for *side*. Without this check the
            ``.iloc[0]`` lookup fails with an uninformative ``IndexError``.
    """
    rows = specs[specs["side"] == side]
    if rows.empty:
        raise RuntimeError(
            f"No preferred model spec for side {side!r} in "
            "step18_preferred_model_specs, run notebook 18 first"
        )
    return rows.iloc[0].to_dict()


# One preferred specification per side ("def" and "off"); when several rows
# match a side, the first one in table order is used.
pref_def = _preferred_spec(pref, "def")
pref_off = _preferred_spec(pref, "off")

# Coerce to plain str so downstream code does not depend on the column dtype.
formula_def = str(pref_def["formula"])
formula_off = str(pref_off["formula"])
fam_def = str(pref_def["family"])
fam_off = str(pref_off["family"])

# Bootstrap configuration: iteration count and seed, kept as module-level
# constants so they can be changed in one place.
N_BOOT = 500
SEED = 20260106

# Echo the effective settings, one item per line, so the cell output records
# what this run used.
print(
    "bootstrap settings",
    f"N_BOOT {N_BOOT}",
    f"SEED {SEED}",
    f"families {fam_def} {fam_off}",
    sep="\n",
)

bootstrap settings
N_BOOT 500
SEED 20260106
families poisson poisson
