In [17]:
import nfl_data_py as nfl
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score, brier_score_loss
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.isotonic import IsotonicRegression

In [18]:
seasons = list(range(2015, 2025))
pbp = nfl.import_pbp_data(seasons)

print(pbp.shape)

2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
2023 done.
2024 done.
Downcasting floats.
(483605, 398)


In [19]:
train = pbp.copy()

train = train.dropna(subset=['wp','qtr','game_seconds_remaining','score_differential','yardline_100'])
train['down']    = train['down'].fillna(1).clip(1,4)
train['ydstogo'] = train['ydstogo'].fillna(10).clip(1,50)
train['posteam_is_home'] = (train['posteam'] == train['home_team']).astype(int)
train['posteam_timeouts_remaining'] = train['posteam_timeouts_remaining'].fillna(2).clip(0,3)
train['defteam_timeouts_remaining'] = train['defteam_timeouts_remaining'].fillna(2).clip(0,3)
train['abs_score_diff'] = train['score_differential'].abs()
train['two_possession_offense'] = (train['score_differential'] >= 9).astype(int)

# --- 3. 📌 Add late-game signal features HERE ---
train['u120']  = (train['game_seconds_remaining'] <= 120).astype(int)
train['u60']   = (train['game_seconds_remaining'] <= 60).astype(int)
train['u30']   = (train['game_seconds_remaining'] <= 30).astype(int)
train['u15']   = (train['game_seconds_remaining'] <= 15).astype(int)
train['log_secs'] = np.log1p(train['game_seconds_remaining'])

# --- 4. Feature list ---
feat_cols = [
    'qtr','game_seconds_remaining','log_secs','score_differential','yardline_100',
    'down','ydstogo','posteam_is_home',
    'posteam_timeouts_remaining','defteam_timeouts_remaining',
    'abs_score_diff','two_possession_offense',
    'u120','u60','u30','u15'
]


X = train[feat_cols].copy()
y = train['wp'].astype(float).clip(0, 1)

# 7) Train/Validate split
Xtr, Xva, ytr, yva = train_test_split(X, y, test_size=0.2, random_state=42)

# 8) Model: more trees, modest LR; try monotonic constraints (if your sklearn supports it)
#    Monotonic constraints vector must match feat_cols order:
#      +1 means prediction should increase as feature increases; -1 decrease; 0 = no constraint
mono = [
    0,   # qtr (non-monotonic)
    0,   # game_seconds_remaining (non-monotonic globally)
    +1,  # score_differential (offense perspective)
    -1,  # yardline_100 (farther from opp EZ is worse for offense)
    -1,  # down
    -1,  # ydstogo
    +1,  # posteam_is_home
    +1,  # posteam_timeouts_remaining
    -1,  # defteam_timeouts_remaining
    +1,  # abs_score_diff (offense perspective)
    +1   # two_possession_offense
]


# --- 5. Train/Validation split ---
X = train[feat_cols]
y = train['wp']
Xtr, Xva, ytr, yva = train_test_split(X, y, test_size=0.2, random_state=42)

# --- 6. Train model ---
model = HistGradientBoostingRegressor(
    max_iter=700,
    learning_rate=0.045,
    max_depth=7,
    max_bins=255,
    early_stopping=True,
    validation_fraction=0.1
)
model.fit(Xtr, ytr)

Xcal, Xhold, ycal, yhold = train_test_split(Xva, yva, test_size=0.5, random_state=42)

p_cal_raw = np.clip(model.predict(Xcal), 0, 1)
iso = IsotonicRegression(out_of_bounds='clip')
iso.fit(p_cal_raw, ycal)

# Evaluate calibration on the held-out part of validation
p_hold_raw = np.clip(model.predict(Xhold), 0, 1)
p_hold_cal = iso.transform(p_hold_raw)

# 9) Quick sanity check
pred_va = model.predict(Xva)
print("R^2 on holdout:", r2_score(yva, pred_va))

# 10) Save
joblib.dump(model, '../models/wp_model.pkl')
print("Saved model to ../models/wp_model.pkl")

artifacts = {
    "feat_cols": feat_cols,
    "defaults": {"posteam_timeouts_remaining": 2, "defteam_timeouts_remaining": 2}
}
joblib.dump(artifacts, "../models/wp_model_meta.pkl")


R^2 on holdout: 0.9943579528872147
Saved model to ../models/wp_model.pkl


['../models/wp_model_meta.pkl']