In [26]:
import sys, os
# Make the project root (one level above notebooks/) importable so that
# `utils.*` resolves. Guarded so that re-running this cell does not keep
# stacking duplicate entries onto sys.path.
_project_root = os.path.abspath('..')
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [27]:
import nfl_data_py as nfl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import duckdb as db
from utils.wp_predict import predict_wps

In [28]:
# Seasons to pull: 2015 through 2024 inclusive.
first_season, last_season = 2015, 2024
seasons = [year for year in range(first_season, last_season + 1)]

# One play-by-play frame covering every requested season.
pbp = nfl.import_pbp_data(seasons)

print(pbp.shape)

2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
2023 done.
2024 done.
Downcasting floats.
(483605, 398)


In [29]:
# Quick look at the identifying columns plus the score margin and win probability.
preview_cols = [
    'game_id',
    'play_id',
    'season',
    'posteam',
    'defteam',
    'score_differential',
    'wp',
]
pbp.loc[:, preview_cols].head(100)

Unnamed: 0,game_id,play_id,season,posteam,defteam,score_differential,wp
0,2015_01_BAL_DEN,1.0,2015,,,,0.422024
1,2015_01_BAL_DEN,36.0,2015,BAL,DEN,0.0,0.422024
2,2015_01_BAL_DEN,51.0,2015,BAL,DEN,0.0,0.422024
3,2015_01_BAL_DEN,75.0,2015,BAL,DEN,0.0,0.420599
4,2015_01_BAL_DEN,96.0,2015,BAL,DEN,0.0,0.403295
...,...,...,...,...,...,...,...
95,2015_01_BAL_DEN,2075.0,2015,DEN,BAL,-1.0,0.486117
96,2015_01_BAL_DEN,2099.0,2015,DEN,BAL,-1.0,0.468685
97,2015_01_BAL_DEN,2125.0,2015,BAL,DEN,1.0,0.564145
98,2015_01_BAL_DEN,2149.0,2015,BAL,DEN,1.0,0.614148


In [30]:
# Expose the play-by-play frame to DuckDB under the name "plays" so it can be
# queried with SQL.
con = db.connect()
con.register("plays", pbp)

# One row per game: the first second-quarter try (extra point or two-point
# attempt) on which the possessing team's recorded score differential is 23.
# NOTE(review): presumably score_differential on the try row already includes
# the touchdown just scored -- confirm against the nflfastR field definitions.
df = con.sql("""
WITH tries AS (
  SELECT
    season,
    game_id,
    play_id,
    qtr,
    game_seconds_remaining,
    posteam AS team,                  -- team attempting the try (the leader)
    two_point_attempt,
    LOWER(COALESCE(two_point_conv_result,'')) AS two_point_conv_result,
    extra_point_attempt,
    LOWER(COALESCE(extra_point_result,''))     AS extra_point_result,
    wp,
    "desc"
  FROM plays
  WHERE qtr = 2
    AND score_differential = 23                 -- try team leads by 23 on the try play
    AND (two_point_attempt = 1 OR extra_point_attempt = 1)
),
-- If a game has multiple qualifying tries (rare: penalties/retries), keep the first
dedup AS (
  SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY game_id ORDER BY play_id) AS rn
  FROM tries
)
SELECT
  season,
  game_id,
  qtr,
  team               AS leading_team,
  game_seconds_remaining,
  wp,
  (two_point_attempt = 1) AS went_for_two,
  CASE WHEN two_point_attempt = 1 THEN (two_point_conv_result = 'success') END AS two_point_success,
  extra_point_result,
  "desc"
FROM dedup
WHERE rn = 1
ORDER BY game_id
""").df()

In [31]:
sd23_Q2 = df.copy()

# Manually corrected game_seconds_remaining values, aligned with PFR.
# Keyed by game_id instead of row position: if a refreshed data pull adds,
# drops, or reorders qualifying games, a positional patch would silently land
# on the wrong game (or append a brand-new row).
_pfr_clock_fixes = {
    "2015_01_CIN_OAK": 1812,
    "2015_03_PHI_NYJ": 1936,
    "2016_02_TB_ARI": 1826,
    "2016_08_JAX_TEN": 2238,
}
for _gid, _seconds_left in _pfr_clock_fixes.items():
    _hit = sd23_Q2["game_id"] == _gid
    # The query keeps one row per game, so exactly one match is expected.
    assert _hit.sum() == 1, f"expected exactly one row for {_gid}, found {_hit.sum()}"
    sd23_Q2.loc[_hit, "game_seconds_remaining"] = _seconds_left

In [32]:
# Compute post-try WPs (leader perspective): one row of branch win
# probabilities per try, produced by predict_wps with base_margin=23.
branches = sd23_Q2.apply(predict_wps, axis=1, base_margin=23)

# Drop any branch columns left over from a previous run of this cell before
# joining; a plain join would raise on the overlapping column names, making
# the cell impossible to re-run without rebuilding sd23_Q2 first.
sd23_Q2 = sd23_Q2.drop(columns=branches.columns, errors="ignore").join(branches)

# NOTE(review): in the displayed output wp_fail equals wp_xp_good on every
# row. Confirm predict_wps really scores the missed-try and made-kick branches
# at different margins, or that the model genuinely cannot separate them.
sd23_Q2.head(40)

Unnamed: 0,season,game_id,qtr,leading_team,game_seconds_remaining,wp,went_for_two,two_point_success,extra_point_result,desc,wp_fail,wp_xp_good,wp_2pt_good
0,2015,2015_01_CIN_OAK,2.0,CIN,1812.0,0.967314,False,,good,"2-M.Nugent extra point is GOOD, Center-46-C.Ha...",0.972048,0.972048,0.981255
1,2015,2015_03_BUF_MIA,2.0,BUF,1904.0,0.969832,False,,good,"2-D.Carpenter extra point is GOOD, Center-65-G...",0.97524,0.97524,0.982791
2,2015,2015_03_PHI_NYJ,2.0,PHI,1936.0,0.972102,False,,good,"1-C.Parkey extra point is GOOD, Center-46-J.Do...",0.978983,0.978983,0.986534
3,2015,2015_07_TB_WAS,2.0,TB,2299.0,0.967848,False,,good,"10-C.Barth extra point is GOOD, Center-48-A.De...",0.970208,0.970208,0.979292
4,2016,2016_02_TB_ARI,2.0,ARI,1826.0,0.932666,False,,good,"7-C.Catanzaro extra point is GOOD, Center-86-K...",0.970622,0.970622,0.979829
5,2016,2016_05_HOU_MIN,2.0,MIN,2326.0,0.969251,False,,good,(Kick formation) 3-B.Walsh extra point is GOOD...,0.969989,0.969989,0.979074
6,2016,2016_08_JAX_TEN,2.0,TEN,2238.0,0.964777,False,,good,"4-R.Succop extra point is GOOD, Center-48-B.Br...",0.97078,0.97078,0.979864
7,2016,2016_15_IND_MIN,2.0,IND,1833.0,0.949355,False,,good,(Kick formation) 4-A.Vinatieri extra point is ...,0.971381,0.971381,0.979373
8,2016,2016_20_GB_ATL,2.0,ATL,1803.0,0.977708,False,,good,"3-M.Bryant extra point is GOOD, Center-47-J.Ha...",0.972048,0.972048,0.981255
9,2017,2017_02_CHI_TB,2.0,TB,2049.0,0.975566,False,,failed,"2-N.Folk extra point is No Good, Wide Left, Ce...",0.972278,0.972278,0.98137


In [33]:
# Weights placed on the "good" branch of each option: kick and two-point try.
p_xp, p_2pt = 0.95, 0.50

# Win probability if the try fails; shared by both options.
wp_after_miss = sd23_Q2['wp_fail']

# Expected win probability of each choice = weighted mix of its good and failed branches.
ev_kick = p_xp * sd23_Q2['wp_xp_good'] + (1 - p_xp) * wp_after_miss
ev_go_for_two = p_2pt * sd23_Q2['wp_2pt_good'] + (1 - p_2pt) * wp_after_miss

sd23_Q2['ev_xp'] = ev_kick
sd23_Q2['ev_2pt'] = ev_go_for_two
# Positive values mean the two-point try has the higher expected win probability.
sd23_Q2['ev_diff_2pt_minus_xp'] = ev_go_for_two - ev_kick

# Persist the enriched table.
sd23_Q2.to_parquet("../data/sd23_Q2.parquet")