In [1]:
import os
import sys

# Make the parent directory importable so that `utils` can be found from this notebook.
# Guarded so that re-running the cell does not keep appending the same entry to sys.path.
if os.path.abspath('..') not in sys.path:
    sys.path.append(os.path.abspath('..'))  # go up one level from notebooks/

In [2]:
import nfl_data_py as nfl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import duckdb as db
from utils.wp_predict import predict_wps

In [3]:
# Seasons 2015 through 2024 inclusive (the range end is exclusive).
seasons = [*range(2015, 2025)]

# Load play-by-play for every requested season via nfl_data_py into a single frame.
pbp = nfl.import_pbp_data(seasons)

# Report (rows, columns) of the combined frame as a quick sanity check.
print(pbp.shape)

2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
2023 done.
2024 done.
Downcasting floats.
(483605, 398)


In [4]:
# Preview the first 100 plays, restricted to the identifier, matchup, score and
# win-probability columns used by the rest of the notebook.
pbp.loc[
    :,
    ["game_id", "play_id", "season", "posteam", "defteam", "score_differential", "wp"],
].head(100)

Unnamed: 0,game_id,play_id,season,posteam,defteam,score_differential,wp
0,2015_01_BAL_DEN,1.0,2015,,,,0.422024
1,2015_01_BAL_DEN,36.0,2015,BAL,DEN,0.0,0.422024
2,2015_01_BAL_DEN,51.0,2015,BAL,DEN,0.0,0.422024
3,2015_01_BAL_DEN,75.0,2015,BAL,DEN,0.0,0.420599
4,2015_01_BAL_DEN,96.0,2015,BAL,DEN,0.0,0.403295
...,...,...,...,...,...,...,...
95,2015_01_BAL_DEN,2075.0,2015,DEN,BAL,-1.0,0.486117
96,2015_01_BAL_DEN,2099.0,2015,DEN,BAL,-1.0,0.468685
97,2015_01_BAL_DEN,2125.0,2015,BAL,DEN,1.0,0.564145
98,2015_01_BAL_DEN,2149.0,2015,BAL,DEN,1.0,0.614148


In [8]:
# Open a DuckDB connection (no database path given) and expose the play-by-play
# frame to SQL under the table name "plays".
con = db.connect()
con.register("plays", pbp)

# Query outline:
#   tries - plays with qtr = 3 and score_differential = 15 that are flagged as either a
#           two-point attempt or an extra-point attempt. The two result columns are
#           lower-cased, with NULL replaced by ''.
#   dedup - numbers the qualifying plays within each game in play_id order.
#   final - keeps only the first qualifying play per game (rn = 1), ordered by game_id.
#           went_for_two mirrors two_point_attempt; two_point_success is only evaluated
#           for two-point attempts and is NULL otherwise (the CASE has no ELSE).
# posteam is reported as leading_team -- presumably score_differential is measured from
# the possession team's perspective; confirm against the data dictionary.
# The result is materialised as a pandas DataFrame in `df`.
df = con.sql("""
WITH tries AS (
  SELECT
    season,
    game_id,
    play_id,
    qtr,
    game_seconds_remaining,
    posteam AS team,
    two_point_attempt,
    LOWER(COALESCE(two_point_conv_result,'')) AS two_point_conv_result,
    extra_point_attempt,
    LOWER(COALESCE(extra_point_result,''))     AS extra_point_result,
    wp,
    "desc"
  FROM plays
  WHERE qtr = 3
    AND score_differential = 15
    AND (two_point_attempt = TRUE OR extra_point_attempt = TRUE)  -- booleans
),
dedup AS (
  SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY game_id ORDER BY play_id) AS rn
  FROM tries
)
SELECT
  season,
  game_id,
  qtr,
  team               AS leading_team,
  game_seconds_remaining,
  wp,
  (two_point_attempt = TRUE) AS went_for_two,
  CASE WHEN two_point_attempt = TRUE THEN (two_point_conv_result = 'success') END AS two_point_success,
  extra_point_result,
  "desc"
FROM dedup
WHERE rn = 1
ORDER BY game_id
""").df()

In [9]:
# Work on a copy so the raw query result in `df` stays untouched.
sd15_Q3 = df.copy()

# manually correcting timestamps to align with PFR
# Maps row label -> (expected game_id, corrected game_seconds_remaining).
# The corrections are positional, so each one is validated before it is applied:
#   * the label must exist, because `.loc[label, col] = value` on a missing label would
#     silently append a new row instead of failing;
#   * where the game is known, the row must still hold that game, so a change in the
#     query result cannot redirect a correction to a different game.
# TODO: record the game_id for row 31 (None = existence check only).
_pfr_clock_fixes = {
    0: ("2015_10_BUF_NYJ", 1584),
    3: ("2015_18_PIT_CIN", 1208),
    4: ("2016_02_PHI_CHI", 936),
    31: (None, 1552),
}

for _row, (_expected_game, _seconds) in _pfr_clock_fixes.items():
    if _row not in sd15_Q3.index:
        raise KeyError(
            f"row {_row} is missing from the query result; manual clock corrections are stale"
        )
    if _expected_game is not None and sd15_Q3.loc[_row, "game_id"] != _expected_game:
        raise ValueError(
            f"row {_row} holds {sd15_Q3.loc[_row, 'game_id']!r}, expected {_expected_game!r}; "
            "manual clock corrections are stale"
        )
    sd15_Q3.loc[_row, "game_seconds_remaining"] = _seconds

In [10]:
# Compute post-try WPs (leader perspective)
# predict_wps is applied row by row and yields the wp_fail / wp_xp_good / wp_2pt_good columns.
# NOTE(review): base_margin=23 is passed although the query filters on
# score_differential = 15 -- confirm this matches the convention predict_wps expects.
branches = sd15_Q3.apply(predict_wps, axis=1, base_margin=23)

# Drop branch columns left over from a previous run of this cell first; otherwise the join
# raises on overlapping column names when the cell is re-executed.
sd15_Q3 = sd15_Q3.drop(columns=branches.columns, errors="ignore").join(branches)

# Display up to the first 46 rows with the new columns attached.
sd15_Q3.head(46)

Unnamed: 0,season,game_id,qtr,leading_team,game_seconds_remaining,wp,went_for_two,two_point_success,extra_point_result,desc,wp_fail,wp_xp_good,wp_2pt_good
0,2015,2015_10_BUF_NYJ,3.0,BUF,1584.0,0.908801,False,,good,"2-D.Carpenter extra point is GOOD, Center-65-G...",0.982986,0.982986,0.988423
1,2015,2015_11_KC_SD,3.0,KC,1107.0,0.937307,False,,good,"5-C.Santos extra point is GOOD, Center-41-J.Wi...",0.995611,0.995611,0.996341
2,2015,2015_14_PIT_CIN,3.0,PIT,1750.0,0.89148,False,,good,"9-C.Boswell extra point is GOOD, Center-60-G.W...",0.979034,0.979034,0.985687
3,2015,2015_18_PIT_CIN,3.0,PIT,1208.0,0.933753,True,False,,(Pass formation) TWO-POINT CONVERSION ATTEMPT....,0.993252,0.993252,0.993982
4,2016,2016_02_PHI_CHI,3.0,PHI,936.0,0.945556,False,,failed,"6-C.Sturgis extra point is No Good, Hit Left U...",0.99678,0.99678,0.99751
5,2016,2016_11_PHI_SEA,3.0,SEA,1351.0,0.934646,False,,good,"4-S.Hauschka extra point is GOOD, Center-48-N....",0.989476,0.989476,0.991221
6,2016,2016_19_SEA_ATL,3.0,ATL,1452.0,0.934709,False,,good,"3-M.Bryant extra point is GOOD, Center-47-J.Ha...",0.988729,0.988729,0.990473
7,2017,2017_03_SEA_TEN,3.0,TEN,994.0,0.950742,False,,good,"4-R.Succop extra point is GOOD, Center-48-B.Br...",0.997438,0.997438,0.998168
8,2017,2017_10_HOU_LA,3.0,LA,993.0,0.950742,False,,good,"4-G.Zuerlein extra point is GOOD, Center-44-J....",0.997438,0.997438,0.998168
9,2017,2017_12_LAC_DAL,3.0,LAC,1006.0,0.93771,False,,good,"8-D.Kaser extra point is GOOD, Center-47-M.Win...",0.997438,0.997438,0.998168


In [11]:
# Assumed success probabilities for the extra point and the two-point try.
p_xp = 0.95
p_2pt = 0.50

# Expected post-try win probability of each choice: success branch weighted by its
# probability, plus the shared failure branch weighted by the complement.
sd15_Q3["ev_xp"] = p_xp * sd15_Q3["wp_xp_good"] + (1 - p_xp) * sd15_Q3["wp_fail"]
sd15_Q3["ev_2pt"] = p_2pt * sd15_Q3["wp_2pt_good"] + (1 - p_2pt) * sd15_Q3["wp_fail"]

# Positive values mean the two-point try has the higher expected win probability.
sd15_Q3["ev_diff_2pt_minus_xp"] = sd15_Q3["ev_2pt"].sub(sd15_Q3["ev_xp"])

# Persist the enriched table for downstream use.
sd15_Q3.to_parquet("../data/sd15_Q3.parquet")