In [6]:
import nfl_data_py as nfl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import duckdb as db

In [7]:
# Download nflfastR play-by-play for the 2015 through 2024 seasons
# (the range end is exclusive, so 2025 itself is not requested).
seasons = [*range(2015, 2025)]
pbp = nfl.import_pbp_data(seasons)

# Report (rows, columns), then preview the first ten plays.
print(pbp.shape)
pbp.head(10)

2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
2023 done.
2024 done.
Downcasting floats.
(483605, 398)


Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,was_pressure,route,defense_man_zone_type,defense_coverage_type,offense_names,defense_names,offense_positions,defense_positions,offense_numbers,defense_numbers
0,1.0,2015_01_BAL_DEN,2015091309,DEN,BAL,REG,1,,,,...,,,,,,,,,,
1,36.0,2015_01_BAL_DEN,2015091309,DEN,BAL,REG,1,BAL,away,DEN,...,,,,,,,,,,
2,51.0,2015_01_BAL_DEN,2015091309,DEN,BAL,REG,1,BAL,away,DEN,...,,,,,,,,,,
3,75.0,2015_01_BAL_DEN,2015091309,DEN,BAL,REG,1,BAL,away,DEN,...,,,,,,,,,,
4,96.0,2015_01_BAL_DEN,2015091309,DEN,BAL,REG,1,BAL,away,DEN,...,,,,,,,,,,
5,120.0,2015_01_BAL_DEN,2015091309,DEN,BAL,REG,1,BAL,away,DEN,...,,,,,,,,,,
6,141.0,2015_01_BAL_DEN,2015091309,DEN,BAL,REG,1,BAL,away,DEN,...,,,,,,,,,,
7,165.0,2015_01_BAL_DEN,2015091309,DEN,BAL,REG,1,BAL,away,DEN,...,,,,,,,,,,
8,187.0,2015_01_BAL_DEN,2015091309,DEN,BAL,REG,1,BAL,away,DEN,...,,,,,,,,,,
9,211.0,2015_01_BAL_DEN,2015091309,DEN,BAL,REG,1,DEN,home,BAL,...,,,,,,,,,,


In [16]:
# Spot-check identifiers, matchup, score margin and win probability on the
# first 100 plays (rows are sliced first so only a small frame is reindexed).
pbp.head(100)[[
    'game_id', 'play_id', 'season',
    'posteam', 'defteam',
    'score_differential', 'wp',
]]

Unnamed: 0,game_id,play_id,season,posteam,defteam,score_differential,wp
0,2015_01_BAL_DEN,1.0,2015,,,,0.422024
1,2015_01_BAL_DEN,36.0,2015,BAL,DEN,0.0,0.422024
2,2015_01_BAL_DEN,51.0,2015,BAL,DEN,0.0,0.422024
3,2015_01_BAL_DEN,75.0,2015,BAL,DEN,0.0,0.420599
4,2015_01_BAL_DEN,96.0,2015,BAL,DEN,0.0,0.403295
...,...,...,...,...,...,...,...
95,2015_01_BAL_DEN,2075.0,2015,DEN,BAL,-1.0,0.486117
96,2015_01_BAL_DEN,2099.0,2015,DEN,BAL,-1.0,0.468685
97,2015_01_BAL_DEN,2125.0,2015,BAL,DEN,1.0,0.564145
98,2015_01_BAL_DEN,2149.0,2015,BAL,DEN,1.0,0.614148


In [160]:
# Build the sample of 4th-quarter tries taken with a 7-point lead.
#
# Pipeline of the SQL below:
#   1. `tries`  - plays with qtr = 4, score_differential = 7 and either a
#                 two-point attempt or an extra-point attempt; result strings
#                 are lower-cased and NULLs become ''.
#   2. `dedup`  - numbers the qualifying plays within each game by play_id.
#   3. final    - keeps only the first qualifying play per game (rn = 1) and
#                 derives `went_for_two` / `two_point_success`
#                 (`two_point_success` is NULL when an extra point was kicked).
#
# Result: `df`, one row per game, ordered by game_id.

# Open a DuckDB connection (no database path given - presumably in-memory; confirm
# against the DuckDB docs if persistence matters).
con = db.connect()
# Expose the pandas frame `pbp` to SQL under the name "plays".
con.register("plays", pbp)

# NOTE: the query text is a runtime string; "desc" is double-quoted inside it
# because it is selected as a column name.
df = con.sql("""
WITH tries AS (
  SELECT
    season,
    game_id,
    play_id,
    game_seconds_remaining,
    posteam AS team,                  -- team attempting the try (the leader)
    two_point_attempt,
    LOWER(COALESCE(two_point_conv_result,'')) AS two_point_conv_result,
    extra_point_attempt,
    LOWER(COALESCE(extra_point_result,''))     AS extra_point_result,
    "desc"
  FROM plays
  WHERE qtr = 4
    AND score_differential = 7                 -- pre-try margin +7 for the try team
    AND (two_point_attempt = 1 OR extra_point_attempt = 1)
),
-- If a game has multiple qualifying tries (rare: penalties/retries), keep the first
dedup AS (
  SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY game_id ORDER BY play_id) AS rn
  FROM tries
)
SELECT
  season,
  game_id,
  team               AS leading_team,
  game_seconds_remaining,
  (two_point_attempt = 1) AS went_for_two,
  CASE WHEN two_point_attempt = 1 THEN (two_point_conv_result = 'success') END AS two_point_success,
  extra_point_result,
  "desc"
FROM dedup
WHERE rn = 1
ORDER BY game_id
""").df()

In [164]:
# Work on an independent (deep) copy so the raw query result in `df` stays
# untouched; then confirm the sample size as (rows, columns).
df2 = df.copy(deep=True)
df2.shape

(50, 8)

In [166]:
# Attach the final result of each game to the try sample and flag whether the
# team that was leading at the try went on to win.
#
# Why there is no season merge here: `df2` already carries `season` from the
# DuckDB query. Merging `pbp[['game_id', 'season']]` on top of it makes pandas
# suffix the clashing column to `season_x` / `season_y`, after which
# `df2['season']` raises KeyError. Reusing the existing column also leaves the
# `seasons` list defined in the data-loading cell untouched.

# drop columns produced by an earlier run so the cell can be re-executed safely
# (re-merging would otherwise create `winner_team_x` / `winner_team_y`)
derived_cols = ['winner_team', 'leader_won', 'time_remaining']
df2 = df2.drop(columns=derived_cols, errors='ignore')

# import schedules only for the seasons that appear in the sample
sched_seasons = sorted(int(s) for s in df2['season'].dropna().unique())
sched = nfl.import_schedules(sched_seasons)

# identify winner ('TIE' when neither side outscored the other)
sched['winner_team'] = np.where(
    sched['home_score'] > sched['away_score'], sched['home_team'],
    np.where(sched['away_score'] > sched['home_score'], sched['away_team'], 'TIE')
)

# merge winner info; `validate` guards against duplicate schedule rows
# silently multiplying the try rows
df2 = df2.merge(
    sched[['game_id', 'winner_team']],
    on='game_id',
    how='left',
    validate='many_to_one',
)

# flag if the leader won: 1.0 = won, 0.0 = lost, NaN = tie or no schedule match
# NOTE(review): assumes play-by-play and schedule data use the same team
# abbreviations (e.g. for relocated franchises) - confirm before trusting 0s.
has_winner = df2['winner_team'].notna() & df2['winner_team'].ne('TIE')
df2['leader_won'] = (
    df2['winner_team']
    .eq(df2['leading_team'])
    .astype(float)
    .where(has_winner)
)

# game clock remaining at the try, formatted MM:SS
minutes, seconds = divmod(df2['game_seconds_remaining'].astype(int), 60)
df2['time_remaining'] = (
    minutes.map('{:02d}'.format) + ':' + seconds.map('{:02d}'.format)
)

# 4th Quarter 2-Point Conversion Analysis

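**Objective:** Identify and analyze every NFL game in the last 10 seasons (2015–2024) where a team leading by 1 point scored a TD to go up by 7 and attempted the ensuing try in the 4th quarter. The try is a choice between kicking the extra point (up 8, still a one-possession game) and going for two (up 9 if successful, a two-possession game).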

## Data
- Source: nflfastR play-by-play data (via DuckDB + nfl_data_py)
- Sample size: 50 instances (2015–2024 seasons)

## Notes:
- 2015 marks the year the extra point distance was increased from 20 yds to 33 yds.

## Key fields:
- `game_id` — Unique game identifier
- `time_remaining` — Game clock remaining at the try play (MM:SS), derived from `game_seconds_remaining`
- `leading_team` — Team attempting the try
- `went_for_two` — True if attempted 2-pt, False if XP
- `try_success` — Whether the try (XP or 2pt) was successful
- `leader_won` — 1 if the leading team eventually won the game, 0 if it lost, NaN if the game ended in a tie

## Method Summary
1. Queried 4Q extra point / 2-pt attempts where `score_differential = 7` (the try team's lead after the TD, before the try), keeping the first qualifying try per game.
2. Joined with schedule data to determine final game outcomes.
3. Exported a clean dataset of all 50 relevant scenarios over 10 years.

