In [3]:
# Packages
import nfl_data_py as nfl
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA


In [4]:
# Load the data
pbp = nfl.import_pbp_data(range(2020, 2025))
pbp.head(10)

2020 done.
2021 done.
2022 done.
2023 done.
2024 done.
Downcasting floats.


Unnamed: 0,play_id,game_id,old_game_id_x,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,defense_players,n_offense,n_defense,ngs_air_yards,time_to_throw,was_pressure,route,defense_man_zone_type,defense_coverage_type,old_game_id
0,1.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,,,,...,,0.0,0.0,,,,,,,
1,39.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,...,00-0034073;00-0034730;00-0033783;00-0031961;00...,11.0,11.0,,,,,,,
2,54.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,...,00-0029585;00-0035236;00-0031557;00-0035705;00...,11.0,11.0,4.19,2.436,0.0,OUT,MAN_COVERAGE,COVER_1,
3,93.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,...,00-0029585;00-0035236;00-0031557;00-0035705;00...,11.0,11.0,,,,,,,
4,118.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,...,00-0029585;00-0035236;00-0031557;00-0029747;00...,11.0,11.0,,,,,,,
5,143.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,...,00-0029585;00-0035236;00-0029747;00-0027686;00...,11.0,11.0,,,,,,,
6,165.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,...,00-0029560;00-0029585;00-0035236;00-0029747;00...,11.0,11.0,9.17,2.201,0.0,HITCH,ZONE_COVERAGE,COVER_3,
7,197.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,...,00-0034490;00-0031557;00-0035248;00-0029747;00...,11.0,11.0,,,,,,,
8,226.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,ARI,away,SF,...,00-0034681;00-0033258;00-0034490;00-0029747;00...,11.0,11.0,,,,,,,
9,245.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,ARI,away,SF,...,00-0032057;00-0035717;00-0033870;00-0032164;00...,11.0,11.0,4.08,1.835,0.0,OUT,ZONE_COVERAGE,COVER_4,


In [5]:
# Filter to dropbacks only
dropbacks = pbp[pbp['qb_dropback'] == 1].copy()

# Calculate defensive skill metrics per defense per season
def_metrics = (
    dropbacks.groupby(['season', 'defteam'])
    .agg(
        def_epa_per_dropback=('epa', 'mean'),
        def_success_rate_allowed=('success', 'mean'),
        def_pressure_rate=('qb_hit', 'sum'),
        def_dropbacks=('qb_dropback', 'count'),
    )
    .assign(def_pressure_rate=lambda df: df.def_pressure_rate / df.def_dropbacks)
    .reset_index()
)

# Calculate OL proxy metrics per offense per season
ol_metrics = (
    dropbacks.groupby(['season', 'posteam'])
    .agg(
        pressures_allowed=('qb_hit', 'sum'),
        sacks_allowed=('sack', 'sum'),
        dropbacks=('qb_dropback', 'count'),
    )
    .assign(
        pressure_rate=lambda df: df.pressures_allowed / df.dropbacks,
        sack_rate=lambda df: df.sacks_allowed / df.dropbacks
    )
    .reset_index()
)

# Merge defensive and OL metrics onto dropbacks
dropbacks = dropbacks.merge(def_metrics, on=['season', 'defteam'], how='left')
dropbacks = dropbacks.merge(ol_metrics, on=['season', 'posteam'], how='left')

# Filter for true QBs with min dropbacks threshold
min_dropbacks = 150
qb_counts = dropbacks.groupby(['season', 'passer_player_name']).size().reset_index(name='dropbacks')
true_qbs = qb_counts[qb_counts['dropbacks'] >= min_dropbacks]['passer_player_name'].unique()
dropbacks_true_qbs = dropbacks[dropbacks['passer_player_name'].isin(true_qbs)].copy()

# Define clutch situations: 3rd/4th and long, last 5 minutes, close game (within 7 points)
dropbacks_true_qbs['clutch_situation'] = (
    (dropbacks_true_qbs['down'].isin([3, 4])) &
    (dropbacks_true_qbs['ydstogo'] >= 7) &
    (dropbacks_true_qbs['game_seconds_remaining'] <= 300) &
    (dropbacks_true_qbs['score_differential'].abs() <= 7)
)

# Add play-level adjusted EPA
dropbacks_true_qbs['adj_epa'] = (
    dropbacks_true_qbs['epa'] - dropbacks_true_qbs['def_epa_per_dropback']
) / (1 + dropbacks_true_qbs['pressure_rate'])

# Aggregate clutch adjusted EPA stats per QB
clutch_stats = (
    dropbacks_true_qbs[dropbacks_true_qbs['clutch_situation']]
    .groupby('passer_player_name')['adj_epa']
    .agg(['mean', 'sum', 'count'])
    .rename(columns={
        'mean': 'clutch_avg_adj_epa',
        'sum': 'clutch_total_adj_epa',
        'count': 'clutch_plays'
    })
    .reset_index()
)

# Aggregate non-clutch adjusted EPA stats per QB
non_clutch_stats = (
    dropbacks_true_qbs[~dropbacks_true_qbs['clutch_situation']]
    .groupby('passer_player_name')['adj_epa']
    .agg(['mean', 'sum', 'count'])
    .rename(columns={
        'mean': 'non_clutch_avg_adj_epa',
        'sum': 'non_clutch_total_adj_epa',
        'count': 'non_clutch_plays'
    })
    .reset_index()
)

# Aggregate overall QB metrics normalized by defense and OL skill
qb_adj = (
    dropbacks_true_qbs.groupby(['season', 'passer_player_name'])
    .agg(
        dropbacks=('qb_dropback', 'count'),
        total_epa=('epa', 'sum'),
        avg_epa_per_dropback=('epa', 'mean'),
        avg_def_epa_allowed=('def_epa_per_dropback', 'mean'),
        avg_def_success_rate_allowed=('def_success_rate_allowed', 'mean'),
        avg_def_pressure_rate=('def_pressure_rate', 'mean'),
        avg_ol_pressure_rate_allowed=('pressure_rate', 'mean'),
        avg_ol_sack_rate_allowed=('sack_rate', 'mean'),
    )
    .reset_index()
)

# Merge clutch stats with overall QB summary
qb_adj = qb_adj.merge(clutch_stats, on='passer_player_name', how='left')
qb_adj = qb_adj.merge(non_clutch_stats, on='passer_player_name', how='left')

# Calculate adjusted EPA metrics
qb_adj['epa_vs_expectation'] = qb_adj['avg_epa_per_dropback'] - qb_adj['avg_def_epa_allowed']
qb_adj['epa_vs_def_and_ol'] = qb_adj['epa_vs_expectation'] / (1 + qb_adj['avg_ol_pressure_rate_allowed'])

# Calculate clutch vs non-clutch adjusted EPA difference
qb_adj['clutch_vs_non_clutch_adj_diff'] = qb_adj['clutch_avg_adj_epa'] - qb_adj['non_clutch_avg_adj_epa']

# Filter to just QBs with over 200 dropbacks
qb_adj = qb_adj[qb_adj['dropbacks'] >= 200].copy()

qb_adj.head(10)


Unnamed: 0,season,passer_player_name,dropbacks,total_epa,avg_epa_per_dropback,avg_def_epa_allowed,avg_def_success_rate_allowed,avg_def_pressure_rate,avg_ol_pressure_rate_allowed,avg_ol_sack_rate_allowed,clutch_avg_adj_epa,clutch_total_adj_epa,clutch_plays,non_clutch_avg_adj_epa,non_clutch_total_adj_epa,non_clutch_plays,epa_vs_expectation,epa_vs_def_and_ol,clutch_vs_non_clutch_adj_diff
0,2020,A.Dalton,357,-15.207176,-0.042597,0.04246,0.469935,0.149414,0.132768,0.062147,-0.262139,-3.145671,12.0,-0.080141,-98.89338,1234,-0.085057,-0.075088,-0.181999
1,2020,A.Rodgers,634,195.30217,0.308048,0.099488,0.485396,0.133313,0.087746,0.039334,-0.513855,-10.277098,20.0,0.05771,123.383423,2138,0.20856,0.191736,-0.571565
2,2020,A.Smith,274,-37.050831,-0.135222,0.075793,0.471843,0.145804,0.157459,0.071823,-0.232869,-1.164343,5.0,-0.181369,-48.788186,269,-0.211015,-0.182309,-0.0515
3,2020,B.Mayfield,580,88.259171,0.152171,0.067905,0.470964,0.149943,0.088424,0.043408,-0.478491,-11.005298,23.0,0.028807,78.239811,2716,0.084266,0.07742,-0.507298
4,2020,B.Roethlisberger,696,52.28447,0.075121,0.097093,0.480352,0.134083,0.097333,0.018667,0.158771,2.857882,18.0,-0.069869,-95.720612,1370,-0.021972,-0.020023,0.22864
5,2020,C.Newton,398,-22.420835,-0.056334,0.08975,0.483444,0.13841,0.154309,0.074148,-0.158021,-0.790103,5.0,-0.162761,-86.263504,530,-0.146084,-0.126555,0.004741
6,2020,C.Wentz,492,-82.573616,-0.167833,0.030404,0.461321,0.146946,0.193863,0.090656,0.653237,7.185612,11.0,-0.106762,-148.612131,1392,-0.198236,-0.166046,0.759999
7,2020,D.Brees,475,51.384773,0.108178,0.087929,0.472518,0.135634,0.128682,0.046512,,,,0.017941,8.521829,475,0.020249,0.017941,
8,2020,D.Carr,540,42.028336,0.07783,0.065135,0.475666,0.137613,0.089552,0.046434,0.061448,1.84344,30.0,0.010131,26.696278,2635,0.012695,0.011652,0.051317
9,2020,D.Haskins,262,-44.358089,-0.169306,0.07413,0.471011,0.145264,0.157459,0.071823,-0.284758,-1.139032,4.0,-0.209165,-53.964521,258,-0.243435,-0.210319,-0.075593


In [6]:
# Advanced quarterback analytics
adv_qb_stats = dropbacks_true_qbs.groupby(['season', 'passer_player_name']).agg(
    avg_cpoe=('cpoe', 'mean'),
    avg_adj_epa=('adj_epa', 'mean'),
    avg_pass_yards=('yards_gained', 'mean'),
    avg_air_yards=('air_yards', 'mean'),
    avg_pressure_rate=('pressure_rate', 'mean'),
).reset_index()

# Clutch performance metrics
clutch_adv_stats = (
    dropbacks_true_qbs[dropbacks_true_qbs['clutch_situation']]
    .groupby('passer_player_name').agg(
        clutch_avg_cpoe=('cpoe', 'mean'),
        clutch_avg_adj_epa=('adj_epa', 'mean'),
        clutch_avg_pass_yards=('yards_gained', 'mean'),
        clutch_avg_air_yards=('air_yards', 'mean'),
        clutch_avg_pressure_rate=('pressure_rate', 'mean'),
    ).reset_index()
)
# Non-clutch performance metrics
non_clutch_adv_stats = (
    dropbacks_true_qbs[~dropbacks_true_qbs['clutch_situation']]
    .groupby('passer_player_name').agg(
        non_clutch_avg_cpoe=('cpoe', 'mean'),
        non_clutch_avg_adj_epa=('adj_epa', 'mean'),
        non_clutch_avg_pass_yards=('yards_gained', 'mean'),
        non_clutch_avg_air_yards=('air_yards', 'mean'),
        non_clutch_avg_pressure_rate=('pressure_rate', 'mean'),
    ).reset_index()
)

# Merge advanced all advanced stats
adv_qb_stats = adv_qb_stats.merge(clutch_adv_stats, on='passer_player_name', how='left')
adv_qb_stats = adv_qb_stats.merge(non_clutch_adv_stats, on='passer_player_name', how='left')



In [7]:
adv_qb_stats

Unnamed: 0,season,passer_player_name,avg_cpoe,avg_adj_epa,avg_pass_yards,avg_air_yards,avg_pressure_rate,clutch_avg_cpoe,clutch_avg_adj_epa,clutch_avg_pass_yards,clutch_avg_air_yards,clutch_avg_pressure_rate,non_clutch_avg_cpoe,non_clutch_avg_adj_epa,non_clutch_avg_pass_yards,non_clutch_avg_air_yards,non_clutch_avg_pressure_rate
0,2020,A.Dalton,-0.407478,-0.075088,5.563025,6.873874,0.132768,3.158078,-0.262139,7.416667,20.454546,0.138389,-1.245447,-0.080141,5.869530,7.134432,0.139585
1,2020,A.Rodgers,6.779110,0.191736,7.455836,7.907743,0.087746,-0.519887,-0.513855,6.600000,22.647058,0.126712,2.296140,0.057710,6.537418,7.425595,0.116415
2,2020,A.Smith,-0.845228,-0.182308,5.266423,5.103175,0.157459,-3.702763,-0.232869,9.600000,8.000000,0.157459,-0.783908,-0.181369,5.185874,5.044534,0.157459
3,2020,B.Mayfield,1.994527,0.077420,6.658621,8.250000,0.088424,-3.251248,-0.478491,4.913043,15.045455,0.113847,0.000250,0.028807,6.375920,7.872510,0.110233
4,2020,B.Roethlisberger,-0.527425,-0.020023,6.022988,7.085799,0.097333,-0.412974,0.158771,7.166667,12.294118,0.101228,-1.381870,-0.069869,5.670803,6.868300,0.100233
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273,2024,T.Huntley,3.104967,-0.232290,4.946309,7.848485,0.113636,7.196349,0.172145,7.666667,18.500000,0.123505,-0.219196,-0.146155,4.928058,7.485148,0.118941
274,2024,T.Lawrence,-0.968421,-0.037163,6.265574,9.395760,0.109656,-0.181260,-0.404960,6.545455,16.380953,0.110245,-1.061140,-0.018285,6.013933,7.943117,0.113549
275,2024,T.Tagovailoa,3.780918,0.116022,6.453682,5.720403,0.113636,-5.272086,0.477333,8.583333,14.083333,0.121503,2.148047,0.079071,6.802662,7.502190,0.119307
276,2024,T.Taylor,13.330581,0.313707,4.840000,4.000000,0.140089,-41.647678,-1.096310,0.000000,28.666666,0.182635,1.071235,-0.123969,5.466667,8.074405,0.165325


In [None]:
dropbacks_true_qbs['']

Unnamed: 0,season,passer_player_name,dropbacks,total_epa,avg_epa_per_dropback,avg_def_epa_allowed,avg_def_success_rate_allowed,avg_def_pressure_rate,avg_ol_pressure_rate_allowed,avg_ol_sack_rate_allowed,clutch_avg_adj_epa,clutch_total_adj_epa,clutch_plays,non_clutch_avg_adj_epa,non_clutch_total_adj_epa,non_clutch_plays,epa_vs_expectation,epa_vs_def_and_ol,clutch_vs_non_clutch_adj_diff
0,2020,A.Dalton,357,-15.207176,-0.042597,0.042460,0.469935,0.149414,0.132768,0.062147,-0.262139,-3.145671,12.0,-0.080141,-98.893380,1234,-0.085057,-0.075088,-0.181999
1,2020,A.Rodgers,634,195.302170,0.308048,0.099488,0.485396,0.133313,0.087746,0.039334,-0.513855,-10.277098,20.0,0.057710,123.383423,2138,0.208560,0.191736,-0.571565
2,2020,A.Smith,274,-37.050831,-0.135222,0.075793,0.471843,0.145804,0.157459,0.071823,-0.232869,-1.164343,5.0,-0.181369,-48.788186,269,-0.211015,-0.182309,-0.051500
3,2020,B.Mayfield,580,88.259171,0.152171,0.067905,0.470964,0.149943,0.088424,0.043408,-0.478491,-11.005298,23.0,0.028807,78.239811,2716,0.084266,0.077420,-0.507298
4,2020,B.Roethlisberger,696,52.284470,0.075121,0.097093,0.480352,0.134083,0.097333,0.018667,0.158771,2.857882,18.0,-0.069869,-95.720612,1370,-0.021972,-0.020023,0.228640
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,2024,S.Darnold,643,6.440769,0.010017,0.048754,0.463485,0.137877,0.163959,0.085672,-0.245918,-1.229589,5.0,-0.100586,-168.581994,1676,-0.038738,-0.033281,-0.145332
268,2024,S.Rattler,250,-84.787376,-0.339150,0.034115,0.464541,0.149540,0.134304,0.059871,,,,-0.329070,-82.267381,250,-0.373265,-0.329070,
274,2024,T.Lawrence,305,-3.281461,-0.010759,0.030480,0.451792,0.145621,0.109656,0.052373,-0.404960,-8.909130,22.0,-0.018285,-40.683785,2225,-0.041238,-0.037163,-0.386676
275,2024,T.Tagovailoa,421,85.623016,0.203380,0.074173,0.465299,0.129011,0.113636,0.065152,0.477333,5.727996,12.0,0.079071,172.295533,2179,0.129207,0.116022,0.398262
