In [3]:
import pandas as pd
import numpy as np
import nfl_data_py as nfl
import matplotlib as plt
import seaborn as sns
from functools import reduce

In [4]:
seasons = range(2010,2024+1)
pbp = nfl.import_pbp_data(seasons)
rosters = nfl.import_seasonal_rosters(seasons, ['season', 'player_id', 'player_name'])

2010 done.
2011 done.
2012 done.
2013 done.
2014 done.
2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
2023 done.
2024 done.
Downcasting floats.


# Quarterback Effieciency Calculations

- EPA per Dropback
- EPA per Sack
- EPA per Interception
- Short, Medium, Long EPA
- Left, Right, Center EPA
- CPOE
- Short, Medium, Long CPOE
- Left, Right, Center CPOE
- Average Depth of Target
- Explosive Pass Rate

In [5]:
print(pbp.filter(like = 'receiver').columns)

Index(['receiver_player_id', 'receiver_player_name',
       'lateral_receiver_player_id', 'lateral_receiver_player_name',
       'receiver', 'receiver_jersey_number', 'receiver_id'],
      dtype='object')


# EPA per Dropback

In [6]:
epa_pbp_pass = pbp.query('season_type == "REG" &\
                         `pass` == 1 &\
                         passer_id.notnull() &\
                         epa.notnull() &\
                         qb_spike != 1')

epa_pass = epa_pbp_pass\
    .groupby(['season', 'passer_id', 'passer'])\
    .agg({'epa': ['count', 'sum', 'mean']})

epa_pass.columns = list(map('_'.join, epa_pass.columns.values))

epa_pass.reset_index(inplace = True)

epa_pass = epa_pass.rename(columns = {'epa_count': 'dropbacks',
                                      'epa_mean': 'epa_per_dropback',
                                      'epa_sum': 'total_pass_epa'})

epa_pass.query('season == 2024 & dropbacks > 300')\
        .sort_values('epa_per_dropback', ascending = False)\
        .head(10)

Unnamed: 0,season,passer_id,passer,dropbacks,total_pass_epa,epa_per_dropback
1495,2024,00-0034796,L.Jackson,573,188.66687,0.329262
1499,2024,00-0034857,J.Allen,585,183.548187,0.313758
1476,2024,00-0033106,J.Goff,598,161.282562,0.269703
1511,2024,00-0036212,T.Tagovailoa,450,97.324303,0.216276
1498,2024,00-0034855,B.Mayfield,688,143.418335,0.208457
1557,2024,00-0039910,J.Daniels,633,121.554375,0.192029
1517,2024,00-0036442,J.Burrow,770,146.493591,0.190251
1532,2024,00-0037834,B.Purdy,554,104.02095,0.187763
1512,2024,00-0036264,J.Love,482,86.140289,0.178714
1484,2024,00-0033873,P.Mahomes,696,108.981522,0.156583


# EPA per Sack

In [7]:
epa_pbp_sack = pbp.query('season_type == "REG" &\
                          `pass` == 1 &\
                          sack == 1 &\
                          epa.notnull()')

epa_sack = epa_pbp_sack\
    .groupby(['season', 'passer', 'passer_id'])\
    .agg(epa_per_sack   = ('epa', 'mean'),
         sack_epa_total = ('epa', 'sum'),
         sacks_taken    = ('epa', 'count'))\
    .reset_index()

epa_sack

Unnamed: 0,season,passer,passer_id,epa_per_sack,sack_epa_total,sacks_taken
0,2010,A.Rodgers,00-0023459,-1.530488,-47.445129,31
1,2010,A.Smith,00-0023436,-1.751284,-43.782097,25
2,2010,B.Brohm,00-0026196,-1.992236,-5.976708,3
3,2010,B.Croyle,00-0024300,-1.011022,-4.044087,4
4,2010,B.Favre,00-0005106,-1.613184,-35.490047,22
...,...,...,...,...,...,...
1010,2024,T.Lance,00-0037012,-1.579193,-6.316772,4
1011,2024,T.Lawrence,00-0036971,-1.931592,-34.768654,18
1012,2024,T.McKee,00-0038400,-2.073076,-4.146152,2
1013,2024,T.Tagovailoa,00-0036212,-1.959115,-41.141418,21


# EPA per Turnover

In [8]:
epa_pbp_turnover = pbp.query('season_type == "REG" &\
                              (`pass` == 1 | rush == 1) &\
                              (interception == 1 | fumble_lost == 1) &\
                              epa.notnull()')

epa_turnover = epa_pbp_turnover\
    .groupby(['season', 'passer', 'passer_id'])\
    .agg(epa_per_turnover   = ('epa', 'mean'),
         total_turnover_epa = ('epa', 'sum'),
         total_turnovers    = ('epa', 'count'))\
    .reset_index()

epa_turnover

Unnamed: 0,season,passer,passer_id,epa_per_turnover,total_turnover_epa,total_turnovers
0,2010,A.Rodgers,00-0023459,-3.322917,-53.166679,16
1,2010,A.Smith,00-0023436,-4.354380,-56.606934,13
2,2010,B.Brohm,00-0026196,-4.349636,-21.748178,5
3,2010,B.Croyle,00-0024300,-3.496979,-3.496979,1
4,2010,B.Favre,00-0005106,-4.295163,-94.493576,22
...,...,...,...,...,...,...
947,2024,T.Huntley,00-0035993,-4.260641,-21.303204,5
948,2024,T.Lance,00-0037012,-5.057142,-10.114285,2
949,2024,T.Lawrence,00-0036971,-3.533387,-35.333870,10
950,2024,T.Tagovailoa,00-0036212,-4.724627,-42.521648,9


In [9]:
# EPA on Short, Medium, Long passes

In [10]:
pass_plays = pbp.query('season_type == "REG" &\
                        `pass` == 1 &\
                        air_yards.notnull() &\
                        epa.notnull()')

def pass_length(air_yards):
    if air_yards < 5:
        return 'short'
    elif 5 <= air_yards <= 10:
        return 'medium'
    else:
        return 'deep'

pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)

pass_length_epa_old = pass_plays\
    .groupby(['season', 'passer', 'passer_id', 'pass_length'])\
    .agg(total_plays = ('epa', 'count'),
         total_epa = ('epa', 'sum'),
         epa_per_play = ('epa', 'mean'))\
    .reset_index()

pass_length_epa = pass_length_epa_old.pivot(index = ['season', 'passer', 'passer_id'], columns = 'pass_length')

pass_length_epa.columns = [f"{stat}_{length}" for stat, length in pass_length_epa.columns]
pass_length_epa = pass_length_epa.reset_index()

pass_length_epa.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)


Unnamed: 0,season,passer,passer_id,total_plays_deep,total_plays_medium,total_plays_short,total_epa_deep,total_epa_medium,total_epa_short,epa_per_play_deep,epa_per_play_medium,epa_per_play_short
0,2010,A.Boldin,00-0022084,,,1.0,,,-1.045923,,,-1.045923
1,2010,A.Edwards,00-0027692,,,1.0,,,-1.441349,,,-1.441349
2,2010,A.Randle El,00-0021190,1.0,,1.0,4.212948,,2.523903,4.212948,,2.523903
3,2010,A.Rodgers,00-0023459,164.0,128.0,181.0,85.518974,43.522488,13.945068,0.521457,0.340019,0.077045
4,2010,A.Smith,00-0023436,87.0,88.0,163.0,29.616854,17.97773,-21.861862,0.340424,0.204292,-0.134122


# EPA on Right, Left, Middle

In [11]:
pass_plays = pbp.query('season_type == "REG" &\
                        `pass` == 1 &\
                        air_yards.notnull() &\
                        epa.notnull()')

pass_location_epa_old = pass_plays\
    .groupby(['season', 'passer', 'passer_id', 'pass_location'])\
    .agg(total_plays = ('epa', 'count'),
         total_epa = ('epa', 'sum'),
         epa_per_play = ('epa', 'mean'))\
    .reset_index()

pass_location_epa = pass_location_epa_old\
    .pivot(index = ['season', 'passer', 'passer_id'], 
           columns = 'pass_location')

pass_location_epa.columns = [f"{stat}_{location}" for stat, location in pass_location_epa.columns]
pass_location_epa = pass_location_epa.reset_index()

pass_location_epa.head()

Unnamed: 0,season,passer,passer_id,total_plays_left,total_plays_middle,total_plays_right,total_epa_left,total_epa_middle,total_epa_right,epa_per_play_left,epa_per_play_middle,epa_per_play_right
0,2010,A.Boldin,00-0022084,,,1.0,,,-1.045923,,,-1.045923
1,2010,A.Edwards,00-0027692,,,1.0,,,-1.441349,,,-1.441349
2,2010,A.Randle El,00-0021190,,,2.0,,,6.736851,,,3.368425
3,2010,A.Rodgers,00-0023459,159.0,102.0,212.0,35.47501,45.731415,61.780106,0.223113,0.448347,0.291416
4,2010,A.Smith,00-0023436,115.0,57.0,164.0,-15.642131,41.444645,-0.128923,-0.136019,0.727099,-0.000786


# EPA Depth vs Location

In [12]:
pass_plays = pbp.query('season_type == "REG" &\
                        `pass` == 1 &\
                        air_yards.notnull() &\
                        epa.notnull()')

def pass_length(air_yards):
    if air_yards < 5:
        return 'short'
    elif 5 <= air_yards <= 10:
        return 'medium'
    else:
        return 'deep'

pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)

pass_zone_epa_old = pass_plays\
    .groupby(['season', 'passer', 'passer_id', 'pass_length', 'pass_location'])\
    .agg(total_plays  = ('epa', 'count'),
         total_epa    = ('epa', 'sum'),
         epa_per_play = ('epa', 'mean'))\
    .reset_index()         

pass_zone_epa = pass_zone_epa_old\
    .pivot(index = ['season', 'passer', 'passer_id'],
           columns = ['pass_length', 'pass_location'])

pass_zone_epa.columns = [f"{stat}_{length}_{location}"
                         for stat, length, location in pass_zone_epa.columns]

pass_zone_epa = pass_zone_epa.reset_index()

pass_zone_epa.query('passer_id == "00-0023459"')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)


Unnamed: 0,season,passer,passer_id,total_plays_short_right,total_plays_deep_right,total_plays_deep_left,total_plays_deep_middle,total_plays_medium_left,total_plays_medium_middle,total_plays_medium_right,...,total_epa_short_middle,epa_per_play_short_right,epa_per_play_deep_right,epa_per_play_deep_left,epa_per_play_deep_middle,epa_per_play_medium_left,epa_per_play_medium_middle,epa_per_play_medium_right,epa_per_play_short_left,epa_per_play_short_middle
3,2010,A.Rodgers,00-0023459,91.0,63.0,55.0,46.0,45.0,25.0,58.0,...,-1.537186,0.193205,0.391421,0.442563,0.79388,0.294076,0.430004,0.336879,-0.035582,-0.049587
106,2011,A.Rodgers,00-0023459,104.0,72.0,48.0,52.0,32.0,33.0,61.0,...,-1.318731,0.163866,0.793971,1.162916,1.170244,0.782045,0.235294,0.501943,0.152172,-0.05072
202,2012,A.Rodgers,00-0023459,104.0,79.0,40.0,49.0,44.0,50.0,65.0,...,8.820589,0.154371,0.606101,0.213675,0.657062,0.5348,0.489536,0.339458,0.101116,0.238394
286,2013,A.Rodgers,00-0023459,60.0,37.0,24.0,25.0,18.0,18.0,37.0,...,1.920657,0.135573,0.700101,0.80196,0.890316,0.492292,1.146975,-0.176211,0.230363,0.147743
375,2014,A.Rodgers,00-0023459,94.0,67.0,44.0,50.0,56.0,35.0,53.0,...,9.126457,0.176759,0.787166,-0.048897,1.187545,0.366172,0.829167,0.616015,-0.063349,0.217297
473,2015,A.Rodgers,00-0023459,107.0,62.0,74.0,42.0,50.0,34.0,51.0,...,8.039432,-0.050779,0.218797,0.235625,0.956383,-0.034796,0.487797,-0.180434,0.056459,0.146171
557,2016,A.Rodgers,00-0023459,102.0,65.0,77.0,50.0,64.0,38.0,73.0,...,6.900644,0.142083,0.60254,0.369955,0.139471,0.426008,0.855154,0.327091,0.063228,0.191685
651,2017,A.Rodgers,00-0023459,48.0,19.0,27.0,10.0,20.0,19.0,26.0,...,0.139123,0.178477,0.89799,0.163978,1.439851,0.013577,0.342901,0.050451,0.111115,0.009937
748,2018,A.Rodgers,00-0023459,115.0,64.0,83.0,41.0,60.0,38.0,57.0,...,8.566304,-0.035294,0.703206,0.320996,0.827313,0.320496,0.695284,-0.312541,-0.005257,0.29539
855,2019,A.Rodgers,00-0023459,112.0,69.0,62.0,49.0,46.0,34.0,45.0,...,1.314307,-0.044996,0.539153,0.106213,0.882685,-0.13742,0.320644,0.172471,0.145863,0.048678


# Completion Percentage Over Expected

In [13]:
cpoe_pbp = pbp.query('season_type == "REG" &\
                      cpoe.notnull() &\
                      qb_spike != 1 &\
                      passer_id.notnull() &\
                      `pass` == 1')

cpoe = cpoe_pbp\
    .groupby(['season', 'passer', 'passer_id'])\
    .agg({'cpoe': ['count','sum', 'mean']})

cpoe.columns = list(map('_'.join, cpoe.columns.values))

cpoe.reset_index(inplace = True)

cpoe = cpoe.rename(columns = {'cpoe_count': 'dropbacks_cpoe',
                              'cpoe_sum': 'total_cpoe',
                              'cpoe_mean': 'cpoe'})

cpoe.query('season == 2024 & dropbacks_cpoe > 300')\
    .sort_values('cpoe', ascending = False)\
    .head(10)

Unnamed: 0,season,passer,passer_id,dropbacks_cpoe,total_cpoe,cpoe
1418,2024,J.Hurts,00-0036389,334,2512.166992,7.521458
1406,2024,J.Burrow,00-0036442,628,4262.103027,6.786788
1414,2024,J.Goff,00-0033106,510,2909.908447,5.705703
1402,2024,G.Smith,00-0030565,554,3124.301758,5.639534
1434,2024,L.Jackson,00-0034796,454,2066.581787,4.551942
1448,2024,R.Wilson,00-0029263,306,1368.365112,4.471781
1449,2024,S.Darnold,00-0034869,514,1981.289307,3.854648
1466,2024,T.Tagovailoa,00-0036212,388,1466.996216,3.780918
1377,2024,B.Mayfield,00-0034855,562,2025.46167,3.604024
1408,2024,J.Daniels,00-0039910,461,1635.062256,3.546773


# CPOE by Depth

In [14]:
pass_plays = pbp.query('season_type == "REG" &\
                        `pass` == 1 &\
                        air_yards.notnull() &\
                        cpoe.notnull()')

def pass_length(air_yards):
    if air_yards < 5:
        return 'short'
    elif 5 <= air_yards <= 10:
        return 'medium'
    else:
        return 'deep'

pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)

pass_length_cpoe_old = pass_plays\
    .groupby(['season', 'passer', 'passer_id', 'pass_length'])\
    .agg(cpoe_attempts = ('cpoe', 'count'),
         cpoe          = ('cpoe', 'mean'))\
    .reset_index()

pass_length_cpoe = pass_length_cpoe_old\
    .pivot(index = ['season', 'passer', 'passer_id'], 
           columns = 'pass_length')

pass_length_cpoe.columns = [f"{stat}_{length}" 
                            for stat, length in pass_length_cpoe.columns]

pass_length_cpoe = pass_length_cpoe.reset_index()

pass_length_cpoe.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)


Unnamed: 0,season,passer,passer_id,cpoe_attempts_deep,cpoe_attempts_medium,cpoe_attempts_short,cpoe_deep,cpoe_medium,cpoe_short
0,2010,A.Boldin,00-0022084,,,1.0,,,56.395672
1,2010,A.Edwards,00-0027692,,,1.0,,,42.582005
2,2010,A.Randle El,00-0021190,1.0,,1.0,75.749519,,58.526581
3,2010,A.Rodgers,00-0023459,162.0,125.0,177.0,3.461022,7.551197,6.967185
4,2010,A.Smith,00-0023436,85.0,85.0,161.0,-2.916575,4.394106,-5.217501


# CPOE by Location (Left, Right, Middle)

In [15]:
pass_plays = pbp.query('season_type == "REG" &\
                        `pass` == 1 &\
                        air_yards.notnull() &\
                        cpoe.notnull()')

pass_location_cpoe_old = pass_plays\
    .groupby(['season', 'passer', 'passer_id', 'pass_location'])\
    .agg(cpoe_attempts = ('cpoe', 'count'),
         cpoe          = ('cpoe', 'mean'))\
    .reset_index()

pass_location_cpoe = pass_location_cpoe_old\
    .pivot(index = ['season', 'passer', 'passer_id'], 
           columns = 'pass_location')

pass_location_cpoe.columns = [f"{stat}_{location}" 
                            for stat, location in pass_location_cpoe.columns]

pass_location_cpoe = pass_location_cpoe.reset_index()

pass_location_cpoe.head()

Unnamed: 0,season,passer,passer_id,cpoe_attempts_left,cpoe_attempts_middle,cpoe_attempts_right,cpoe_left,cpoe_middle,cpoe_right
0,2010,A.Boldin,00-0022084,,,1.0,,,56.395672
1,2010,A.Edwards,00-0027692,,,1.0,,,42.582005
2,2010,A.Randle El,00-0021190,,,2.0,,,67.138046
3,2010,A.Rodgers,00-0023459,156.0,102.0,206.0,8.093043,-1.725072,8.015629
4,2010,A.Smith,00-0023436,112.0,57.0,162.0,-8.113976,8.729284,-1.871798


# CPOE Depth vs Location

In [16]:
pass_plays = pbp.query('season_type == "REG" &\
                        `pass` == 1 &\
                        air_yards.notnull() &\
                        cpoe.notnull()')

def pass_length(air_yards):
    if air_yards < 5:
        return 'short'
    elif 5 <= air_yards <= 10:
        return 'medium'
    else:
        return 'deep'

pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)

pass_zone_cpoe_old = pass_plays\
    .groupby(['season', 'passer', 'passer_id', 'pass_length', 'pass_location'])\
    .agg(total_plays_cpoe  = ('cpoe', 'count'),
         cpoe              = ('cpoe', 'mean'))\
    .reset_index()         

pass_zone_cpoe = pass_zone_cpoe_old\
    .pivot(index = ['season', 'passer', 'passer_id'],
           columns = ['pass_length', 'pass_location'])

pass_zone_cpoe.columns = [f"{stat}_{length}_{location}"
                         for stat, length, location in pass_zone_cpoe.columns]

pass_zone_cpoe = pass_zone_cpoe.reset_index()

pass_zone_cpoe

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)


Unnamed: 0,season,passer,passer_id,total_plays_cpoe_short_right,total_plays_cpoe_deep_right,total_plays_cpoe_deep_left,total_plays_cpoe_deep_middle,total_plays_cpoe_medium_left,total_plays_cpoe_medium_middle,total_plays_cpoe_medium_right,...,total_plays_cpoe_short_middle,cpoe_short_right,cpoe_deep_right,cpoe_deep_left,cpoe_deep_middle,cpoe_medium_left,cpoe_medium_middle,cpoe_medium_right,cpoe_short_left,cpoe_short_middle
0,2010,A.Boldin,00-0022084,1.0,,,,,,,...,,56.395672,,,,,,,,
1,2010,A.Edwards,00-0027692,1.0,,,,,,,...,,42.582005,,,,,,,,
2,2010,A.Randle El,00-0021190,1.0,1.0,,,,,,...,,58.526581,75.749519,,,,,,,
3,2010,A.Rodgers,00-0023459,88.0,62.0,54.0,46.0,44.0,25.0,56.0,...,31.0,10.850987,1.317407,5.026829,4.512122,15.610510,-14.304649,10.975953,5.244889,-0.835443
4,2010,A.Smith,00-0023436,84.0,37.0,24.0,24.0,23.0,21.0,41.0,...,12.0,2.087562,-10.133200,-0.196213,5.488693,2.743393,19.717096,-2.528245,-14.879297,-4.018205
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1464,2024,T.Lawrence,00-0036971,41.0,36.0,38.0,26.0,21.0,20.0,26.0,...,16.0,1.045840,-6.297791,-1.618034,15.076728,-4.145651,-12.129916,9.978349,-2.261099,-14.457893
1465,2024,T.McKee,00-0038400,8.0,5.0,8.0,1.0,3.0,5.0,8.0,...,3.0,2.899369,3.718293,2.280604,28.974712,26.198545,-17.488256,-15.697081,22.821777,-15.935974
1466,2024,T.Tagovailoa,00-0036212,77.0,32.0,32.0,35.0,32.0,30.0,31.0,...,26.0,8.546597,-5.096478,-3.334826,8.249669,-5.832596,4.529168,21.058058,3.139962,-4.002953
1467,2024,T.Taylor,00-0028118,9.0,3.0,,,,1.0,4.0,...,1.0,1.678749,17.148998,,,,26.778776,22.442945,26.777807,16.502470


# Average Depth Of Target (ADOT)

In [17]:
pass_plays = pbp.query('season_type == "REG" &\
                        air_yards.notnull() &\
                        `pass` == 1')

adot = pass_plays\
    .groupby(['season', 'passer', 'passer_id'])\
    .agg(air_yards_total  = ('air_yards', 'sum'),
         air_yards_targets = ('air_yards', 'count'))\
    .reset_index()

adot['adot'] = adot['air_yards_total'] / adot['air_yards_targets']

adot = adot.reset_index()

adot

Unnamed: 0,index,season,passer,passer_id,air_yards_total,air_yards_targets,adot
0,0,2010,A.Boldin,00-0022084,-11.0,1,-11.000000
1,1,2010,A.Edwards,00-0027692,0.0,1,0.000000
2,2,2010,A.Randle El,00-0021190,42.0,2,21.000000
3,3,2010,A.Rodgers,00-0023459,4411.0,473,9.325581
4,4,2010,A.Smith,00-0023436,2551.0,338,7.547337
...,...,...,...,...,...,...,...
1487,1487,2024,T.Lawrence,00-0036971,2659.0,283,9.395760
1488,1488,2024,T.McKee,00-0038400,322.0,45,7.155556
1489,1489,2024,T.Tagovailoa,00-0036212,2271.0,397,5.720403
1490,1490,2024,T.Taylor,00-0028118,88.0,22,4.000000


# ADOT by Depth

In [18]:
pass_plays = pbp.query('season_type == "REG" &\
                        air_yards.notnull() &\
                        `pass` == 1')

def pass_length(air_yards):
    if air_yards < 5:
        return 'short'
    elif 5 <= air_yards <= 10:
        return 'medium'
    else:
        return 'deep'

pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)

adot_depth_old = pass_plays\
    .groupby(['season', 'passer', 'passer_id', 'pass_length'])\
    .agg(air_yards_total   = ('air_yards', 'sum'),
         air_yards_targets = ('air_yards', 'count'),
         adot              = ('air_yards', 'mean'))\
    .reset_index()

adot_depth = adot_depth_old\
    .pivot(index = ['season', 'passer', 'passer_id'],
           columns = 'pass_length')

adot_depth.columns = [f"{stat}_{length}"
                         for stat, length in adot_depth.columns]


adot_depth = adot_depth.reset_index()

adot_depth

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)


Unnamed: 0,season,passer,passer_id,air_yards_total_deep,air_yards_total_medium,air_yards_total_short,air_yards_targets_deep,air_yards_targets_medium,air_yards_targets_short,adot_deep,adot_medium,adot_short
0,2010,A.Boldin,00-0022084,,,-11.0,,,1.0,,,-11.000000
1,2010,A.Edwards,00-0027692,,,0.0,,,1.0,,,0.000000
2,2010,A.Randle El,00-0021190,39.0,,3.0,1.0,,1.0,39.000000,,3.000000
3,2010,A.Rodgers,00-0023459,3449.0,876.0,86.0,164.0,128.0,181.0,21.030487,6.843750,0.475138
4,2010,A.Smith,00-0023436,1873.0,603.0,75.0,87.0,88.0,163.0,21.528736,6.852273,0.460123
...,...,...,...,...,...,...,...,...,...,...,...,...
1487,2024,T.Lawrence,00-0036971,2182.0,457.0,20.0,106.0,69.0,108.0,20.584906,6.623188,0.185185
1488,2024,T.McKee,00-0038400,214.0,98.0,10.0,14.0,16.0,15.0,15.285714,6.125000,0.666667
1489,2024,T.Tagovailoa,00-0036212,1795.0,633.0,-157.0,100.0,96.0,201.0,17.950001,6.593750,-0.781095
1490,2024,T.Taylor,00-0028118,47.0,46.0,-5.0,3.0,6.0,13.0,15.666667,7.666667,-0.384615


# ADOT by Location

In [19]:
pass_plays = pbp.query('season_type == "REG" &\
                        `pass` == 1 &\
                        air_yards.notnull() &\
                        cpoe.notnull()')

adot_location_old = pass_plays\
    .groupby(['season', 'passer', 'passer_id', 'pass_location'])\
    .agg(air_yards_targets  = ('air_yards', 'count'),
         adot               = ('air_yards', 'mean'),
         air_yards_total    = ('air_yards', 'sum'))\
    .reset_index()

adot_location = adot_location_old\
    .pivot(index = ['season', 'passer', 'passer_id'], 
           columns = 'pass_location')

adot_location.columns = [f"{stat}_{location}" 
                            for stat, location in adot_location.columns]

adot_location = adot_location.reset_index()

adot_location.head()


Unnamed: 0,season,passer,passer_id,air_yards_targets_left,air_yards_targets_middle,air_yards_targets_right,adot_left,adot_middle,adot_right,air_yards_total_left,air_yards_total_middle,air_yards_total_right
0,2010,A.Boldin,00-0022084,,,1.0,,,-11.0,,,-11.0
1,2010,A.Edwards,00-0027692,,,1.0,,,0.0,,,0.0
2,2010,A.Randle El,00-0021190,,,2.0,,,21.0,,,42.0
3,2010,A.Rodgers,00-0023459,156.0,102.0,206.0,9.339744,10.862745,8.572816,1457.0,1108.0,1766.0
4,2010,A.Smith,00-0023436,112.0,57.0,162.0,6.205357,12.578947,6.635802,695.0,717.0,1075.0


# ADOT Location vs Depth

In [20]:
pass_plays = pbp.query('season_type == "REG" &\
                        `pass` == 1 &\
                        air_yards.notnull()')

def pass_length(air_yards):
    if air_yards < 5:
        return 'short'
    elif 5 <= air_yards <= 10:
        return 'medium'
    else:
        return 'deep'

pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)

adot_zone_old = pass_plays\
    .groupby(['season', 'passer', 'passer_id', 'pass_length', 'pass_location'])\
    .agg(air_yards_targets  = ('air_yards', 'count'),
         adot               = ('air_yards', 'mean'),
         air_yards_total    = ('air_yards', 'sum'))\
    .reset_index()         

adot_zone = adot_zone_old\
    .pivot(index = ['season', 'passer', 'passer_id'],
           columns = ['pass_length', 'pass_location'])

adot_zone.columns = [f"{stat}_{length}_{location}"
                         for stat, length, location in adot_zone.columns]

adot_zone = adot_zone.reset_index()

adot_zone

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_plays['pass_length'] = pass_plays['air_yards'].apply(pass_length)


Unnamed: 0,season,passer,passer_id,air_yards_targets_short_right,air_yards_targets_deep_right,air_yards_targets_deep_left,air_yards_targets_deep_middle,air_yards_targets_medium_left,air_yards_targets_medium_middle,air_yards_targets_medium_right,...,adot_short_middle,air_yards_total_short_right,air_yards_total_deep_right,air_yards_total_deep_left,air_yards_total_deep_middle,air_yards_total_medium_left,air_yards_total_medium_middle,air_yards_total_medium_right,air_yards_total_short_left,air_yards_total_short_middle
0,2010,A.Boldin,00-0022084,1.0,,,,,,,...,,-11.0,,,,,,,,
1,2010,A.Edwards,00-0027692,1.0,,,,,,,...,,0.0,,,,,,,,
2,2010,A.Randle El,00-0021190,1.0,1.0,,,,,,...,,3.0,39.0,,,,,,,
3,2010,A.Rodgers,00-0023459,91.0,63.0,55.0,46.0,45.0,25.0,58.0,...,0.870968,29.0,1387.0,1159.0,903.0,305.0,178.0,393.0,30.0,27.0
4,2010,A.Smith,00-0023436,85.0,38.0,24.0,24.0,25.0,21.0,41.0,...,2.583333,17.0,808.0,506.0,541.0,178.0,145.0,275.0,27.0,31.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1486,2024,T.Lawrence,00-0036971,42.0,40.0,39.0,27.0,22.0,20.0,27.0,...,2.375000,4.0,799.0,759.0,624.0,146.0,128.0,183.0,-22.0,38.0
1487,2024,T.McKee,00-0038400,8.0,5.0,8.0,1.0,3.0,5.0,8.0,...,2.333333,-3.0,76.0,125.0,13.0,16.0,30.0,52.0,6.0,7.0
1488,2024,T.Tagovailoa,00-0036212,79.0,32.0,33.0,35.0,34.0,30.0,32.0,...,1.222222,-97.0,600.0,671.0,524.0,231.0,195.0,207.0,-93.0,33.0
1489,2024,T.Taylor,00-0028118,9.0,3.0,,,1.0,1.0,4.0,...,-3.000000,4.0,47.0,,,9.0,5.0,32.0,-6.0,-3.0


In [21]:
print(pbp.filter(like = 'pass').columns)

Index(['pass_length', 'pass_location', 'total_home_pass_epa',
       'total_away_pass_epa', 'total_home_pass_wpa', 'total_away_pass_wpa',
       'first_down_pass', 'incomplete_pass', 'pass_attempt', 'pass_touchdown',
       'complete_pass', 'passer_player_id', 'passer_player_name',
       'passing_yards', 'pass_defense_1_player_id',
       'pass_defense_1_player_name', 'pass_defense_2_player_id',
       'pass_defense_2_player_name', 'passer', 'passer_jersey_number', 'pass',
       'passer_id', 'xpass', 'pass_oe', 'number_of_pass_rushers'],
      dtype='object')


In [22]:
dataframes = [epa_pass,
              epa_sack,
              epa_turnover,
              pass_length_epa,
              pass_location_epa,
              pass_zone_epa,
              cpoe,
              pass_length_cpoe,
              pass_location_cpoe,
              pass_zone_cpoe,
              adot,
              adot_depth,
              adot_location,
              adot_zone
            ]

qb_advanced_stats = reduce(lambda left, right: pd.merge(left, right, on = ['season', 'passer', 'passer_id'], how = 'outer'), dataframes)

qb_advanced_stats.fillna(0, inplace = True)

qb_advanced_stats.query('season == 2024 & dropbacks > 300').sort_values('epa_per_dropback', ascending = False).head()

Unnamed: 0,season,passer_id,passer,dropbacks,total_pass_epa,epa_per_dropback,epa_per_sack,sack_epa_total,sacks_taken,epa_per_turnover,...,adot_short_middle,air_yards_total_short_right,air_yards_total_deep_right,air_yards_total_deep_left,air_yards_total_deep_middle,air_yards_total_medium_left,air_yards_total_medium_middle,air_yards_total_medium_right,air_yards_total_short_left,air_yards_total_short_middle
1495,2024,00-0034796,L.Jackson,573,188.66687,0.329262,-1.689597,-38.860737,23.0,-5.275891,...,1.028571,-28.0,1217.0,1465.0,770.0,246.0,295.0,221.0,-116.0,36.0
1499,2024,00-0034857,J.Allen,585,183.548187,0.313758,-1.762314,-24.672396,14.0,-4.7879,...,3.074074,-48.0,1436.0,1344.0,552.0,300.0,191.0,284.0,-120.0,83.0
1476,2024,00-0033106,J.Goff,598,161.282562,0.269703,-1.736947,-53.845352,31.0,-4.355292,...,1.307692,-97.0,841.0,952.0,882.0,210.0,345.0,251.0,-60.0,85.0
1511,2024,00-0036212,T.Tagovailoa,450,97.324303,0.216276,-1.959115,-41.141418,21.0,-4.724627,...,1.222222,-97.0,600.0,671.0,524.0,231.0,195.0,207.0,-93.0,33.0
1498,2024,00-0034855,B.Mayfield,688,143.418335,0.208457,-1.686836,-67.473442,40.0,-4.408298,...,1.642857,-64.0,1159.0,1032.0,929.0,294.0,300.0,374.0,-99.0,46.0


In [24]:
qb_advanced_stats.to_csv('qb_advanced_stats.csv', index = False)