# **2023 NFL Big Data Bowl**
### Sol Ben-Ishay

## **DAPPS - Difficulty Adjusted Pass-Rush Pressure Score**

In [65]:
%run feature_engineering.ipynb

In [66]:
# Get all the relevant data to create the metric
player_level_coi = ['gameId', 'playId', 'nflId', 'displayName', 'pff_role', 'pff_positionLinedUp',
        'pff_pressure', 'pff_hit', 'pff_hurry', 'pff_sack']
play_level_coi = ['gameId','playId','passResult', 'down', 'yardsToGo']
metric_df = (player_level_df[player_level_coi].query(f"pff_role == 'Pass Rush'")
        .merge(play_level_df[play_level_coi], how='left', on=['gameId','playId'])
        .merge(rusher_pos_lined_up_dist_at_snap_df, how='left',on=['gameId','playId','nflId'])
        .merge(dl_num_blockers_df, how='left', on=['gameId','playId','nflId'])
        .merge(rusher_min_dist_df, how='left', on=['gameId','playId','nflId'])
        .merge(rusher_dist_at_action_df, how='left', on=['gameId','playId','nflId'])
        .merge(penalty_df, how='left', on=['gameId','playId','nflId'])
        .merge(snap_to_action_df, how='left', on=['gameId','playId'])
        .merge(rusher_pos_lined_up_dist_diff_df, how='left', on=['gameId','playId','nflId'])
        .drop(columns=['pff_role','multBlockers','passResult']))

## Clean up NAs
# A missing snap_to_action_time/distance at rel occurs when a certain play is missing either a snap or action event
# Investigate missing dl_alignment/distance at 1.5
metric_df = metric_df.dropna(subset=['dist_from_qb_at_action','snap_to_action_time','down','yardsToGo'], how='any')
# numBlockers and multBlockers is N/A when a player is unblocked
metric_df = metric_df.fillna(value={'numBlockers':0,'multBlockers':'U'})

## Filter plays
# Only look at plays that were either C, I, S, or IN (no scrambles, laterals, etc)
# metric_df = metric_df.query("passResult in ['C','I','S','IN','R']").drop(columns=["passResult"])
#  Only look at plays that have a snap to action time within the 95th percentile
metric_df = (metric_df[metric_df.snap_to_action_time < metric_df.snap_to_action_time.quantile(.95)])

metric_df = metric_df[['gameId', 'playId', 'down', 'yardsToGo', 'nflId', 'displayName', 'pff_positionLinedUp',
'pos_lined_up_dist_from_qb_at_snap', 'numBlockers', 'min_dist_from_qb', 'dist_from_qb_at_action', 'penalty',
'snap_to_action_time', 'pos_lined_up_diff_dist_from_qb', 'pff_hurry', 'pff_hit', 'pff_sack', 'pff_pressure']]

metric_df.loc[metric_df.pff_pressure == 0, 'pff_hurry_hit_sack'] = 1
metric_df.loc[metric_df.pff_hurry == 1, 'pff_hurry_hit_sack'] = 2
metric_df.loc[metric_df.pff_hit == 1, 'pff_hurry_hit_sack'] = 3
metric_df.loc[metric_df.pff_sack == 1, 'pff_hurry_hit_sack'] = 4

metric_df.head()

Unnamed: 0,gameId,playId,down,yardsToGo,nflId,displayName,pff_positionLinedUp,pos_lined_up_dist_from_qb_at_snap,numBlockers,min_dist_from_qb,dist_from_qb_at_action,penalty,snap_to_action_time,pos_lined_up_diff_dist_from_qb,pff_hurry,pff_hit,pff_sack,pff_pressure,pff_hurry_hit_sack
0,2021090900,97,3.0,2.0,41263,Demarcus Lawrence,LEO,7.802083,0.0,2.843835,3.238673,0.0,3.2,2.765722,1.0,0.0,0.0,1.0,2.0
1,2021090900,97,3.0,2.0,42403,Randy Gregory,ROLB,10.452009,1.0,3.153173,3.805391,0.0,3.2,5.341803,1.0,0.0,0.0,1.0,2.0
2,2021090900,97,3.0,2.0,44955,Carlos Watkins,DRT,5.783641,2.0,5.494879,10.437672,0.0,3.2,5.310975,0.0,0.0,0.0,0.0,1.0
4,2021090900,97,3.0,2.0,53504,Osa Odighizuwa,RE,6.521633,0.0,1.113059,2.496498,0.0,3.2,5.499835,1.0,0.0,0.0,1.0,2.0
5,2021090900,137,1.0,10.0,35441,Ndamukong Suh,DRT,6.466065,2.0,3.469308,3.635505,0.0,2.6,3.074737,0.0,0.0,0.0,0.0,1.0


In [67]:
# Check for NAs
if sum(metric_df.isna().sum().values) != 0:
    print(metric_df.isna().sum().loc[lambda x: x > 0])

In [68]:
# Scale the metric components
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

coi = ['down', 'yardsToGo', 'pos_lined_up_dist_from_qb_at_snap', 'numBlockers',
       'min_dist_from_qb', 'dist_from_qb_at_action', 'penalty', 'snap_to_action_time',
       'pos_lined_up_diff_dist_from_qb', 'pff_hurry_hit_sack']
metric_df[coi] = scaler.fit_transform(metric_df[coi])

reverse_rank_cols = ['min_dist_from_qb', 'dist_from_qb_at_action', 'penalty', 'snap_to_action_time', 'yardsToGo', 'down']
metric_df[reverse_rank_cols] = metric_df[reverse_rank_cols].apply(lambda x: 1-x)

metric_df.head()

Unnamed: 0,gameId,playId,down,yardsToGo,nflId,displayName,pff_positionLinedUp,pos_lined_up_dist_from_qb_at_snap,numBlockers,min_dist_from_qb,dist_from_qb_at_action,penalty,snap_to_action_time,pos_lined_up_diff_dist_from_qb,pff_hurry,pff_hit,pff_sack,pff_pressure,pff_hurry_hit_sack
0,2021090900,97,0.25,0.948718,41263,Demarcus Lawrence,LEO,0.675761,0.0,0.689913,0.918403,1.0,0.44898,0.284746,1.0,0.0,0.0,1.0,0.333333
1,2021090900,97,0.25,0.948718,42403,Randy Gregory,ROLB,0.963039,0.25,0.656184,0.903947,1.0,0.44898,0.364474,1.0,0.0,0.0,1.0,0.333333
2,2021090900,97,0.25,0.948718,44955,Carlos Watkins,DRT,0.456942,0.5,0.400848,0.734761,1.0,0.44898,0.36352,0.0,0.0,0.0,0.0,0.0
4,2021090900,97,0.25,0.948718,53504,Osa Odighizuwa,RE,0.536947,0.0,0.878634,0.937336,1.0,0.44898,0.369365,1.0,0.0,0.0,1.0,0.333333
5,2021090900,137,0.75,0.74359,35441,Ndamukong Suh,DRT,0.530923,0.5,0.621713,0.90828,1.0,0.571429,0.29431,0.0,0.0,0.0,0.0,0.0


In [70]:
# Create the DAPPS metric (include if the player got a penalty/penalty amt)
metric_df['Pressure Score'] = metric_df[['min_dist_from_qb','dist_from_qb_at_action','penalty','snap_to_action_time']].mean(axis=1)
metric_df['Difficulty Score'] = metric_df[['down','yardsToGo','pos_lined_up_dist_from_qb_at_snap','numBlockers','pos_lined_up_diff_dist_from_qb']].mean(axis=1)
metric_df['DAPPS'] = (metric_df['Pressure Score'] * .75) + (metric_df['Difficulty Score'] * .25)
metric_df.head()

Unnamed: 0,gameId,playId,down,yardsToGo,nflId,displayName,pff_positionLinedUp,pos_lined_up_dist_from_qb_at_snap,numBlockers,min_dist_from_qb,...,snap_to_action_time,pos_lined_up_diff_dist_from_qb,pff_hurry,pff_hit,pff_sack,pff_pressure,pff_hurry_hit_sack,Pressure Score,Difficulty Score,DAPPS
0,2021090900,97,0.25,0.948718,41263,Demarcus Lawrence,LEO,0.675761,0.0,0.689913,...,0.44898,0.284746,1.0,0.0,0.0,1.0,0.333333,0.764324,0.431845,0.681204
1,2021090900,97,0.25,0.948718,42403,Randy Gregory,ROLB,0.963039,0.25,0.656184,...,0.44898,0.364474,1.0,0.0,0.0,1.0,0.333333,0.752277,0.555246,0.70302
2,2021090900,97,0.25,0.948718,44955,Carlos Watkins,DRT,0.456942,0.5,0.400848,...,0.44898,0.36352,0.0,0.0,0.0,0.0,0.0,0.646147,0.503836,0.610569
4,2021090900,97,0.25,0.948718,53504,Osa Odighizuwa,RE,0.536947,0.0,0.878634,...,0.44898,0.369365,1.0,0.0,0.0,1.0,0.333333,0.816237,0.421006,0.71743
5,2021090900,137,0.75,0.74359,35441,Ndamukong Suh,DRT,0.530923,0.5,0.621713,...,0.571429,0.29431,0.0,0.0,0.0,0.0,0.0,0.775355,0.563765,0.722458


In [73]:
metric_df.sort_values(by=['DAPPS'], ascending=False).head(10)

Unnamed: 0,gameId,playId,down,yardsToGo,nflId,displayName,pff_positionLinedUp,pos_lined_up_dist_from_qb_at_snap,numBlockers,min_dist_from_qb,...,snap_to_action_time,pos_lined_up_diff_dist_from_qb,pff_hurry,pff_hit,pff_sack,pff_pressure,pff_hurry_hit_sack,Pressure Score,Difficulty Score,DAPPS
33148,2021103103,2868,1.0,1.0,44813,Myles Garrett,REO,0.730001,0.25,0.986122,...,0.653061,0.24841,0.0,1.0,0.0,1.0,0.666667,0.907387,0.645682,0.841961
12921,2021092610,2033,0.75,0.74359,43358,Yannick Ngakoue,REO,0.640398,0.25,0.948891,...,0.938776,0.206195,1.0,0.0,0.0,1.0,0.333333,0.934094,0.518037,0.830079
36150,2021110100,1351,0.75,0.74359,43332,Austin Johnson,DLT,0.459792,0.25,0.92825,...,0.877551,0.217808,0.0,1.0,0.0,1.0,0.666667,0.917421,0.484238,0.809126
30059,2021102405,1850,0.75,0.74359,42406,Frank Clark,REO,0.64789,0.25,0.955042,...,0.693878,0.231665,0.0,1.0,0.0,1.0,0.666667,0.902662,0.524629,0.808154
36149,2021110100,1351,0.75,0.74359,42349,Leonard Williams,DRT,0.453301,0.5,0.871724,...,0.877551,0.213358,0.0,0.0,0.0,0.0,0.0,0.899408,0.53205,0.807569
8225,2021091910,2074,0.75,0.871795,46081,Vita Vea,DRT,0.477765,0.5,0.873286,...,0.714286,0.221837,0.0,0.0,0.0,0.0,0.0,0.882445,0.564279,0.802904
3058,2021091208,1238,0.75,0.74359,46082,Daron Payne,NLT,0.425389,0.5,0.875859,...,0.755102,0.221221,0.0,0.0,0.0,0.0,0.0,0.894086,0.52804,0.802574
18581,2021100313,1854,0.75,0.74359,46081,Vita Vea,DRT,0.471409,0.5,0.923853,...,0.693878,0.25271,0.0,0.0,0.0,0.0,0.0,0.888516,0.543542,0.802273
22993,2021101012,3060,0.5,0.794872,42403,Randy Gregory,ROLB,0.635214,0.25,0.983644,...,0.673469,0.268663,0.0,1.0,0.0,1.0,0.666667,0.906419,0.48975,0.802252
21784,2021101008,1074,0.75,0.74359,43441,Matt Ioannidis,DLT,0.162122,0.25,0.933621,...,0.795918,0.254439,0.0,1.0,0.0,1.0,0.666667,0.925451,0.43203,0.802096


In [79]:
# Average DAPPS by player
high_snap_pass_rushers = player_level_df.query("officialPosition in ['NT','DT','DE'] and pff_role == 'Pass Rush'").nflId.value_counts().loc[lambda c: c > 35].index.tolist()
(metric_df[['nflId', 'displayName', 'DAPPS']]
.query(f"nflId in {high_snap_pass_rushers}")
.groupby(['nflId', 'displayName'])
.mean()
.sort_values(by="DAPPS", ascending=False)
.head(25))

Unnamed: 0_level_0,Unnamed: 1_level_0,DAPPS
nflId,displayName,Unnamed: 2_level_1
46199,Josh Sweat,0.703246
43358,Yannick Ngakoue,0.703064
46081,Vita Vea,0.702006
44813,Myles Garrett,0.701347
44826,Derek Barnett,0.701112
44915,Trey Hendrickson,0.700786
43354,Carl Nassib,0.700466
46249,Folorunso Fatukasi,0.700254
43378,Javon Hargrave,0.699993
42403,Randy Gregory,0.699218


In [76]:
# Average DAPPS by team
teams = frame_level_df[['gameId','playId','nflId','team']]
(metric_df[['gameId','playId', 'nflId', 'displayName', 'DAPPS']]
.query(f"nflId in {high_snap_pass_rushers}")
.merge(teams, on=['gameId','playId','nflId'], how='left')
.drop(columns=['gameId','playId','nflId','displayName'])
.groupby(['team'])
.mean()
.sort_values(by="DAPPS", ascending=False)
.head(10))

Unnamed: 0_level_0,DAPPS
team,Unnamed: 1_level_1
PHI,0.69109
LV,0.686755
NYJ,0.685277
WAS,0.684364
PIT,0.684199
CIN,0.683791
LA,0.683251
TB,0.683041
CHI,0.682802
CAR,0.682622


In [81]:
metric_df.query("displayName == 'Carl Nassib'").sort_values(by="DAPPS", ascending=False)

Unnamed: 0,gameId,playId,down,yardsToGo,nflId,displayName,pff_positionLinedUp,pos_lined_up_dist_from_qb_at_snap,numBlockers,min_dist_from_qb,...,snap_to_action_time,pos_lined_up_diff_dist_from_qb,pff_hurry,pff_hit,pff_sack,pff_pressure,pff_hurry_hit_sack,Pressure Score,Difficulty Score,DAPPS
4986,2021091300,4736,0.50,0.820513,43354,Carl Nassib,REO,0.705380,0.25,0.949171,...,6.122449e-01,0.232088,1.0,0.0,0.0,1.0,0.333333,0.880886,0.501596,0.786064
13091,2021092610,3990,0.75,0.743590,43354,Carl Nassib,LEO,0.663207,0.25,0.846787,...,7.142857e-01,0.245769,0.0,0.0,0.0,0.0,0.000000,0.871081,0.530513,0.785939
19017,2021100400,3257,0.50,0.743590,43354,Carl Nassib,LEO,0.669280,0.00,0.939940,...,6.938776e-01,0.262603,1.0,0.0,0.0,1.0,0.333333,0.901195,0.435095,0.784670
18940,2021100400,2088,0.50,0.974359,43354,Carl Nassib,LEO,0.633568,0.25,0.817903,...,6.938776e-01,0.238890,0.0,0.0,0.0,0.0,0.000000,0.859014,0.519363,0.774101
26957,2021101709,1733,0.75,0.666667,43354,Carl Nassib,LEO,0.739802,0.25,0.893421,...,5.714286e-01,0.256089,0.0,0.0,0.0,0.0,0.000000,0.851408,0.532512,0.771684
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4939,2021091300,4094,0.25,0.897436,43354,Carl Nassib,LEO,0.575919,0.00,0.501029,...,3.061224e-01,0.355995,0.0,0.0,0.0,0.0,0.000000,0.662475,0.415870,0.600824
30768,2021102407,2869,0.50,0.897436,43354,Carl Nassib,LEO,0.660403,0.50,0.415567,...,1.224490e-01,0.243008,0.0,0.0,0.0,0.0,0.000000,0.596063,0.560169,0.587089
30853,2021102407,3830,0.75,0.743590,43354,Carl Nassib,LEO,0.696253,0.00,0.618806,...,1.110223e-16,0.439041,0.0,0.0,0.0,0.0,0.000000,0.592648,0.525777,0.575930
22504,2021101010,3352,0.50,0.923077,43354,Carl Nassib,LEO,0.381183,0.00,0.528343,...,2.244898e-01,0.515867,0.0,0.0,0.0,0.0,0.000000,0.602353,0.464025,0.567771
