In [1]:
# Imports
import pandas as pd
import numpy as np
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas warnings (e.g. about
# assigning into a slice of another DataFrame) — consider narrowing it.
warnings.filterwarnings('ignore')

In [2]:
# Read the two input CSV files: the player roster and the per-play table.
# NOTE(review): both paths are absolute and machine-specific.
players_df = pd.read_csv(
    filepath_or_buffer='/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/players.csv'
)
plays_df = pd.read_csv(
    filepath_or_buffer='/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/full_play.csv'
)

In [3]:
# Call the player-name column 'defender' in players_df, the same column name
# that plays_df is grouped and merged on in the cells that follow.
players_df = players_df.rename(columns={'displayName': 'defender'})

In [4]:
# Plays per defender: the number of non-null playId values in each group
count_df = plays_df.groupby('defender')['playId'].agg('count')

In [5]:
# Completions per defender: how many of the defender's plays have passResult 'C'
completion_df = (
    plays_df
    .groupby('defender')['passResult']
    .apply(lambda outcomes: outcomes.eq('C').sum())
)

In [6]:
# Incompletions per defender: how many of the defender's plays have passResult 'I'
incompletion_df = (
    plays_df
    .groupby('defender')['passResult']
    .apply(lambda outcomes: outcomes.eq('I').sum())
)

In [7]:
# Interceptions per defender: how many of the defender's plays have passResult 'IN'
interception_df = (
    plays_df
    .groupby('defender')['passResult']
    .apply(lambda outcomes: outcomes.eq('IN').sum())
)

In [8]:
# Total epa per defender, summed over all of the defender's plays
epa_df = plays_df.groupby('defender')['epa'].agg('sum')

In [9]:
# Combine the per-defender Series side by side into one table.
# All five Series are indexed by defender, so concat aligns them row by row.
merge_list = [count_df, completion_df, incompletion_df, interception_df, epa_df]
def_df = pd.concat(merge_list, axis=1)
def_df.columns = ['totalPassAttempts', 'completions', 'incompletions', 'interceptions', 'epa']
# Move the defender names out of the index into a leading 'defender' column,
# leaving a plain 0..n-1 row index behind.
def_df = def_df.rename_axis('defender').reset_index()

In [10]:
# Keep only named defenders (drop the 'None' placeholder) with MORE than 10
# pass attempts. The comparison is strict, so a defender with exactly 10
# attempts is dropped as well.
# .copy() makes defender_df an independent frame: the following cells add
# columns to it, and without the copy those assignments would target a
# boolean-mask slice of def_df (the pandas SettingWithCopyWarning case).
defender_df = def_df[(def_df['defender'] != 'None') & (def_df['totalPassAttempts'] > 10)].copy()

In [11]:
# Fraction of each defender's pass attempts that ended as completions
defender_df['completionPct'] = defender_df['completions'].div(defender_df['totalPassAttempts'])
# Display only: reset_index() returns a new frame; defender_df itself is unchanged
defender_df.reset_index()

Unnamed: 0,index,defender,totalPassAttempts,completions,incompletions,interceptions,epa,completionPct
0,0,A.J. Bouye,67,43,23,1,6.205656,0.641791
1,1,A.J. Klein,29,17,11,1,-1.635733,0.586207
2,2,Aaron Colvin,30,24,6,0,11.995040,0.800000
3,3,Adam Jones,14,7,7,0,-5.497776,0.500000
4,4,Adarius Taylor,38,29,8,1,16.455525,0.763158
...,...,...,...,...,...,...,...,...
379,563,Xavier Woods,47,26,18,3,17.711797,0.553191
380,564,Za'Darius Smith,11,7,4,0,1.316462,0.636364
381,565,Zach Brown,44,32,12,0,3.490105,0.727273
382,566,Zach Cunningham,67,46,20,1,6.069930,0.686567


In [12]:
# Rank by completion percentage: the lowest percentage gets rank 1 and tied
# defenders share the average of their ranks
defender_df['completionRank'] = defender_df['completionPct'].rank(method='average', ascending=True)
# Display only: reset_index() returns a new frame; defender_df itself is unchanged
defender_df.reset_index()

Unnamed: 0,index,defender,totalPassAttempts,completions,incompletions,interceptions,epa,completionPct,completionRank
0,0,A.J. Bouye,67,43,23,1,6.205656,0.641791,186.5
1,1,A.J. Klein,29,17,11,1,-1.635733,0.586207,121.0
2,2,Aaron Colvin,30,24,6,0,11.995040,0.800000,347.0
3,3,Adam Jones,14,7,7,0,-5.497776,0.500000,42.0
4,4,Adarius Taylor,38,29,8,1,16.455525,0.763158,320.0
...,...,...,...,...,...,...,...,...,...
379,563,Xavier Woods,47,26,18,3,17.711797,0.553191,89.0
380,564,Za'Darius Smith,11,7,4,0,1.316462,0.636364,177.0
381,565,Zach Brown,44,32,12,0,3.490105,0.727273,286.0
382,566,Zach Cunningham,67,46,20,1,6.069930,0.686567,245.0


In [13]:
# Rank by summed epa: the lowest (most negative) total gets rank 1 and tied
# defenders share the average of their ranks
defender_df['epaRank'] = defender_df['epa'].rank(method='average', ascending=True)
# Display only: reset_index() returns a new frame; defender_df itself is unchanged
defender_df.reset_index()

Unnamed: 0,index,defender,totalPassAttempts,completions,incompletions,interceptions,epa,completionPct,completionRank,epaRank
0,0,A.J. Bouye,67,43,23,1,6.205656,0.641791,186.5,202.0
1,1,A.J. Klein,29,17,11,1,-1.635733,0.586207,121.0,91.0
2,2,Aaron Colvin,30,24,6,0,11.995040,0.800000,347.0,279.0
3,3,Adam Jones,14,7,7,0,-5.497776,0.500000,42.0,52.0
4,4,Adarius Taylor,38,29,8,1,16.455525,0.763158,320.0,318.0
...,...,...,...,...,...,...,...,...,...,...
379,563,Xavier Woods,47,26,18,3,17.711797,0.553191,89.0,335.0
380,564,Za'Darius Smith,11,7,4,0,1.316462,0.636364,177.0,124.0
381,565,Zach Brown,44,32,12,0,3.490105,0.727273,286.0,157.0
382,566,Zach Cunningham,67,46,20,1,6.069930,0.686567,245.0,198.0


In [14]:
# Rank by interceptions in descending order: the most interceptions gets
# rank 1 and tied defenders share the average of their ranks
defender_df['intRank'] = defender_df['interceptions'].rank(method='average', ascending=False)
# Display only: reset_index() returns a new frame; defender_df itself is unchanged
defender_df.reset_index()

Unnamed: 0,index,defender,totalPassAttempts,completions,incompletions,interceptions,epa,completionPct,completionRank,epaRank,intRank
0,0,A.J. Bouye,67,43,23,1,6.205656,0.641791,186.5,202.0,156.0
1,1,A.J. Klein,29,17,11,1,-1.635733,0.586207,121.0,91.0,156.0
2,2,Aaron Colvin,30,24,6,0,11.995040,0.800000,347.0,279.0,298.5
3,3,Adam Jones,14,7,7,0,-5.497776,0.500000,42.0,52.0,298.5
4,4,Adarius Taylor,38,29,8,1,16.455525,0.763158,320.0,318.0,156.0
...,...,...,...,...,...,...,...,...,...,...,...
379,563,Xavier Woods,47,26,18,3,17.711797,0.553191,89.0,335.0,34.5
380,564,Za'Darius Smith,11,7,4,0,1.316462,0.636364,177.0,124.0,298.5
381,565,Zach Brown,44,32,12,0,3.490105,0.727273,286.0,157.0,298.5
382,566,Zach Cunningham,67,46,20,1,6.069930,0.686567,245.0,198.0,156.0


In [15]:
# Combined score: the unweighted sum of the three individual ranks
# (lower is better, since each component rank puts the best defender at 1)
defender_df['combinedRank'] = (
    defender_df['completionRank']
    .add(defender_df['epaRank'])
    .add(defender_df['intRank'])
)
# Display only: reset_index() returns a new frame; defender_df itself is unchanged
defender_df.reset_index()

Unnamed: 0,index,defender,totalPassAttempts,completions,incompletions,interceptions,epa,completionPct,completionRank,epaRank,intRank,combinedRank
0,0,A.J. Bouye,67,43,23,1,6.205656,0.641791,186.5,202.0,156.0,544.5
1,1,A.J. Klein,29,17,11,1,-1.635733,0.586207,121.0,91.0,156.0,368.0
2,2,Aaron Colvin,30,24,6,0,11.995040,0.800000,347.0,279.0,298.5,924.5
3,3,Adam Jones,14,7,7,0,-5.497776,0.500000,42.0,52.0,298.5,392.5
4,4,Adarius Taylor,38,29,8,1,16.455525,0.763158,320.0,318.0,156.0,794.0
...,...,...,...,...,...,...,...,...,...,...,...,...
379,563,Xavier Woods,47,26,18,3,17.711797,0.553191,89.0,335.0,34.5,458.5
380,564,Za'Darius Smith,11,7,4,0,1.316462,0.636364,177.0,124.0,298.5,599.5
381,565,Zach Brown,44,32,12,0,3.490105,0.727273,286.0,157.0,298.5,741.5
382,566,Zach Cunningham,67,46,20,1,6.069930,0.686567,245.0,198.0,156.0,599.0


In [16]:
# Final rank: rank the combined score ascending, so the smallest combined
# rank becomes finalRank 1 (ties share the average rank).
defender_df['finalRank'] = defender_df['combinedRank'].rank(ascending=True)
# Order the table best-first and preview the top 20. The rows keep their
# original index labels; no index reset is applied here.
defender_df = defender_df.sort_values(by='finalRank')
defender_df.head(20)

Unnamed: 0,defender,totalPassAttempts,completions,incompletions,interceptions,epa,completionPct,completionRank,epaRank,intRank,combinedRank,finalRank
115,Damontae Kazee,39,14,18,7,-27.905742,0.358974,6.0,2.0,2.0,10.0,1.0
177,Eddie Jackson,57,24,27,6,-45.730553,0.421053,10.0,1.0,4.0,15.0,2.0
561,Xavien Howard,65,28,30,7,-17.084136,0.430769,11.0,7.0,2.0,20.0,3.0
313,Kevin Byard,46,19,22,5,-14.527635,0.413043,9.0,14.0,6.0,29.0,4.0
172,Duron Harmon,32,10,18,4,-12.39711,0.3125,3.0,20.0,14.5,37.5,5.0
454,Reshad Jones,39,19,16,4,-22.90865,0.487179,30.5,4.0,14.5,49.0,6.0
15,Andre Hal,13,4,6,3,-11.468255,0.307692,1.5,26.0,34.5,62.0,7.0
23,Anthony Harris,26,8,16,2,-17.638185,0.307692,1.5,6.0,73.5,81.0,8.0
374,Matt Milano,40,21,15,4,-14.578289,0.525,62.0,13.0,14.5,89.5,9.0
146,Denzel Ward,86,39,45,2,-16.70827,0.453488,17.0,8.0,73.5,98.5,10.0


In [17]:
# One row per distinct ('def', 'defender') pair found in the play data.
# NOTE(review): a defender who appears with more than one 'def' value keeps
# one row per value, which would repeat that defender in the merges that
# follow — confirm whether that can happen in this data.
team_df = plays_df.loc[:, ['def', 'defender']].drop_duplicates()

In [18]:
# Position lookup: just the 'position' and 'defender' columns of the roster
position_df = players_df.loc[:, ['position', 'defender']]

In [19]:
# Attach each defender's position to their team row (inner join on 'defender',
# so defenders missing from either table are dropped)
temp_df = team_df.merge(position_df, how='inner', on='defender')

In [20]:
# Add team and position to the ranked defender stats (inner join on 'defender')
final_df = defender_df.merge(temp_df, how='inner', on='defender')

In [21]:
# Clean up the data - 2 WR in final data: remove every row whose position is 'WR'
wr_labels = final_df.index[final_df['position'].eq('WR')]
final_df = final_df.drop(index=wr_labels)

In [22]:
# Final table: keep these columns, in this order (rank, identity, raw stats,
# then the component ranks), and preview the first 50 rows
final_df = final_df.loc[:, [
    'finalRank', 'defender', 'def', 'position',
    'totalPassAttempts', 'completions', 'incompletions', 'interceptions',
    'epa', 'completionPct',
    'completionRank', 'epaRank', 'intRank', 'combinedRank',
]]
final_df.head(50)

Unnamed: 0,finalRank,defender,def,position,totalPassAttempts,completions,incompletions,interceptions,epa,completionPct,completionRank,epaRank,intRank,combinedRank
0,1.0,Damontae Kazee,ATL,SS,39,14,18,7,-27.905742,0.358974,6.0,2.0,2.0,10.0
1,2.0,Eddie Jackson,CHI,FS,57,24,27,6,-45.730553,0.421053,10.0,1.0,4.0,15.0
2,3.0,Xavien Howard,MIA,CB,65,28,30,7,-17.084136,0.430769,11.0,7.0,2.0,20.0
3,4.0,Kevin Byard,TEN,FS,46,19,22,5,-14.527635,0.413043,9.0,14.0,6.0,29.0
4,5.0,Duron Harmon,NE,FS,32,10,18,4,-12.39711,0.3125,3.0,20.0,14.5,37.5
5,6.0,Reshad Jones,MIA,SS,39,19,16,4,-22.90865,0.487179,30.5,4.0,14.5,49.0
6,7.0,Andre Hal,HOU,FS,13,4,6,3,-11.468255,0.307692,1.5,26.0,34.5,62.0
7,8.0,Anthony Harris,MIN,FS,26,8,16,2,-17.638185,0.307692,1.5,6.0,73.5,81.0
8,9.0,Matt Milano,BUF,OLB,40,21,15,4,-14.578289,0.525,62.0,13.0,14.5,89.5
9,10.0,Denzel Ward,CLE,CB,86,39,45,2,-16.70827,0.453488,17.0,8.0,73.5,98.5


In [23]:
# Save the rankings as CSV; the row index is written as the first column
final_df.to_csv(
    path_or_buf='/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/player_rankings.csv',
    index=True,
)

In [24]:
# Save the same rankings as an Excel workbook, row index included
final_df.to_excel(
    excel_writer='/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/player_rankings.xlsx',
    index=True,
)