In [1]:
# Imports
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the two input tables: per-play defender distance data and player rankings.
# NOTE(review): both locations are absolute, machine-specific paths — consider
# moving them behind a configurable data directory.
distance_csv_path = '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/distance_2.csv'
rankings_csv_path = '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/player_rankings.csv'
dist_df = pd.read_csv(distance_csv_path)
rankings_df = pd.read_csv(rankings_csv_path)

In [3]:
# Remove the two 'Unnamed: 0*' columns (presumably stale index columns left by
# earlier CSV exports — confirm against the source file).
# errors='ignore' keeps this cell re-runnable: dist_df is rebound to its own
# result, so without it a second execution raises KeyError because the columns
# have already been dropped.
dist_df = dist_df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], errors='ignore')

In [4]:
# Plays per defensive player: count of non-null playId values within each dPlayer group
count_df = (
    dist_df
    .groupby(by='dPlayer')['playId']
    .agg('count')
)

In [5]:
# Mean endDist per defensive player (defender-to-receiver distance when the ball is received)
end_dist_df = (
    dist_df
    .groupby(by='dPlayer')['endDist']
    .agg('mean')
)

In [6]:
# Mean covDist per defensive player (coverage distance while the ball is in the air)
cover_dist_df = (
    dist_df
    .groupby(by='dPlayer')['covDist']
    .agg('mean')
)

In [7]:
# Mean closeSpeed per defensive player (closing speed while the ball is in the air)
close_speed_df = (
    dist_df
    .groupby(by='dPlayer')['closeSpeed']
    .agg('mean')
)

In [8]:
# Assemble the per-defender summary table: the four aggregates are placed side
# by side, aligned on their shared dPlayer index, and labelled through `keys`
merge_list = [count_df, end_dist_df, cover_dist_df, close_speed_df]
merged_df = pd.concat(
    merge_list,
    axis=1,
    keys=['plays', 'endAvg', 'coverAvg', 'speedAvg'],
)
# Display only: the result of reset_index() is shown, merged_df keeps dPlayer as its index
merged_df.reset_index()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg
0,A.J. Bouye,268,3.324293,6.889101,0.452775
1,A.J. Klein,161,5.042501,10.662449,0.776639
2,Aaron Colvin,151,3.724591,7.746312,0.539261
3,Aaron Lynch,4,4.267399,8.341690,-0.239456
4,Adam Jones,39,3.549253,7.735683,0.880037
...,...,...,...,...,...
591,Zach Brown,159,4.483821,9.426334,0.660801
592,Zach Cunningham,213,4.738415,9.968443,0.649819
593,Zach Vigil,9,3.423993,7.576432,0.998169
594,Zaire Franklin,57,6.214266,12.438763,0.092613


In [9]:
# Eliminate defensive players with fewer than MIN_PLAYS plays.
# `plays` is an integer count, so `>= MIN_PLAYS` selects the same rows as `> 9`.
MIN_PLAYS = 10
# .copy() makes coverage_df an independent frame. The rank columns assigned to
# it in later cells would otherwise be writes onto a selection of merged_df,
# which is the pattern pandas flags with SettingWithCopyWarning.
coverage_df = merged_df[merged_df['plays'] >= MIN_PLAYS].copy()
# Display only: coverage_df keeps dPlayer as its index
coverage_df.reset_index()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg
0,A.J. Bouye,268,3.324293,6.889101,0.452775
1,A.J. Klein,161,5.042501,10.662449,0.776639
2,Aaron Colvin,151,3.724591,7.746312,0.539261
3,Adam Jones,39,3.549253,7.735683,0.880037
4,Adarius Taylor,157,5.228102,10.076555,-0.277348
...,...,...,...,...,...
498,Xavier Woods,122,5.640671,13.279344,2.564675
499,Za'Darius Smith,37,6.847204,12.833429,-1.063739
500,Zach Brown,159,4.483821,9.426334,0.660801
501,Zach Cunningham,213,4.738415,9.968443,0.649819


In [10]:
# Rank defenders on endAvg; ascending order gives the smallest average rank 1
coverage_df['endRank'] = coverage_df['endAvg'].rank(
    method='average',  # pandas default, spelled out: tied values share their mean rank
    ascending=True,
)
coverage_df.reset_index()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank
0,A.J. Bouye,268,3.324293,6.889101,0.452775,83.0
1,A.J. Klein,161,5.042501,10.662449,0.776639,365.0
2,Aaron Colvin,151,3.724591,7.746312,0.539261,148.0
3,Adam Jones,39,3.549253,7.735683,0.880037,116.0
4,Adarius Taylor,157,5.228102,10.076555,-0.277348,391.0
...,...,...,...,...,...,...
498,Xavier Woods,122,5.640671,13.279344,2.564675,431.0
499,Za'Darius Smith,37,6.847204,12.833429,-1.063739,491.0
500,Zach Brown,159,4.483821,9.426334,0.660801,283.0
501,Zach Cunningham,213,4.738415,9.968443,0.649819,319.0


In [11]:
# Rank defenders on coverAvg; ascending order gives the smallest average rank 1
coverage_df['coverageRank'] = coverage_df['coverAvg'].rank(
    method='average',  # pandas default, spelled out: tied values share their mean rank
    ascending=True,
)
coverage_df.reset_index()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank
0,A.J. Bouye,268,3.324293,6.889101,0.452775,83.0,69.0
1,A.J. Klein,161,5.042501,10.662449,0.776639,365.0,377.0
2,Aaron Colvin,151,3.724591,7.746312,0.539261,148.0,138.0
3,Adam Jones,39,3.549253,7.735683,0.880037,116.0,136.0
4,Adarius Taylor,157,5.228102,10.076555,-0.277348,391.0,333.0
...,...,...,...,...,...,...,...
498,Xavier Woods,122,5.640671,13.279344,2.564675,431.0,480.0
499,Za'Darius Smith,37,6.847204,12.833429,-1.063739,491.0,470.0
500,Zach Brown,159,4.483821,9.426334,0.660801,283.0,271.0
501,Zach Cunningham,213,4.738415,9.968443,0.649819,319.0,322.0


In [12]:
# Rank defenders on speedAvg; descending order gives the largest average rank 1
coverage_df['speedRank'] = coverage_df['speedAvg'].rank(
    method='average',  # pandas default, spelled out: tied values share their mean rank
    ascending=False,
)
coverage_df.reset_index()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank,speedRank
0,A.J. Bouye,268,3.324293,6.889101,0.452775,83.0,69.0,260.0
1,A.J. Klein,161,5.042501,10.662449,0.776639,365.0,377.0,169.0
2,Aaron Colvin,151,3.724591,7.746312,0.539261,148.0,138.0,236.0
3,Adam Jones,39,3.549253,7.735683,0.880037,116.0,136.0,151.0
4,Adarius Taylor,157,5.228102,10.076555,-0.277348,391.0,333.0,449.0
...,...,...,...,...,...,...,...,...
498,Xavier Woods,122,5.640671,13.279344,2.564675,431.0,480.0,12.0
499,Za'Darius Smith,37,6.847204,12.833429,-1.063739,491.0,470.0,483.0
500,Zach Brown,159,4.483821,9.426334,0.660801,283.0,271.0,200.0
501,Zach Cunningham,213,4.738415,9.968443,0.649819,319.0,322.0,203.0


In [13]:
# Export the ranked coverage table to Excel; index=True writes the frame's index too.
# NOTE(review): absolute, machine-specific output path.
coverage_xlsx_path = '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/coverage_speed.xlsx'
coverage_df.to_excel(coverage_xlsx_path, index=True)

In [14]:
# Join coverage metrics with player rankings.
# The rankings table names its key column 'defender'; rename it so both sides
# share the 'dPlayer' key, then inner-join (only players present in both
# tables are kept).
rankings_df = rankings_df.rename(columns={'defender': 'dPlayer'})
cr_df = coverage_df.merge(rankings_df, how='inner', on='dPlayer')
cr_df.head()

Unnamed: 0.1,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank,speedRank,Unnamed: 0,finalRank,...,totalPassAttempts,completions,incompletions,interceptions,epa,completionPct,completionRank,epaRank,intRank,combinedRank
0,A.J. Bouye,268,3.324293,6.889101,0.452775,83.0,69.0,260.0,176,168.0,...,67,43,23,1,6.205656,0.641791,186.5,202.0,156.0,544.5
1,A.J. Klein,161,5.042501,10.662449,0.776639,365.0,377.0,169.0,90,84.0,...,29,17,11,1,-1.635733,0.586207,121.0,91.0,156.0,368.0
2,Aaron Colvin,151,3.724591,7.746312,0.539261,148.0,138.0,236.0,380,365.5,...,30,24,6,0,11.99504,0.8,347.0,279.0,298.5,924.5
3,Adam Jones,39,3.549253,7.735683,0.880037,116.0,136.0,151.0,95,89.0,...,14,7,7,0,-5.497776,0.5,42.0,52.0,298.5,392.5
4,Adarius Taylor,157,5.228102,10.076555,-0.277348,391.0,333.0,449.0,308,299.0,...,38,29,8,1,16.455525,0.763158,320.0,318.0,156.0,794.0


In [15]:
# Narrow the merged table to the player key, the coverage metrics, their ranks
# and finalRank
selected_cols = [
    'dPlayer',
    'plays',
    'endAvg',
    'coverAvg',
    'speedAvg',
    'endRank',
    'coverageRank',
    'speedRank',
    'finalRank',
]
final_df = cr_df[selected_cols]
final_df.head()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank,speedRank,finalRank
0,A.J. Bouye,268,3.324293,6.889101,0.452775,83.0,69.0,260.0,168.0
1,A.J. Klein,161,5.042501,10.662449,0.776639,365.0,377.0,169.0,84.0
2,Aaron Colvin,151,3.724591,7.746312,0.539261,148.0,138.0,236.0,365.5
3,Adam Jones,39,3.549253,7.735683,0.880037,116.0,136.0,151.0,89.0
4,Adarius Taylor,157,5.228102,10.076555,-0.277348,391.0,333.0,449.0,299.0


In [19]:
# Order the table by finalRank (ascending, so rank 1.0 comes first).
# The row-display cap is lifted so the whole table is rendered; this is a
# global pandas option and stays in effect for later cells.
pd.set_option('display.max_rows', None)
final_df = final_df.sort_values('finalRank')
# Display only: reset_index() here also shows the pre-sort row labels as an 'index' column
final_df.reset_index()

Unnamed: 0,index,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank,speedRank,finalRank
0,82,Damontae Kazee,75,5.524434,13.82115,2.581982,419.0,490.0,11.0,1.0
1,120,Eddie Jackson,129,3.920834,9.657901,2.262418,186.0,291.0,21.0,2.0
2,390,Xavien Howard,280,3.667533,7.609058,0.407507,136.0,131.0,279.0,3.0
3,222,Kevin Byard,162,3.318783,7.811291,1.356425,82.0,144.0,67.0,4.0
4,117,Duron Harmon,65,4.284789,11.277708,2.64613,250.0,413.0,8.0,5.0
5,317,Reshad Jones,114,5.833752,13.066791,1.771667,447.0,477.0,37.0,6.0
6,13,Andre Hal,36,4.852551,11.312255,1.707425,336.0,415.0,40.0,7.0
7,17,Anthony Harris,64,4.141039,9.934886,1.682483,229.0,318.0,41.0,8.0
8,269,Matt Milano,185,5.293136,10.918061,0.533454,400.0,391.0,239.0,9.0
9,101,Denzel Ward,322,3.855075,7.798657,0.248656,178.0,142.0,350.0,10.0
