In [1]:
# Imports
import pandas as pd
import numpy as np
import warnings
# NOTE(review): with no category/message given, this 'ignore' filter matches
# every warning, so pandas warnings (e.g. SettingWithCopyWarning) and library
# deprecation notices are not shown unless the filters are changed later.
# Consider narrowing it to a specific category.
warnings.filterwarnings('ignore')

In [2]:
# Load the distance and player ranking data.
# Both CSV files are read from the same folder; DATA_DIR is the single place to
# edit when the dataset is stored somewhere else.
DATA_DIR = '/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021'
dist_df = pd.read_csv(DATA_DIR + '/distance_2.csv')
rankings_df = pd.read_csv(DATA_DIR + '/player_rankings.csv')

In [3]:
# Clean up the columns: remove 'Unnamed: 0' and 'Unnamed: 0.1'
# (presumably index columns written out by earlier CSV exports — TODO confirm).
# errors='ignore' keeps this cell safe to re-run: on a second run the columns
# are already gone and a plain drop would raise KeyError.
dist_df = dist_df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], errors='ignore')

In [4]:
# Plays per defender: count of non-null playId values within each dPlayer group
count_df = dist_df.groupby(by='dPlayer')['playId'].agg('count')

In [5]:
# Mean endDist per defender (average defender distance from the receiver
# when the ball is received)
end_dist_df = dist_df.groupby(by='dPlayer')['endDist'].agg('mean')

In [6]:
# Mean covDist per defender (average coverage distance while the ball is
# in the air)
cover_dist_df = dist_df.groupby(by='dPlayer')['covDist'].agg('mean')

In [7]:
# Mean closeSpeed per defender (average closing speed while the ball is
# in the air)
close_speed_df = dist_df.groupby(by='dPlayer')['closeSpeed'].agg('mean')

In [8]:
# Combine the four per-defender Series side by side into one stats table.
# All of them are indexed by dPlayer, so concat aligns them on that index;
# `keys` supplies the column label for each Series, in list order.
merge_list = [count_df, end_dist_df, cover_dist_df, close_speed_df]
merged_df = pd.concat(
    merge_list,
    axis=1,
    keys=['plays', 'endAvg', 'coverAvg', 'speedAvg'],
)
# Display only: the result of reset_index() is not assigned, so merged_df
# keeps dPlayer as its index
merged_df.reset_index()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg
0,A.J. Bouye,268,3.324293,6.889101,0.452775
1,A.J. Klein,161,5.042501,10.662449,0.776639
2,Aaron Colvin,151,3.724591,7.746312,0.539261
3,Aaron Lynch,4,4.267399,8.341690,-0.239456
4,Adam Jones,39,3.549253,7.735683,0.880037
...,...,...,...,...,...
591,Zach Brown,159,4.483821,9.426334,0.660801
592,Zach Cunningham,213,4.738415,9.968443,0.649819
593,Zach Vigil,9,3.423993,7.576432,0.998169
594,Zaire Franklin,57,6.214266,12.438763,0.092613


In [9]:
# Eliminate defensive players with fewer than 10 plays.
# MIN_PLAYS names the cut-off; `plays` is an integer count, so `>= 10` keeps
# exactly the rows the former `> 9` kept.
# .copy() makes coverage_df an independent frame: the rank columns assigned to
# it in later cells are then written to its own data rather than to a filtered
# slice of merged_df (the pattern pandas flags with SettingWithCopyWarning).
MIN_PLAYS = 10
coverage_df = merged_df[merged_df['plays'] >= MIN_PLAYS].copy()
# Display only: coverage_df itself keeps dPlayer as its index
coverage_df.reset_index()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg
0,A.J. Bouye,268,3.324293,6.889101,0.452775
1,A.J. Klein,161,5.042501,10.662449,0.776639
2,Aaron Colvin,151,3.724591,7.746312,0.539261
3,Adam Jones,39,3.549253,7.735683,0.880037
4,Adarius Taylor,157,5.228102,10.076555,-0.277348
...,...,...,...,...,...
498,Xavier Woods,122,5.640671,13.279344,2.564675
499,Za'Darius Smith,37,6.847204,12.833429,-1.063739
500,Zach Brown,159,4.483821,9.426334,0.660801
501,Zach Cunningham,213,4.738415,9.968443,0.649819


In [10]:
# End distance rank: ascending order, so the smallest endAvg gets rank 1
# (ties share the average of their ranks, which is the pandas default)
coverage_df['endRank'] = coverage_df['endAvg'].rank(method='average', ascending=True)
coverage_df.reset_index()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank
0,A.J. Bouye,268,3.324293,6.889101,0.452775,83.0
1,A.J. Klein,161,5.042501,10.662449,0.776639,365.0
2,Aaron Colvin,151,3.724591,7.746312,0.539261,148.0
3,Adam Jones,39,3.549253,7.735683,0.880037,116.0
4,Adarius Taylor,157,5.228102,10.076555,-0.277348,391.0
...,...,...,...,...,...,...
498,Xavier Woods,122,5.640671,13.279344,2.564675,431.0
499,Za'Darius Smith,37,6.847204,12.833429,-1.063739,491.0
500,Zach Brown,159,4.483821,9.426334,0.660801,283.0
501,Zach Cunningham,213,4.738415,9.968443,0.649819,319.0


In [11]:
# Coverage rank: ascending order, so the smallest coverAvg gets rank 1
# (ties share the average of their ranks, which is the pandas default)
coverage_df['coverageRank'] = coverage_df['coverAvg'].rank(method='average', ascending=True)
coverage_df.reset_index()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank
0,A.J. Bouye,268,3.324293,6.889101,0.452775,83.0,69.0
1,A.J. Klein,161,5.042501,10.662449,0.776639,365.0,377.0
2,Aaron Colvin,151,3.724591,7.746312,0.539261,148.0,138.0
3,Adam Jones,39,3.549253,7.735683,0.880037,116.0,136.0
4,Adarius Taylor,157,5.228102,10.076555,-0.277348,391.0,333.0
...,...,...,...,...,...,...,...
498,Xavier Woods,122,5.640671,13.279344,2.564675,431.0,480.0
499,Za'Darius Smith,37,6.847204,12.833429,-1.063739,491.0,470.0
500,Zach Brown,159,4.483821,9.426334,0.660801,283.0,271.0
501,Zach Cunningham,213,4.738415,9.968443,0.649819,319.0,322.0


In [12]:
# Closing speed rank: descending order, so the largest speedAvg gets rank 1
# (ties share the average of their ranks, which is the pandas default)
coverage_df['speedRank'] = coverage_df['speedAvg'].rank(method='average', ascending=False)
coverage_df.reset_index()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank,speedRank
0,A.J. Bouye,268,3.324293,6.889101,0.452775,83.0,69.0,260.0
1,A.J. Klein,161,5.042501,10.662449,0.776639,365.0,377.0,169.0
2,Aaron Colvin,151,3.724591,7.746312,0.539261,148.0,138.0,236.0
3,Adam Jones,39,3.549253,7.735683,0.880037,116.0,136.0,151.0
4,Adarius Taylor,157,5.228102,10.076555,-0.277348,391.0,333.0,449.0
...,...,...,...,...,...,...,...,...
498,Xavier Woods,122,5.640671,13.279344,2.564675,431.0,480.0,12.0
499,Za'Darius Smith,37,6.847204,12.833429,-1.063739,491.0,470.0,483.0
500,Zach Brown,159,4.483821,9.426334,0.660801,283.0,271.0,200.0
501,Zach Cunningham,213,4.738415,9.968443,0.649819,319.0,322.0,203.0


In [13]:
# Merge the coverage and rankings dataframes.
# The rankings frame labels the player column 'Defender'; rename it so both
# sides share the 'dPlayer' join key.
rankings_df = rankings_df.rename(columns={'Defender': 'dPlayer'})
# Inner join: only players present in both frames are kept
cr_df = coverage_df.merge(rankings_df, on='dPlayer', how='inner')
cr_df.head()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank,speedRank,Rank,Team,...,Attps,Compl,Incomp,Ints,epa,ComplPct,CompRank,epaRank,intRank,TotRank
0,A.J. Bouye,268,3.324293,6.889101,0.452775,83.0,69.0,260.0,168.0,JAX,...,67,43,23,1,6.21,64.18%,186.5,202,156.0,544.5
1,A.J. Klein,161,5.042501,10.662449,0.776639,365.0,377.0,169.0,84.0,NO,...,29,17,11,1,-1.64,58.62%,121.0,91,156.0,368.0
2,Aaron Colvin,151,3.724591,7.746312,0.539261,148.0,138.0,236.0,365.5,HOU,...,30,24,6,0,12.0,80.00%,347.0,279,298.5,924.5
3,Adam Jones,39,3.549253,7.735683,0.880037,116.0,136.0,151.0,89.0,DEN,...,14,7,7,0,-5.5,50.00%,42.0,52,298.5,392.5
4,Adarius Taylor,157,5.228102,10.076555,-0.277348,391.0,333.0,449.0,299.0,TB,...,38,29,8,1,16.46,76.32%,320.0,318,156.0,794.0


In [14]:
# Keep the player name, the coverage metrics with their ranks, and the 'Rank'
# column that came from the rankings data
final_df = cr_df.loc[:, [
    'dPlayer', 'plays',
    'endAvg', 'coverAvg', 'speedAvg',
    'endRank', 'coverageRank', 'speedRank',
    'Rank',
]]
final_df.head()

Unnamed: 0,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank,speedRank,Rank
0,A.J. Bouye,268,3.324293,6.889101,0.452775,83.0,69.0,260.0,168.0
1,A.J. Klein,161,5.042501,10.662449,0.776639,365.0,377.0,169.0,84.0
2,Aaron Colvin,151,3.724591,7.746312,0.539261,148.0,138.0,236.0,365.5
3,Adam Jones,39,3.549253,7.735683,0.880037,116.0,136.0,151.0,89.0
4,Adarius Taylor,157,5.228102,10.076555,-0.277348,391.0,333.0,449.0,299.0


In [15]:
# Order the table by coverage rank (ascending).
# max_rows=None lifts the display row limit so the whole frame is rendered.
pd.options.display.max_rows = None
final_df = final_df.sort_values(by='coverageRank')
# Display only: reset_index() shows the pre-sort row labels as an 'index' column
final_df.reset_index()

Unnamed: 0,index,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank,speedRank,Rank
0,96,DeShawn Shead,111,2.351011,5.068357,0.244734,3.0,3.0,352.0,378.0
1,150,Isaac Yiadom,110,2.422259,5.117086,0.370803,5.0,5.0,300.0,101.0
2,307,Quinton Dunbar,141,2.512329,5.4693,0.61482,6.0,7.0,210.0,196.0
3,54,Byron Jones,412,2.856254,5.760222,0.135049,15.0,13.0,391.0,244.0
4,313,Rashard Robinson,31,2.884257,5.833097,0.143793,22.0,14.0,385.0,379.0
5,136,Gareon Conley,247,2.881106,5.852554,0.27689,20.0,15.0,337.0,86.0
6,391,Xavier Rhodes,236,2.882925,5.875651,0.316786,21.0,16.0,319.0,267.0
7,157,Jaire Alexander,261,2.835235,5.893845,0.55587,13.0,17.0,233.0,159.5
8,343,Steven Nelson,402,2.92601,5.92442,0.267885,27.0,19.0,338.0,58.0
9,153,Isaiah Oliver,122,2.934864,5.934268,0.10439,28.0,20.0,396.0,259.0


In [16]:
# Order the table by closing speed rank (ascending).
# max_rows=None lifts the display row limit so the whole frame is rendered.
pd.options.display.max_rows = None
final_df = final_df.sort_values(by='speedRank')
# Display only: reset_index() shows the pre-sort row labels as an 'index' column
final_df.reset_index()

Unnamed: 0,index,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank,speedRank,Rank
0,7,Adrian Colbert,27,5.270796,13.297422,3.199529,397.0,481.0,3.0,165.0
1,354,Tedric Thompson,65,5.171527,13.490834,3.139264,383.0,484.0,4.0,185.0
2,156,Jahleel Addae,99,5.52455,13.49054,2.783882,420.0,483.0,7.0,145.0
3,117,Duron Harmon,65,4.284789,11.277708,2.64613,250.0,413.0,8.0,5.0
4,332,Sean Davis,109,4.931605,12.196296,2.613697,352.0,450.0,9.0,96.0
5,279,Mike Mitchell,48,6.740603,15.676507,2.606362,486.0,499.0,10.0,105.0
6,82,Damontae Kazee,75,5.524434,13.82115,2.581982,419.0,490.0,11.0,1.0
7,392,Xavier Woods,122,5.640671,13.279344,2.564675,431.0,480.0,12.0,122.0
8,6,Adrian Amos,173,4.151132,10.410373,2.548384,231.0,362.0,13.0,45.0
9,373,Tre Sullivan,46,5.989177,14.38114,2.499047,457.0,494.0,14.0,95.0


In [17]:
# Write final_df, in whatever row order it currently has, to an Excel workbook;
# index=True also writes the row labels as the first column
final_df.to_excel(
    excel_writer='/Users/JKMacBook/Documents/Lambda/Kaggle/NFL/nfl-big-data-bowl-2021/final/coverage_speed.xlsx',
    index=True,
)

In [18]:
# Sort by end distance rank (ascending).
# max_rows=None lifts the display row limit so the whole frame is rendered.
pd.set_option('display.max_rows', None)
final_df = final_df.sort_values(by='endRank')
# Display only: reset_index() shows the pre-sort row labels as an 'index' column
final_df.reset_index()

Unnamed: 0,index,dPlayer,plays,endAvg,coverAvg,speedAvg,endRank,coverageRank,speedRank,Rank
0,96,DeShawn Shead,111,2.351011,5.068357,0.244734,3.0,3.0,352.0,378.0
1,150,Isaac Yiadom,110,2.422259,5.117086,0.370803,5.0,5.0,300.0,101.0
2,307,Quinton Dunbar,141,2.512329,5.4693,0.61482,6.0,7.0,210.0,196.0
3,282,Morgan Burnett,117,2.761016,6.30566,0.938334,11.0,45.0,131.0,110.0
4,276,Mike Ford,122,2.788198,6.024741,0.536923,12.0,25.0,237.0,370.0
5,157,Jaire Alexander,261,2.835235,5.893845,0.55587,13.0,17.0,233.0,159.5
6,54,Byron Jones,412,2.856254,5.760222,0.135049,15.0,13.0,391.0,244.0
7,310,Ramik Wilson,36,2.862431,6.149498,0.673522,17.0,32.0,198.0,268.5
8,60,Charvarius Ward,49,2.867529,6.292294,0.820086,18.0,43.0,164.0,347.0
9,42,Bradley Roby,329,2.872651,6.246901,0.780453,19.0,38.0,168.0,219.0
