# NFL Draft Combine from 2010-2023

By Dominic Graziano

### Merging Data Streams and Cleaning

In [324]:
import pandas as pd
import os
from os import listdir
import glob


In [325]:
FileNames = glob.glob('data/Combine_Results_*.csv')
print(FileNames)

['data/Combine_Results_2013.csv', 'data/Combine_Results_2012.csv', 'data/Combine_Results_2010.csv', 'data/Combine_Results_2011.csv', 'data/Combine_Results_2015.csv', 'data/Combine_Results_2014.csv', 'data/Combine_Results_2016.csv', 'data/Combine_Results_2017.csv', 'data/Combine_Results_2019.csv', 'data/Combine_Results_2018.csv', 'data/Combine_Results_2020.csv', 'data/Combine_Results_2021.csv', 'data/Combine_Results_2023.csv', 'data/Combine_Results_2022.csv']


In [326]:
combine_df = pd.concat(pd.read_csv(f) for f in FileNames)
combine_df.drop(['College','Player-additional'], axis = 1, inplace = True)
combine_df.head()

Unnamed: 0,Player,Pos,School,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted (tm-rnd-yr)
0,Oday Aboushi,OT,Virginia,6-5,308.0,5.41,23.5,17.0,100.0,7.92,4.84,New York Jets / 5th / 141st pick / 2013
1,Johnny Adams,CB,Michigan State,5-10,185.0,4.48,,16.0,,,,
2,Robert Alford,CB,SE Louisiana,5-10,188.0,4.39,40.0,17.0,132.0,6.89,4.23,Atlanta Falcons / 2nd / 60th pick / 2013
3,Keenan Allen,WR,California,6-2,206.0,4.58,,,,,,San Diego Chargers / 3rd / 76th pick / 2013
4,Ryan Allen,P,Louisiana Tech,6-1,229.0,4.98,,,,,,


In [327]:
combine_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4741 entries, 0 to 323
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Player               4741 non-null   object 
 1   Pos                  4741 non-null   object 
 2   School               4741 non-null   object 
 3   Ht                   4712 non-null   object 
 4   Wt                   4717 non-null   float64
 5   40yd                 4194 non-null   float64
 6   Vertical             3731 non-null   float64
 7   Bench                3166 non-null   float64
 8   Broad Jump           3671 non-null   float64
 9   3Cone                2794 non-null   float64
 10  Shuttle              2909 non-null   float64
 11  Drafted (tm-rnd-yr)  2819 non-null   object 
dtypes: float64(7), object(5)
memory usage: 481.5+ KB


In [328]:
print('Missing Values')
for col in combine_df:
    noVal = sum(combine_df[col].isna())
    print('{}: {}'.format(col,noVal))

Missing Values
Player: 0
Pos: 0
School: 0
Ht: 29
Wt: 24
40yd: 547
Vertical: 1010
Bench: 1575
Broad Jump: 1070
3Cone: 1947
Shuttle: 1832
Drafted (tm-rnd-yr): 1922


#### Part 1: Changing the Ht column to the format I want

I want to the Ht column to be changed to inches rather than the string, so I split the column, change data types, run a calculation, then merge the foot and inch column into one column

In [329]:
combine_df['Ht'] = combine_df['Ht'].astype('string')
combine_df[['Ft_conversion', 'in']] = combine_df.Ht.str.split('-', expand = True)
combine_df = combine_df.drop('Ht', axis = 1)

In [330]:
combine_df['Ft_conversion'] = combine_df['Ft_conversion'].astype(float)
combine_df['in'] = combine_df['in'].astype(float)

combine_df['Ft_conversion'] = combine_df['Ft_conversion'] * 12

In [331]:
combine_df['Height_in_inches'] = combine_df['Ft_conversion'] + combine_df['in']

combine_df = combine_df.drop(['Ft_conversion','in'], axis = 1)

#### Part 2: Splitting Drafted(tm-rnd-yr) into 4 columns

Splitting into the 4 columns and then deleting the original Drafted(tm-rnd-yr)

In [332]:
combine_df['Drafted (tm-rnd-yr)'] = combine_df['Drafted (tm-rnd-yr)'].astype('string')
combine_df[['Drafted_Team','Round_Num', 'Pick_Num', 'Year']] = combine_df['Drafted (tm-rnd-yr)'].str.split('/', expand = True)

In [333]:
combine_df = combine_df.drop('Drafted (tm-rnd-yr)', axis = 1)

I want to get the round number and pick number without the text, as well as changing the data type. Additionally changing the null values in the 4 columns 

In [334]:
combine_df['Drafted_Team'] = combine_df['Drafted_Team'].fillna('Undrafted')

combine_df['Round_Num'] = combine_df['Round_Num'].str[:2]
combine_df['Round_Num'] = combine_df['Round_Num'].fillna('0')
combine_df['Round_Num'] = combine_df['Round_Num'].astype(int)

combine_df['Pick_Num'] = combine_df['Pick_Num'].str.replace('\D+','')
combine_df['Pick_Num'] = combine_df['Pick_Num'].fillna('0')
combine_df['Pick_Num'] = combine_df['Pick_Num'].astype(int)

combine_df['Year'] = combine_df['Year'].fillna('Undrafted')

combine_df.head()

  combine_df['Pick_Num'] = combine_df['Pick_Num'].str.replace('\D+','')


Unnamed: 0,Player,Pos,School,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Height_in_inches,Drafted_Team,Round_Num,Pick_Num,Year
0,Oday Aboushi,OT,Virginia,308.0,5.41,23.5,17.0,100.0,7.92,4.84,77.0,New York Jets,5,141,2013
1,Johnny Adams,CB,Michigan State,185.0,4.48,,16.0,,,,70.0,Undrafted,0,0,Undrafted
2,Robert Alford,CB,SE Louisiana,188.0,4.39,40.0,17.0,132.0,6.89,4.23,70.0,Atlanta Falcons,2,60,2013
3,Keenan Allen,WR,California,206.0,4.58,,,,,,74.0,San Diego Chargers,3,76,2013
4,Ryan Allen,P,Louisiana Tech,229.0,4.98,,,,,,73.0,Undrafted,0,0,Undrafted


#### Part 3: Fixing null values for the rest of the columns

In [335]:
combine_df['Wt'] = combine_df['Wt'].fillna(0)
combine_df['40yd'] = combine_df['40yd'].fillna(0)
combine_df['Vertical'] = combine_df['Vertical'].fillna(0)
combine_df['Bench'] = combine_df['Bench'].fillna(0)
combine_df['Broad Jump'] = combine_df['Broad Jump'].fillna(0)
combine_df['3Cone'] = combine_df['3Cone'].fillna(0)
combine_df['Shuttle'] = combine_df['Shuttle'].fillna(0)
combine_df['Height_in_inches'] =  combine_df['Height_in_inches'].fillna(0)

In [336]:
combine_df.head()

Unnamed: 0,Player,Pos,School,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Height_in_inches,Drafted_Team,Round_Num,Pick_Num,Year
0,Oday Aboushi,OT,Virginia,308.0,5.41,23.5,17.0,100.0,7.92,4.84,77.0,New York Jets,5,141,2013
1,Johnny Adams,CB,Michigan State,185.0,4.48,0.0,16.0,0.0,0.0,0.0,70.0,Undrafted,0,0,Undrafted
2,Robert Alford,CB,SE Louisiana,188.0,4.39,40.0,17.0,132.0,6.89,4.23,70.0,Atlanta Falcons,2,60,2013
3,Keenan Allen,WR,California,206.0,4.58,0.0,0.0,0.0,0.0,0.0,74.0,San Diego Chargers,3,76,2013
4,Ryan Allen,P,Louisiana Tech,229.0,4.98,0.0,0.0,0.0,0.0,0.0,73.0,Undrafted,0,0,Undrafted


In [337]:
print('Missing Values')
for col in combine_df:
    noVal = sum(combine_df[col].isna())
    print('{}: {}'.format(col,noVal))

Missing Values
Player: 0
Pos: 0
School: 0
Wt: 0
40yd: 0
Vertical: 0
Bench: 0
Broad Jump: 0
3Cone: 0
Shuttle: 0
Height_in_inches: 0
Drafted_Team: 0
Round_Num: 0
Pick_Num: 0
Year: 0


merge_df = []
for f_name in FileNames:
    df = pd.read_csv(f_name, usecols = ['Player','Pos','School','Ht','Wt','40yd','Vertical','Bench','Broad Jump','3Cone','Shuttle','Drafted (tm-rnd-yr)'])
    a = os.path.dirname(f_name)
    df['filename'] = os.path.basename(a)
merge_df.append(df)
print(merge_df)

df  =pd.concat(merge_df)
df.head()

In [340]:
combine_df[combine_df['40yd']!= 0].sort_values('40yd', ascending=True)

Unnamed: 0,Player,Pos,School,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Height_in_inches,Drafted_Team,Round_Num,Pick_Num,Year
251,John Ross,WR,Washington,188.0,4.22,37.0,0.0,133.0,0.00,0.00,71.0,Cincinnati Bengals,1,9,2017
13,Kalon Barnes,CB,Baylor,183.0,4.23,0.0,0.0,0.0,0.00,0.00,72.0,Carolina Panthers,7,242,2022
316,Tariq Woolen,CB,Texas-San Antonio,205.0,4.26,42.0,0.0,0.0,0.00,0.00,76.0,Seattle Seahawks,5,153,2022
6,Dri Archer,WR,Kent State,173.0,4.26,38.0,20.0,122.0,6.86,4.06,68.0,Pittsburgh Steelers,3,97,2014
287,DJ Turner,CB,Michigan,178.0,4.26,38.5,0.0,131.0,0.00,0.00,71.0,Undrafted,0,0,Undrafted
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218,Josh Oglesby,OT,Virginia Tech,338.0,5.75,28.5,22.0,0.0,0.00,0.00,79.0,Undrafted,0,0,Undrafted
234,Dace Richardson,OG,Iowa,320.0,5.76,23.5,19.0,87.0,8.58,5.38,77.0,Undrafted,0,0,Undrafted
190,Damien Mama,OG,USC,334.0,5.84,24.5,0.0,96.0,8.51,5.38,75.0,Undrafted,0,0,Undrafted
34,Orlando Brown,OT,Oklahoma,345.0,5.85,19.5,14.0,82.0,7.87,5.38,80.0,Baltimore Ravens,3,83,2018


In [338]:
df_speed = combine_df.sort_values('40yd', ascending= True)
df_speed.head(20)

Unnamed: 0,Player,Pos,School,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Height_in_inches,Drafted_Team,Round_Num,Pick_Num,Year
165,Anfernee Jennings,LB,Alabama,256.0,0.0,0.0,0.0,0.0,0.0,0.0,74.0,New England Patriots,3,87,2020
128,Terez Hall,LB,Missouri,230.0,0.0,0.0,20.0,0.0,0.0,0.0,73.0,Undrafted,0,0,Undrafted
127,Nate Hall,LB,Northwestern,225.0,0.0,0.0,0.0,0.0,0.0,0.0,74.0,Undrafted,0,0,Undrafted
123,Dre Greenlaw,LB,Arkansas,237.0,0.0,33.0,24.0,117.0,0.0,0.0,71.0,San Francisco 49ers,5,148,2019
122,Ethan Greenidge,OL,Villanova,327.0,0.0,0.0,0.0,0.0,0.0,0.0,76.0,Undrafted,0,0,Undrafted
2,Chase Allen,TE,Iowa St.,251.0,0.0,33.5,0.0,117.0,7.03,4.43,78.0,Undrafted,0,0,Undrafted
121,Donnell Greene,OT,Minnesota,335.0,0.0,0.0,0.0,0.0,0.0,0.0,77.0,Undrafted,0,0,Undrafted
43,Deonte Brown,OL,Alabama,344.0,0.0,27.0,0.0,0.0,0.0,0.0,75.0,Carolina Panthers,6,193,2021
317,Byron Young,DT,Alabama,294.0,0.0,26.0,24.0,108.0,7.68,0.0,75.0,Undrafted,0,0,Undrafted
315,Bryce Young,QB,Alabama,204.0,0.0,0.0,0.0,0.0,0.0,0.0,70.0,Undrafted,0,0,Undrafted
