In [41]:
!pip install ipywidgets matplotlib seaborn
# Welcome to Python! Let's start by reading in the NFL Combine data.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
import seaborn as sns
# Reading the data from the URL
url = "https://raw.githubusercontent.com/CodingUrsus/nflcombine/main/nfl_combine_2010_to_2023.csv"
data = pd.read_csv(url)

# Display the first few rows of the data to get an idea of what it looks like
data.head(100)



Unnamed: 0,Year,Player,Pos,School,Height,Weight,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted,Round,Pick
0,2010,Seyi Ajirotutu,WR,Fresno State,6-3,204.0,4.60,36.0,14.0,115.0,7.22,4.39,False,,
1,2010,Rahim Alem,DE,LSU,6-3,251.0,4.75,30.5,,106.0,7.54,4.80,False,,
2,2010,Charles Alexander,DT,LSU,6-4,300.0,5.40,,,,,,False,,
3,2010,Danario Alexander,WR,Missouri,6-5,215.0,4.62,,,,,,False,,
4,2010,Nate Allen,S,South Florida,6-0,207.0,4.50,,16.0,,,,True,2.0,37.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2010,Jacoby Ford,WR,Clemson,5-9,186.0,4.28,33.5,15.0,115.0,7.00,4.44,True,4.0,108.0
96,2010,Jason Fox,OT,Miami (FL),6-7,303.0,5.45,,23.0,,,,True,4.0,128.0
97,2010,Dominique Franks,CB,Oklahoma,5-11,194.0,4.46,33.5,10.0,117.0,7.32,4.33,True,5.0,135.0
98,2010,Junior Galette,OLB,Temple,6-2,257.0,4.77,33.5,25.0,113.0,7.04,4.16,False,,


In [42]:
# Let's explore the data! What columns do we have?
print("Columns in the dataset:")
print(data.columns)

# How many rows are in the dataset?
print(f"There are {len(data)} players in the dataset.")

Columns in the dataset:
Index(['Year', 'Player', 'Pos', 'School', 'Height', 'Weight', '40yd',
       'Vertical', 'Bench', 'Broad Jump', '3Cone', 'Shuttle', 'Drafted',
       'Round', 'Pick'],
      dtype='object')
There are 4741 players in the dataset.


In [43]:
# How many players in the NFL combine came from BYU?
byu_players = data[data["School"] == "BYU"]

print(f"Number of players from BYU: {len(byu_players)}")
print("Here are their names and positions:")
print(byu_players[["Player", "Pos"]])

Number of players from BYU: 27
Here are their names and positions:
                  Player  Pos
118             Max Hall   QB
226         Dennis Pitta   TE
281         Manase Tonga   FB
738         Loni Fangupo   DT
906        Matt Reynolds   OT
988          Ziggy Ansah   DE
1019        Braden Brown   OT
1441        Cody Hoffman   WR
1505  Eathyn Manumaleuna   DT
1540        Kyle Van Noy  OLB
1591     Daniel Sorensen    S
1618          Uani' Unga  ILB
1753           Alani Fua  OLB
2130     Bronson Kaufusi   DE
2469        Harvey Langi  ILB
2612     Jamaal Williams   RB
2931         Fred Warner  OLB
3250      Sione Takitaki   LB
3637      Zayne Anderson   LB
3702   Brady Christensen   OL
3907           Dax Milne   WR
4042       Khyiris Tonga   DL
4092         Zach Wilson   QB
4102      Tyler Allgeier   RB
4504      Blake Freeland   OT
4519          Jaren Hall   QB
4628          Puka Nacua   WR


In [44]:
# 1. Who was the fastest player in the year 2017?
fastest_2017 = data[data["Year"] == 2017].sort_values(by="40yd").iloc[0]
fastest_player_2017 = fastest_2017["Player"]
fastest_position_2017 = fastest_2017["Pos"]
fastest_40yd_time_2017 = fastest_2017["40yd"]
print(f"Fastest player in 2017 draft: {fastest_player_2017}, Position: {fastest_position_2017}, 40yd Dash Time: {fastest_40yd_time_2017}s")

Fastest player in 2017 draft: John Ross, Position: WR, 40yd Dash Time: 4.22s


In [45]:
# 2. What was the average 40 yard dash time for all positions across the data?
avg_40yd_by_position = data.groupby("Pos")["40yd"].mean().sort_values()
print("\nAverage 40 yard dash time by position:")
print(avg_40yd_by_position)


Average 40 yard dash time by position:
Pos
CB      4.492516
WR      4.507614
DB      4.538108
S       4.553483
RB      4.554333
LB      4.646870
EDGE    4.668873
OLB     4.690140
TE      4.746558
ILB     4.757642
FB      4.787750
QB      4.802087
DE      4.821107
P       4.893860
K       4.920968
DL      4.976346
LS      5.062500
DT      5.103525
OL      5.205182
OT      5.215511
C       5.234270
OG      5.272261
Name: 40yd, dtype: float64


In [46]:
# 3. What was the highest bench press in these years?
max_bench = data["Bench"].max()
max_bench_player = data[data["Bench"] == max_bench]["Player"].iloc[0]
print(f"\nHighest bench press: {max_bench} reps by player: {max_bench_player}")


Highest bench press: 49.0 reps by player: Stephen Paea


In [None]:
# 4. If you base bench press on position, what was the median number of reps?
median_bench_by_position = data.groupby("Pos")["Bench"].median()
print("\nMedian bench press reps by position:")
print(median_bench_by_position)

In [None]:
# 5. Plot the average 40 yard dash time by position
plt.figure(figsize=(10, 6))
sns.barplot(x=avg_40yd_by_position.index, y=avg_40yd_by_position.values, palette="viridis")
plt.title('Average 40 Yard Dash Time by Position')
plt.xlabel('Position')
plt.ylabel('Average 40 Yard Dash Time (seconds)')
plt.xticks(rotation=45)
plt.show()

In [None]:
# 6. Plot the median bench press by position using a boxplot
plt.figure(figsize=(10, 6))
sns.boxplot(x='Pos', y='Bench', data=data, palette="Set2")
plt.title('Boxplot of Bench Press Reps by Position')
plt.xlabel('Position')
plt.ylabel('Bench Press Reps')
plt.xticks(rotation=45)
plt.show()

In [19]:
# Convert height from "6-3" to inches
def height_to_inches(height):
    if pd.isna(height):
        return None
    feet, inches = map(int, height.split('-'))
    return feet * 12 + inches

data["Height"] = data["Height"].apply(height_to_inches)

# Drop rows with any missing values
data_cleaned = data.dropna()

# Drop irrelevant columns
columns_to_drop = ["Player", "School", "Year", "Round", "Pick"]
data_cleaned = data_cleaned.drop(columns=columns_to_drop)

# Save the cleaned data
cleaned_data = data_cleaned

# Show the first few rows of the cleaned data
cleaned_data.head()

Unnamed: 0,Pos,Height,Weight,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted
5,DE,74.0,295.0,4.87,35.5,21.0,116.0,7.15,4.43,True
7,ILB,72.0,235.0,4.71,35.0,26.0,110.0,7.04,4.29,True
12,S,72.0,212.0,4.62,37.5,14.0,118.0,7.0,4.17,True
13,DT,73.0,293.0,4.75,33.0,34.0,117.0,7.33,4.43,True
21,S,72.0,211.0,4.4,43.0,19.0,130.0,6.8,4.23,True


In [20]:
# Encode the target variable (position) as numbers
label_encoder = LabelEncoder()
cleaned_data["Pos_encoded"] = label_encoder.fit_transform(cleaned_data["Pos"])

# Separate features (X) and target (y)
X = cleaned_data.drop(columns=["Pos", "Drafted", "Pos_encoded"])
y = cleaned_data["Pos_encoded"]

# Train the Random Forest model
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X, y)

In [33]:
ward_member = pd.DataFrame({
    "Height": [71],  # Height in inches
    "Weight": [200],  # Weight in pounds
    "40yd": [4.2],  # 40-yard dash time in seconds
    "Vertical": [24],  # Vertical jump in inches
    "Bench": [18],  # Bench press reps at 225 lbs
    "Broad Jump": [100],  # Broad jump in inches
    "3Cone": [10],  # 3-cone drill time in seconds
    "Shuttle": [6.5]  # Shuttle drill time in seconds
})

# Make a prediction
predicted_position = rf_model.predict(ward_member)[0]

# Decode and display the predicted position
print("Predicted Position:", label_encoder.inverse_transform([predicted_position])[0])

# Optionally, show the new player's data for clarity
print("\nNew player's data:")
print(ward_member)

Predicted Position: WR

New player's data:
   Height  Weight  40yd  Vertical  Bench  Broad Jump  3Cone  Shuttle
0      71     200   4.2        24     18         100     10      6.5


In [36]:
import ipywidgets as widgets
from IPython.display import display

# Function to make the prediction based on user input
def predict_position(height, weight, forty_yd, vertical, bench, broad_jump, three_cone, shuttle):
    # Create the input DataFrame
    ward_member = pd.DataFrame({
        "Height": [height],
        "Weight": [weight],
        "40yd": [forty_yd],
        "Vertical": [vertical],
        "Bench": [bench],
        "Broad Jump": [broad_jump],
        "3Cone": [three_cone],
        "Shuttle": [shuttle]
    })

    # Make the prediction
    predicted_position = rf_model.predict(ward_member)[0]
    predicted_position = label_encoder.inverse_transform([predicted_position])[0]

    # Display the result
    print(f"Predicted Position: {predicted_position}")
    print("\nNew player's data:")
    print(ward_member)

# Create widgets for input
height_widget = widgets.IntSlider(value=71, min=60, max=90, step=1, description='Height:')
weight_widget = widgets.IntSlider(value=200, min=150, max=350, step=1, description='Weight:')
forty_yd_widget = widgets.FloatSlider(value=4.2, min=4.0, max=6.0, step=0.1, description='40yd Time:')
vertical_widget = widgets.IntSlider(value=24, min=20, max=50, step=1, description='Vertical:')
bench_widget = widgets.IntSlider(value=18, min=0, max=50, step=1, description='Bench Reps:')
broad_jump_widget = widgets.IntSlider(value=100, min=90, max=150, step=1, description='Broad Jump:')
three_cone_widget = widgets.FloatSlider(value=10, min=6.0, max=8.0, step=0.1, description='3Cone Time:')
shuttle_widget = widgets.FloatSlider(value=6.5, min=4.0, max=6.0, step=0.1, description='Shuttle Time:')

# Create a button to trigger the prediction
predict_button = widgets.Button(description="Predict Position")

# Attach the function to the button
predict_button.on_click(lambda b: predict_position(
    height_widget.value, weight_widget.value, forty_yd_widget.value, vertical_widget.value,
    bench_widget.value, broad_jump_widget.value, three_cone_widget.value, shuttle_widget.value
))

# Display all the widgets and the button
display(height_widget, weight_widget, forty_yd_widget, vertical_widget, bench_widget,
        broad_jump_widget, three_cone_widget, shuttle_widget, predict_button)

IntSlider(value=71, description='Height:', max=90, min=60)

IntSlider(value=200, description='Weight:', max=350, min=150)

FloatSlider(value=4.2, description='40yd Time:', max=6.0, min=4.0)

IntSlider(value=24, description='Vertical:', max=50, min=20)

IntSlider(value=18, description='Bench Reps:', max=50)

IntSlider(value=100, description='Broad Jump:', max=150, min=90)

FloatSlider(value=8.0, description='3Cone Time:', max=8.0, min=6.0)

FloatSlider(value=6.0, description='Shuttle Time:', max=6.0, min=4.0)

Button(description='Predict Position', style=ButtonStyle())

Predicted Position: DE

New player's data:
   Height  Weight  40yd  Vertical  Bench  Broad Jump  3Cone  Shuttle
0      77     248   4.8        36     38         121    7.3      4.8
