Create a model to predict WR yards vs upcoming team

In [None]:
import nfl_data_py as nfl
import pandas as pd
import os
import urllib.request
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnnotationBbox
from matplotlib.offsetbox import OffsetImage
from PIL import Image
import numpy as np
from io import BytesIO
import requests
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import hvplot.pandas
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# import data to get schedule and player stats
nfl_sched = nfl.import_schedules([2023])
weekly_nfl = nfl.import_weekly_data([2023])

In [None]:
# gather WR stats
wr_df_two = weekly_nfl[(weekly_nfl["position"] == "WR")]
new_columns = ['player_display_name','recent_team', 'opponent_team','fantasy_points', 'fantasy_points_ppr','receiving_yards','receiving_yards_after_catch','receptions','targets','receiving_tds','receiving_air_yards','receiving_first_downs','receiving_epa','target_share','air_yards_share','racr','wopr']
wr_df = wr_df_two[new_columns]
wr_df= wr_df[wr_df['fantasy_points_ppr'] != 0]
wr_df.head()
    

In [34]:
#create df for average player stats vs opponent
wr_mean_df = wr_df.groupby('player_display_name').mean().reset_index()
wr_mean_df.set_index('player_display_name', inplace=True)
wr_mean_df.head()

Unnamed: 0_level_0,fantasy_points,fantasy_points_ppr,receiving_yards,receiving_yards_after_catch,receptions,targets,receiving_tds,receiving_air_yards,receiving_first_downs,receiving_epa,target_share,air_yards_share,racr,wopr
player_display_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
A.J. Brown,13.35,20.6,103.5,38.5,7.25,10.75,0.5,139.75,5.0,5.029521,0.347103,0.456517,0.724344,0.840217
Adam Thielen,10.825,17.575001,71.75,26.25,6.75,8.25,0.5,63.75,4.5,5.032265,0.214629,0.276149,1.212583,0.515249
Alec Pierce,2.85,4.6,28.5,3.25,1.75,3.5,0.0,62.0,1.25,0.814339,0.09786,0.218507,0.654127,0.299744
Allen Lazard,5.725,8.225,42.25,7.0,2.5,4.0,0.25,61.25,2.0,2.825249,0.159429,0.352971,0.792635,0.486224
Allen Robinson,2.7,5.7,27.0,8.5,3.0,4.25,0.0,25.75,0.75,-0.094487,0.125862,0.104672,0.982128,0.262063


In [35]:
wr_mean_df.fillna(0, inplace=True)  # Fill NaN values with zeros in the DataFrame 'df'


In [36]:
# create ml to group into classes
# OVR MODEL
def get_clusters(k, data):
    # Create a copy of the DataFrame
    data = data.copy()

    # Initialize the K-Means model
    model = KMeans(n_clusters=k, random_state=0)

    # Fit the model
    model.fit(data)

    # Predict clusters
    predictions = model.predict(data)

    # Create return DataFrame with predicted clusters
    data["class"] = model.labels_

    return data

In [40]:
# get classes
# create ml 5 ks

classes_wr = get_clusters(5, wr_mean_df)
classes_wr.head()

wr_average = classes_wr.reset_index(drop=False)
wr_average.head()

Unnamed: 0,player_display_name,fantasy_points,fantasy_points_ppr,receiving_yards,receiving_yards_after_catch,receptions,targets,receiving_tds,receiving_air_yards,receiving_first_downs,receiving_epa,target_share,air_yards_share,racr,wopr,class
0,A.J. Brown,13.35,20.6,103.5,38.5,7.25,10.75,0.5,139.75,5.0,5.029521,0.347103,0.456517,0.724344,0.840217,2
1,Adam Thielen,10.825,17.575001,71.75,26.25,6.75,8.25,0.5,63.75,4.5,5.032265,0.214629,0.276149,1.212583,0.515249,0
2,Alec Pierce,2.85,4.6,28.5,3.25,1.75,3.5,0.0,62.0,1.25,0.814339,0.09786,0.218507,0.654127,0.299744,1
3,Allen Lazard,5.725,8.225,42.25,7.0,2.5,4.0,0.25,61.25,2.0,2.825249,0.159429,0.352971,0.792635,0.486224,4
4,Allen Robinson,2.7,5.7,27.0,8.5,3.0,4.25,0.0,25.75,0.75,-0.094487,0.125862,0.104672,0.982128,0.262063,1


In [41]:
# create df for defense vs player
wr_stats_team = wr_df.groupby(['opponent_team', 'player_display_name']).sum().reset_index()
wr_stats_team.head()

Unnamed: 0,opponent_team,player_display_name,fantasy_points,fantasy_points_ppr,receiving_yards,receiving_yards_after_catch,receptions,targets,receiving_tds,receiving_air_yards,receiving_first_downs,receiving_epa,target_share,air_yards_share,racr,wopr
0,ARI,Brandin Cooks,1.7,3.7,17.0,3.0,2,7,0,52.0,2.0,-5.286209,0.189189,0.275132,0.326923,0.476376
1,ARI,Brandon Aiyuk,14.8,20.799999,148.0,19.0,6,6,0,129.0,6.0,10.005992,0.285714,0.712707,1.147287,0.927466
2,ARI,CeeDee Lamb,6.2,10.2,53.0,15.0,4,7,0,52.0,2.0,-1.518677,0.189189,0.275132,1.019231,0.476376
3,ARI,Curtis Samuel,6.0,11.0,54.0,27.0,5,5,0,27.0,2.0,-0.042015,0.172414,0.129808,2.0,0.349486
4,ARI,Darius Slayton,6.2,9.2,62.0,15.0,3,6,0,132.0,3.0,0.973952,0.171429,0.37931,0.469697,0.52266


In [43]:
#add _against to all the columns in yards vs team
wr_vs_stats = wr_stats_team.columns = [col + '_vs' for col in wr_stats_team.columns]
wr_vs_stats

['opponent_team_vs_vs',
 'player_display_name_vs_vs',
 'fantasy_points_vs_vs',
 'fantasy_points_ppr_vs_vs',
 'receiving_yards_vs_vs',
 'receiving_yards_after_catch_vs_vs',
 'receptions_vs_vs',
 'targets_vs_vs',
 'receiving_tds_vs_vs',
 'receiving_air_yards_vs_vs',
 'receiving_first_downs_vs_vs',
 'receiving_epa_vs_vs',
 'target_share_vs_vs',
 'air_yards_share_vs_vs',
 'racr_vs_vs',
 'wopr_vs_vs']

In [55]:
wr_stats_team.colummns = wr_vs_stats
wr_stats_team.rename(columns={'opponent_team_vs_vs': 'opponent_team', 'player_display_number': 'player_display_name'}, inplace=True)

In [56]:
wr_stats_team.head()

Unnamed: 0,opponent_team,player_display_name,fantasy_points_vs_vs,fantasy_points_ppr_vs_vs,receiving_yards_vs_vs,receiving_yards_after_catch_vs_vs,receptions_vs_vs,targets_vs_vs,receiving_tds_vs_vs,receiving_air_yards_vs_vs,receiving_first_downs_vs_vs,receiving_epa_vs_vs,target_share_vs_vs,air_yards_share_vs_vs,racr_vs_vs,wopr_vs_vs
0,ARI,Brandin Cooks,1.7,3.7,17.0,3.0,2,7,0,52.0,2.0,-5.286209,0.189189,0.275132,0.326923,0.476376
1,ARI,Brandon Aiyuk,14.8,20.799999,148.0,19.0,6,6,0,129.0,6.0,10.005992,0.285714,0.712707,1.147287,0.927466
2,ARI,CeeDee Lamb,6.2,10.2,53.0,15.0,4,7,0,52.0,2.0,-1.518677,0.189189,0.275132,1.019231,0.476376
3,ARI,Curtis Samuel,6.0,11.0,54.0,27.0,5,5,0,27.0,2.0,-0.042015,0.172414,0.129808,2.0,0.349486
4,ARI,Darius Slayton,6.2,9.2,62.0,15.0,3,6,0,132.0,3.0,0.973952,0.171429,0.37931,0.469697,0.52266


In [67]:
wr_test_data = pd.merge(wr_stats_team, wr_average, on='player_display_name', how='inner')
wr_test_data.head()







Unnamed: 0,opponent_team,player_display_name,fantasy_points_vs_vs,fantasy_points_ppr_vs_vs,receiving_yards_vs_vs,receiving_yards_after_catch_vs_vs,receptions_vs_vs,targets_vs_vs,receiving_tds_vs_vs,receiving_air_yards_vs_vs,...,targets,receiving_tds,receiving_air_yards,receiving_first_downs,receiving_epa,target_share,air_yards_share,racr,wopr,class
0,ARI,Brandin Cooks,1.7,3.7,17.0,3.0,2,7,0,52.0,...,5.0,0.0,37.0,1.666667,-1.017194,0.153433,0.193276,0.765992,0.365443,1
1,NE,Brandin Cooks,2.7,6.7,27.0,8.0,4,4,0,19.0,...,5.0,0.0,37.0,1.666667,-1.017194,0.153433,0.193276,0.765992,0.365443,1
2,NYG,Brandin Cooks,2.2,4.2,22.0,2.0,2,4,0,40.0,...,5.0,0.0,37.0,1.666667,-1.017194,0.153433,0.193276,0.765992,0.365443,1
3,ARI,Brandon Aiyuk,14.8,20.799999,148.0,19.0,6,6,0,129.0,...,6.666667,0.666667,106.333336,5.666667,10.086728,0.27381,0.51554,0.953879,0.771592,2
4,LA,Brandon Aiyuk,4.3,7.3,43.0,6.0,3,6,0,81.0,...,6.666667,0.666667,106.333336,5.666667,10.086728,0.27381,0.51554,0.953879,0.771592,2


In [80]:
column_names = [
    'fantasy_points_vs_vs',
    'fantasy_points_ppr_vs_vs',
    'receiving_yards_vs_vs',
    'receiving_yards_after_catch_vs_vs',
    'receptions_vs_vs',
    'targets_vs_vs',
    'receiving_tds_vs_vs',
    'receiving_air_yards_vs_vs',
    'receiving_first_downs_vs_vs',
    'receiving_epa_vs_vs',
    'target_share_vs_vs',
    'air_yards_share_vs_vs',
    'racr_vs_vs',
    'wopr_vs_vs',
    'fantasy_points',
    'fantasy_points_ppr',
    'receiving_yards',
    'receiving_yards_after_catch',
    'receptions',
    'targets',
    'receiving_tds',
    'receiving_air_yards',
    'receiving_first_downs',
    'receiving_epa',
    'target_share',
    'air_yards_share',
    'racr',
    'wopr',
    'class'
    
]

In [92]:
# Split data into features (X) and target variable (y)
X = wr_test_data[column_names]
y = wr_test_data[['receptions_vs_vs', 'receiving_yards_vs_vs', 'receiving_tds_vs_vs']]

# Encode categorical variables using one-hot encoding if needed

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Create a Linear Regression model
model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

# Make predictions
predictions = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = mean_squared_error(y_test, predictions, squared=False)
r2 = r2_score(y_test, predictions)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R2): {r2}")

Mean Absolute Error (MAE): 9.057970826164234e-14
Mean Squared Error (MSE): 3.4635597326988855e-26
Root Mean Squared Error (RMSE): 1.1544894586864169e-13
R-squared (R2): 1.0


In [93]:
# create averages for a team to use in trained model
player_averages = wr_stats_team[wr_stats_team['opponent_team'] == 'ATL']
team_averages.mean()

  This is separate from the ipykernel package so we can avoid doing imports until


fantasy_points_vs_vs                  5.446154
fantasy_points_ppr_vs_vs              8.599999
receiving_yards_vs_vs                35.307693
receiving_yards_after_catch_vs_vs    12.076923
receptions_vs_vs                      3.153846
targets_vs_vs                         5.000000
receiving_tds_vs_vs                   0.307692
receiving_air_yards_vs_vs            52.923077
receiving_first_downs_vs_vs           2.000000
receiving_epa_vs_vs                   1.181010
target_share_vs_vs                    0.172655
air_yards_share_vs_vs                 0.232736
racr_vs_vs                            0.580662
wopr_vs_vs                            0.421898
dtype: float64

In [94]:
#get average stats for a player
player_averages = wr_average[wr_average['player_display_name'] == 'Nico Collins']
player_averages.mean()


  This is separate from the ipykernel package so we can avoid doing imports until


fantasy_points                  15.200000
fantasy_points_ppr              20.700001
receiving_yards                107.000000
receiving_yards_after_catch     48.750000
receptions                       5.500000
targets                          8.000000
receiving_tds                    0.750000
receiving_air_yards             96.000000
receiving_first_downs            4.250000
receiving_epa                    7.634870
target_share                     0.225474
air_yards_share                  0.344492
racr                             1.324284
wopr                             0.579356
class                            2.000000
dtype: float64

In [95]:
# Create the test data 
data_dict = {
    'fantasy_points_vs_vs': 5.446154,
    'fantasy_points_ppr_vs_vs': 8.599999,
    'receiving_yards_vs_vs': 35.307693,
    'receiving_yards_after_catch_vs_vs': 12.076923,
    'receptions_vs_vs': 3.153846,
    'targets_vs_vs': 5.000000,
    'receiving_tds_vs_vs': 0.307692,
    'receiving_air_yards_vs_vs': 52.923077,
    'receiving_first_downs_vs_vs': 2.000000,
    'receiving_epa_vs_vs': 1.181010,
    'target_share_vs_vs': 0.172655,
    'air_yards_share_vs_vs': 0.232736,
    'racr_vs_vs': 0.580662,
    'wopr_vs_vs': 0.421898,
    'fantasy_points': 15.200000,
    'fantasy_points_ppr': 20.700001,
    'receiving_yards': 107.000000,
    'receiving_yards_after_catch': 48.750000,
    'receptions': 5.500000,
    'targets': 8.000000,
    'receiving_tds': 0.750000,
    'receiving_air_yards': 96.000000,
    'receiving_first_downs': 4.250000,
    'receiving_epa': 7.634870,
    'target_share': 0.225474,
    'air_yards_share': 0.344492,
    'racr': 1.324284,
    'wopr': 0.579356,
    'class': 2
    
}


In [96]:
# Create a sample untrained dataset for X using the average values
data_untrained_test = {feature: [data_dict[feature] for _ in range(4)] for feature in data_dict}
X_untrained = pd.DataFrame(data_untrained_test)

In [97]:
# 2. Use the trained model to make predictions on X_untrained.
predictions_untrained_team = model.predict(X_untrained)
print(predictions_untrained_team)

[[ 3.153846 35.307693  0.307692]
 [ 3.153846 35.307693  0.307692]
 [ 3.153846 35.307693  0.307692]
 [ 3.153846 35.307693  0.307692]]
