In [130]:
import nfl_data_py as nfl
import pandas as pd
import os
import urllib.request
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnnotationBbox
from matplotlib.offsetbox import OffsetImage
from PIL import Image
import numpy as np
from io import BytesIO
import requests
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import hvplot.pandas
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Graphing 
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.pyplot import figure

In [131]:
# import data to get schedule and player stats
nfl_sched = nfl.import_schedules([2023])
weekly_nfl = nfl.import_weekly_data([2023])

Downcasting floats.


In [132]:
# gather RB stats
rb_df_two = weekly_nfl[(weekly_nfl["position"] == "RB")]
new_columns = ['player_display_name','recent_team', 'opponent_team','fantasy_points', 'fantasy_points_ppr','receiving_yards','receptions','targets','rushing_tds','rushing_fumbles','rushing_epa','rushing_yards','carries']
rb_df = rb_df_two[new_columns]


In [133]:
# Sort the DataFrame by 'team' and a stat of your choice (e.g., rushing yards)
sorted_rb_stats = rb_df.sort_values(by=['recent_team', 'rushing_yards'], ascending=[True, False])

# Group by 'team' and select the top RB for each team
best_rb_per_team = sorted_rb_stats.groupby('recent_team').head(1)

In [134]:
# Create list of best RB's
rb_list = best_rb_per_team['player_display_name'].tolist()


In [135]:
# make df of best rb's
best_rb_df = weekly_nfl[weekly_nfl['player_display_name'].isin(rb_list)]
best_rb_df = best_rb_df[new_columns]
best_rb_df.reset_index(drop=True, inplace=True)
best_rb_df.rename(columns={"recent_team": "team"}, inplace=True)
best_rb_df.columns = [col + '_vs' if col not in ['opponent_team', 'team','player_display_name'] else col for col in best_rb_df.columns]

#best_rb_df.to_excel('rb.xlsx', index=False)

best_rb_df.head()

Unnamed: 0,player_display_name,team,opponent_team,fantasy_points_vs,fantasy_points_ppr_vs,receiving_yards_vs,receptions_vs,targets_vs,rushing_tds_vs,rushing_fumbles_vs,rushing_epa_vs,rushing_yards_vs,carries_vs
0,Derrick Henry,TEN,NO,11.9,13.9,56.0,2,3,0,0.0,-1.353622,63.0,15
1,Derrick Henry,TEN,LAC,15.5,18.5,15.0,3,4,1,0.0,-3.984881,80.0,25
2,Derrick Henry,TEN,CLE,2.0,2.0,0.0,0,0,0,0.0,-4.839803,20.0,11
3,Derrick Henry,TEN,CIN,23.379999,24.379999,11.0,1,1,1,0.0,2.615094,122.0,22
4,Derrick Henry,TEN,IND,6.2,9.2,19.0,3,3,0,0.0,-5.41944,43.0,13


In [136]:
# create a df to add opponent team to players mean
merge_df = ['player_display_name', 'opponent_team']
opponent = best_rb_df[merge_df]
opponent.head()

Unnamed: 0,player_display_name,opponent_team
0,Derrick Henry,NO
1,Derrick Henry,LAC
2,Derrick Henry,CLE
3,Derrick Henry,CIN
4,Derrick Henry,IND


In [137]:
#create df for average rb stats vs opponent
rb_mean_df = best_rb_df.groupby('player_display_name').mean().reset_index()
rb_mean_df.reset_index(drop=True, inplace=True)
rb_mean_df.fillna(0, inplace=True)  # Fill NaN values with zeros in the DataFrame 'df'
rb_mean_df.columns = [col + '_mean' if col not in ['player_display_name'] else col for col in rb_mean_df.columns]



In [138]:
# merge opponent team into rb_mean
rb_mean_df_two = pd.merge(rb_mean_df, opponent, on='player_display_name', how='left')
rb_mean_df.head()


Unnamed: 0,player_display_name,fantasy_points_vs_mean,fantasy_points_ppr_vs_mean,receiving_yards_vs_mean,receptions_vs_mean,targets_vs_mean,rushing_tds_vs_mean,rushing_fumbles_vs_mean,rushing_epa_vs_mean,rushing_yards_vs_mean,carries_vs_mean
0,A.J. Dillon,5.58,6.18,5.0,0.6,1.2,0.2,0.0,-1.119941,38.799999,12.8
1,Alexander Mattison,8.733334,11.733334,17.333334,3.0,4.666667,0.0,0.166667,-2.089447,53.333332,13.666667
2,Alvin Kamara,11.5,19.166666,28.666666,7.666667,8.333333,0.333333,0.0,-1.115929,66.333336,17.333333
3,Austin Ekeler,14.299999,18.299999,41.0,4.0,5.5,0.5,0.0,-0.23567,72.0,15.0
4,Bijan Robinson,11.5,15.833334,31.5,4.333333,5.333333,0.0,0.166667,-1.156249,66.833336,13.333333


In [142]:
# create df for team defenses vs Rb
rb_d_df_two = best_rb_df.groupby(['opponent_team']).mean().reset_index()
rb_d_df_two.columns = [col + '_def' if col not in ['opponent_team'] else col for col in rb_mean_df.columns]
rb_d_df_two = rb_d_df_two.rename(columns={'player_display_name_def': 'opponent_team'})

rb_d_df_two.head()



Unnamed: 0,opponent_team,fantasy_points_vs_mean_def,fantasy_points_ppr_vs_mean_def,receiving_yards_vs_mean_def,receptions_vs_mean_def,targets_vs_mean_def,rushing_tds_vs_mean_def,rushing_fumbles_vs_mean_def,rushing_epa_vs_mean_def,rushing_yards_vs_mean_def,carries_vs_mean_def
0,ARI,19.799999,23.299999,19.833334,3.5,4.0,0.833333,0.0,0.414586,98.166664,20.666667
1,ATL,8.04,9.84,15.0,1.8,1.8,0.0,0.0,-3.266087,53.400002,14.8
2,BAL,11.2,13.4,17.4,2.2,2.8,0.2,0.0,-1.562716,70.599998,16.0
3,BUF,15.466667,18.299999,23.833334,2.833333,3.833333,0.666667,0.166667,0.213847,87.5,14.5
4,CAR,17.65,20.65,27.25,3.0,4.0,0.75,0.0,2.590487,89.25,16.0


In [143]:
# merge dfs to get test data 
rb_test_data_two = pd.merge(rb_mean_df_two, rb_d_df_two, on='opponent_team', how='right')
rb_test_data_two.fillna(0, inplace=True)  # Fill NaN values with zeros in the DataFrame 'df'



In [144]:

rb_test_data_two.head()
rb_test_data_two.to_excel('rb_main.xlsx', index=False)


In [145]:
# create columns for training
train_cols = ['fantasy_points_vs_mean', 'fantasy_points_ppr_vs_mean', 'receiving_yards_vs_mean',
    'receptions_vs_mean', 'targets_vs_mean','rushing_fumbles_vs_mean','rushing_tds_vs_mean', 
            'rushing_yards_vs_mean', 'carries_vs_mean',
    'rushing_epa_vs_mean', 'fantasy_points_vs_mean_def', 'fantasy_points_ppr_vs_mean_def',
    'receiving_yards_vs_mean_def', 'receptions_vs_mean_def', 'targets_vs_mean_def',
              'rushing_fumbles_vs_mean_def', 'rushing_epa_vs_mean_def',
             ]

training = ['rushing_tds_vs_mean_def', 'rushing_yards_vs_mean_def', 'carries_vs_mean_def']

In [146]:
testing_cols


['fantasy_points_vs_mean_def',
 'fantasy_points_ppr_vs_mean_def',
 'receiving_yards_vs_mean_def',
 'receptions_vs_mean_def',
 'targets_vs_mean_def',
 'rushing_fumbles_vs_mean_def',
 'rushing_epa_vs_mean_def',
 'fantasy_points_vs_mean',
 'fantasy_points_ppr_vs_mean',
 'receiving_yards_vs_mean',
 'receptions_vs_mean',
 'targets_vs_mean',
 'rushing_tds_vs_mean',
 'rushing_fumbles_vs_mean',
 'rushing_epa_vs_mean',
 'rushing_yards_vs_mean',
 'carries_vs_mean']

In [148]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Split data into features (X) and target variable (y)
X = rb_test_data_two[train_cols]
y = rb_test_data_two[training]

# Encode categorical variables using one-hot encoding if needed

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Create a StandardScaler
scaler = StandardScaler()

# Fit and transform the scaler on the training data
X_train_scaled = scaler.fit_transform(X_train)

# Transform the test data using the same scaler
X_test_scaled = scaler.transform(X_test)

# Create a Linear Regression model
model = LinearRegression()

# Train the model
model.fit(X_train_scaled, y_train)

# Make predictions
predictions = model.predict(X_test_scaled)

# Evaluate the model
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = mean_squared_error(y_test, predictions, squared=False)
r2 = r2_score(y_test, predictions)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R2): {r2}")


Mean Absolute Error (MAE): 2.4568546389480708
Mean Squared Error (MSE): 19.584762555596125
Root Mean Squared Error (RMSE): 3.0729997074156032
R-squared (R2): 0.7505988979917421


In [149]:
# create DF for upcoming week
# create stats to show upcpoming games
grouped = nfl_sched.groupby('week')

# Select the group corresponding to week 6
week_6_data = grouped.get_group(7)
get_teams =['home_team','away_team']
schedule_week_df = week_6_data[get_teams]
schedule_week_df.reset_index(drop=True, inplace=True)
schedule_week_df.head(16)

Unnamed: 0,home_team,away_team
0,NO,JAX
1,BAL,DET
2,CHI,LV
3,IND,CLE
4,NE,BUF
5,NYG,WAS
6,TB,ATL
7,LA,PIT
8,SEA,ARI
9,DEN,GB


In [150]:
# Create a reversed DataFrame
reversed_df = pd.DataFrame({'home_team': schedule_week_df['away_team'], 'away_team': schedule_week_df['home_team']})

# Concatenate the original and reversed DataFrames
final_week_df = pd.concat([schedule_week_df, reversed_df], ignore_index=True)
final_week_df = final_week_df.rename(columns={'home_team': 'team', 'away_team': 'opponent_team'})


In [151]:
# merge player mean with team name
teams = ['player_display_name', 'team']
rb_mean_team = best_rb_df[teams]

In [152]:
#merge to create test data
player_to_df = pd.merge(final_week_df, rb_mean_team, on='team', how='left')
team_player_vs = player_to_df.drop_duplicates()
team_player_vs.head()

Unnamed: 0,team,opponent_team,player_display_name
0,NO,JAX,Alvin Kamara
3,BAL,DET,Gus Edwards
9,CHI,LV,Khalil Herbert
14,IND,CLE,Zack Moss
19,NE,BUF,Ezekiel Elliott


In [153]:
#still creating test data
add_defense = pd.merge(team_player_vs, rb_d_df_two, on='opponent_team', how='left')
add_defense.head()

Unnamed: 0,team,opponent_team,player_display_name,fantasy_points_vs_mean_def,fantasy_points_ppr_vs_mean_def,receiving_yards_vs_mean_def,receptions_vs_mean_def,targets_vs_mean_def,rushing_tds_vs_mean_def,rushing_fumbles_vs_mean_def,rushing_epa_vs_mean_def,rushing_yards_vs_mean_def,carries_vs_mean_def
0,NO,JAX,Alvin Kamara,9.32,12.92,24.6,3.6,4.2,0.4,0.0,-0.145414,44.599998,10.4
1,BAL,DET,Gus Edwards,6.2,8.366667,13.5,2.166667,3.166667,0.333333,0.0,-0.644289,28.5,9.333333
2,CHI,LV,Khalil Herbert,9.24,10.44,8.8,1.2,1.6,0.4,0.0,1.09601,59.599998,12.6
3,IND,CLE,Zack Moss,5.94,7.74,5.4,1.8,2.8,0.0,0.0,-2.718306,42.0,12.0
4,NE,BUF,Ezekiel Elliott,15.466667,18.299999,23.833334,2.833333,3.833333,0.666667,0.166667,0.213847,87.5,14.5


In [154]:
#test data
final_testing = pd.merge(add_defense, rb_mean_df, on='player_display_name', how='left')
final_testing.head()
final_testing.to_excel('rb_test.xlsx', index=False)


In [155]:
#remove specific columns for test and make list
testing_official = final_testing.drop(columns=['team', 'opponent_team','rushing_tds_vs_mean_def', 'rushing_yards_vs_mean_def', 'carries_vs_mean_def','player_display_name'])
testing_cols = testing_official.columns.tolist()

In [156]:
testing_official.head()

Unnamed: 0,fantasy_points_vs_mean_def,fantasy_points_ppr_vs_mean_def,receiving_yards_vs_mean_def,receptions_vs_mean_def,targets_vs_mean_def,rushing_fumbles_vs_mean_def,rushing_epa_vs_mean_def,fantasy_points_vs_mean,fantasy_points_ppr_vs_mean,receiving_yards_vs_mean,receptions_vs_mean,targets_vs_mean,rushing_tds_vs_mean,rushing_fumbles_vs_mean,rushing_epa_vs_mean,rushing_yards_vs_mean,carries_vs_mean
0,9.32,12.92,24.6,3.6,4.2,0.0,-0.145414,11.5,19.166666,28.666666,7.666667,8.333333,0.333333,0.0,-1.115929,66.333336,17.333333
1,6.2,8.366667,13.5,2.166667,3.166667,0.0,-0.644289,6.25,6.75,2.166667,0.5,0.666667,0.166667,0.0,-0.937413,47.0,12.0
2,9.24,10.44,8.8,1.2,1.6,0.0,1.09601,8.3,10.3,16.6,2.0,3.6,0.0,0.0,0.168067,54.400002,10.2
3,5.94,7.74,5.4,1.8,2.8,0.0,-2.718306,17.92,20.719999,22.0,2.8,3.4,0.8,0.0,-0.069171,93.199997,19.2
4,15.466667,18.299999,23.833334,2.833333,3.833333,0.166667,0.213847,4.866667,7.033333,9.833333,2.166667,2.666667,0.166667,0.0,-0.536792,32.166668,8.166667


In [157]:
# Now, use the trained model to predict rb_yards for df_test
X_test = testing_official[testing_cols]
y_pred = model.predict(X_test)

# The predictions are stored in y_pred
# add predicted scores into df: 
predict_cols = ['team','opponent_team', 'player_display_name']
df_predictions = final_testing[predict_cols]
scores_df = pd.DataFrame(y_pred, columns=['rushing_td_s', 'rushing_yards', 'rushing_carries'])
df_predictions = pd.concat([df_predictions, scores_df], axis=1)
df_predictions = df_predictions.dropna()
df_predictions.head()

  f"X has feature names, but {self.__class__.__name__} was fitted without"


Unnamed: 0,team,opponent_team,player_display_name,rushing_td_s,rushing_yards,rushing_carries
0,NO,JAX,Alvin Kamara,730748.762995,-27460480.0,-2442579.0
1,BAL,DET,Gus Edwards,166352.873323,-1530956.0,91881.06
2,CHI,LV,Khalil Herbert,926932.787144,-44156550.0,-4378165.0
3,IND,CLE,Zack Moss,462339.031849,-18563740.0,-1708742.0
4,NE,BUF,Ezekiel Elliott,436869.807751,-11485550.0,-783346.5
