In [5]:
import nfl_data_py as nfl
import pandas as pd
import os
import urllib.request
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnnotationBbox
from matplotlib.offsetbox import OffsetImage
from PIL import Image
import numpy as np
from io import BytesIO
import requests
# ML Libraries
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import hvplot.pandas
import plotly.express as px

# Graphing 
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.pyplot import figure

In [6]:
#Import Data
nfl_data = nfl.import_schedules([2021,2022,2023])
original_df = nfl_data.groupby('week')

In [7]:
# Select relevant columns for team vs. team DataFrame
team_vs_team_df = nfl_data[['week', 'away_team', 'away_score', 'home_team', 'home_score']]


In [8]:
# Get data from games
weekly_nfl = nfl.import_weekly_data([2023])
weekly_nfl.head()

Downcasting floats.


Unnamed: 0,player_id,player_name,player_display_name,position,position_group,headshot_url,recent_team,season,week,season_type,...,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,special_teams_tds,fantasy_points,fantasy_points_ppr
0,00-0023459,A.Rodgers,Aaron Rodgers,QB,QB,https://static.www.nfl.com/image/private/f_aut...,NYJ,2023,1,REG,...,0.0,,0,,,,,0.0,0.0,0.0
1,00-0024243,M.Lewis,Marcedes Lewis,TE,TE,https://static.www.nfl.com/image/private/f_aut...,CHI,2023,4,REG,...,0.0,0.483465,0,0.0,0.03125,-0.012397,0.038197,0.0,0.8,1.8
2,00-0026498,M.Stafford,Matthew Stafford,QB,QB,https://static.www.nfl.com/image/private/f_aut...,LA,2023,1,REG,...,0.0,,0,,,,,0.0,14.46,14.46
3,00-0026498,M.Stafford,Matthew Stafford,QB,QB,https://static.www.nfl.com/image/private/f_aut...,LA,2023,2,REG,...,0.0,,0,,,,,0.0,13.98,13.98
4,00-0026498,M.Stafford,Matthew Stafford,QB,QB,https://static.www.nfl.com/image/private/f_aut...,LA,2023,3,REG,...,0.0,,0,,,,,0.0,11.46,11.46


In [9]:
#clean up data to show data columsn for each game
columns = [ 'week','recent_team','opponent_team','passing_epa',
           'passing_yards', 'rushing_epa','rushing_yards','receiving_epa'
           ]


In [10]:
#create Df
games_df = weekly_nfl[columns]
games_df.head()

Unnamed: 0,week,recent_team,opponent_team,passing_epa,passing_yards,rushing_epa,rushing_yards,receiving_epa
0,1,NYJ,BUF,-2.03196,0.0,,0.0,
1,4,CHI,DEN,,0.0,,0.0,0.483465
2,1,LA,SEA,20.679981,334.0,0.868086,11.0,
3,2,LA,SF,-5.089193,307.0,-0.43833,17.0,
4,3,LA,CIN,-8.40479,269.0,2.529576,7.0,


In [9]:
# Groupby team and week offense stats
team_by_week = games_df.groupby(['week', 'recent_team'])
team_by_week.head()
team_stats_per_game = team_by_week.sum()
team_stats_per_game.index.set_names(['week', 'team_offense'], inplace=True)
team_stats_per_game = team_stats_per_game.reset_index()
#team_stats_per_game_off = team_stats_per_game.drop(columns='index')

team_stats_per_game_off = team_stats_per_game.rename(columns={'team_defense': 'team_offense'})
# List of columns to add '_def' to
columns_to_suffix = ['passing_epa',
           'passing_yards', 'rushing_epa','rushing_yards','receiving_epa']

team_stats_per_game_off = team_stats_per_game_off.rename(columns={col: col + '_offense' for col in columns_to_suffix})

team_stats_per_game_off.head()

Unnamed: 0,week,team_offense,passing_epa_offense,passing_yards_offense,rushing_epa_offense,rushing_yards_offense,receiving_epa_offense
0,1,ARI,-9.239668,132.0,-11.179296,96.0,-2.79934
1,1,ATL,-4.45045,115.0,-0.595772,130.0,0.718799
2,1,BAL,-4.415663,169.0,0.155152,110.0,7.985488
3,1,BUF,-1.409397,236.0,-3.168568,97.0,5.10084
4,1,CAR,-10.528563,146.0,-8.897445,154.0,-3.103


In [10]:
# Groupby team and week defense stats
team_by_week_def = games_df.groupby(['week', 'opponent_team'])
team_by_week_def.head()
team_stats_per_game_def = team_by_week_def.sum()

# List of columns to add '_def' to
columns_to_suffix_def = ['passing_epa',
           'passing_yards', 'rushing_epa','rushing_yards','receiving_epa']

# Add '_def' suffix to the specified columns
team_stats_per_game_def = team_stats_per_game_def.rename(columns={col: col + '_def' for col in columns_to_suffix_def})
team_stats_per_game_def.index.set_names(['week', 'team_defense'], inplace=True)
team_stats_per_game_def = team_stats_per_game_def.reset_index()


In [11]:
team_stats_per_game_def.head()

Unnamed: 0,week,team_defense,passing_epa_def,passing_yards_def,rushing_epa_def,rushing_yards_def,receiving_epa_def
0,1,ARI,-13.413063,202.0,-5.708867,92.0,1.409773
1,1,ATL,-10.528563,146.0,-8.897445,154.0,-3.103
2,1,BAL,-11.156192,242.0,-8.125071,72.0,2.796677
3,1,BUF,-5.221493,140.0,2.80344,172.0,3.47571
4,1,CAR,-4.45045,115.0,-0.595772,130.0,0.718799


In [12]:
# merge both dfs so it shows offense and defense stats per game
team_total_df = team_stats_per_game.merge(team_stats_per_game_def, left_on='team_offense', right_on='team_defense')
# Filter rows where 'team_offense' is equal to 'team_defense'
team_total_df = team_total_df[team_total_df['team_offense'] == team_total_df['team_defense']]
#remove duplicates
team_total_df = team_total_df[(team_total_df['team_offense'] == team_total_df['team_defense']) & (team_total_df['week_x'] == team_total_df['week_y'])]
team_total_df.head(1)

Unnamed: 0,week_x,team_offense,passing_epa,passing_yards,rushing_epa,rushing_yards,receiving_epa,week_y,team_defense,passing_epa_def,passing_yards_def,rushing_epa_def,rushing_yards_def,receiving_epa_def
0,1,ARI,-9.239668,132.0,-11.179296,96.0,-2.79934,1,ARI,-13.413063,202.0,-5.708867,92.0,1.409773


In [13]:
# create a copy of team_total_df for home team and away team

# Make the first copy of the original DataFrame
home_team_df = team_total_df.copy()

# Make a second copy from the first copy
away_team_df = home_team_df.copy()

In [14]:
#make a home_team df
# Add '_home' to each column name
for column in home_team_df.columns:
    new_column_name_home = column + '_home'
    home_team_df.rename(columns={column: new_column_name_home}, inplace=True)
    
home_team_df.head()

Unnamed: 0,week_x_home,team_offense_home,passing_epa_home,passing_yards_home,rushing_epa_home,rushing_yards_home,receiving_epa_home,week_y_home,team_defense_home,passing_epa_def_home,passing_yards_def_home,rushing_epa_def_home,rushing_yards_def_home,receiving_epa_def_home
0,1,ARI,-9.239668,132.0,-11.179296,96.0,-2.79934,1,ARI,-13.413063,202.0,-5.708867,92.0,1.409773
7,2,ARI,7.430885,228.0,2.366217,151.0,7.653436,2,ARI,8.180348,321.0,6.719742,127.0,11.507128
14,3,ARI,4.560631,189.0,7.001422,222.0,8.408302,3,ARI,-3.659615,249.0,8.314601,185.0,0.522374
21,4,ARI,3.898311,265.0,5.478536,105.0,6.421503,4,ARI,22.844091,283.0,2.675849,124.0,24.983665
28,5,ARI,-15.651534,166.0,-1.152939,142.0,-5.404606,5,ARI,9.498447,317.0,-8.893337,93.0,16.416182


In [15]:
#make a away_team df
# Add '_away' to each column name
for column in away_team_df.columns:
    new_column_name_away = column + '_away'
    away_team_df.rename(columns={column: new_column_name_away}, inplace=True)
away_team_df.head()

Unnamed: 0,week_x_away,team_offense_away,passing_epa_away,passing_yards_away,rushing_epa_away,rushing_yards_away,receiving_epa_away,week_y_away,team_defense_away,passing_epa_def_away,passing_yards_def_away,rushing_epa_def_away,rushing_yards_def_away,receiving_epa_def_away
0,1,ARI,-9.239668,132.0,-11.179296,96.0,-2.79934,1,ARI,-13.413063,202.0,-5.708867,92.0,1.409773
7,2,ARI,7.430885,228.0,2.366217,151.0,7.653436,2,ARI,8.180348,321.0,6.719742,127.0,11.507128
14,3,ARI,4.560631,189.0,7.001422,222.0,8.408302,3,ARI,-3.659615,249.0,8.314601,185.0,0.522374
21,4,ARI,3.898311,265.0,5.478536,105.0,6.421503,4,ARI,22.844091,283.0,2.675849,124.0,24.983665
28,5,ARI,-15.651534,166.0,-1.152939,142.0,-5.404606,5,ARI,9.498447,317.0,-8.893337,93.0,16.416182


In [16]:
# Make df with points and stats

# merge home team with home stats
home_team = team_vs_team_df.merge(home_team_df, left_on='home_team', right_on='team_offense_home')
#remove duplicates
home_team = home_team[(home_team['home_team'] ==  home_team['team_offense_home']) & (home_team['week_x_home'] == home_team['week'])]
home_team.head(10)

Unnamed: 0,week,away_team,away_score,home_team,home_score,week_x_home,team_offense_home,passing_epa_home,passing_yards_home,rushing_epa_home,rushing_yards_home,receiving_epa_home,week_y_home,team_defense_home,passing_epa_def_home,passing_yards_def_home,rushing_epa_def_home,rushing_yards_def_home,receiving_epa_def_home
0,1,DET,21.0,KC,20.0,1,KC,-6.453613,226.0,-2.581925,90.0,-5.052831,1,KC,6.543757,253.0,-3.636585,118.0,6.054341
8,3,CHI,10.0,KC,41.0,3,KC,14.211285,303.0,0.524553,153.0,14.669461,3,KC,-13.247833,99.0,2.051218,116.0,-9.638733
17,6,DEN,8.0,KC,19.0,6,KC,4.125069,306.0,-9.408838,96.0,9.299211,6,KC,-18.298592,95.0,4.712027,115.0,-16.197714
54,1,CAR,10.0,ATL,24.0,1,ATL,-4.45045,115.0,-0.595772,130.0,0.718799,1,ATL,-10.528563,146.0,-8.897445,154.0,-3.103
61,2,GB,24.0,ATL,25.0,2,ATL,3.119444,237.0,3.12432,211.0,5.143248,2,ATL,2.631139,151.0,-0.244532,84.0,4.467519
70,5,HOU,19.0,ATL,21.0,5,ATL,15.965662,351.0,-12.824566,96.0,9.291822,5,ATL,3.293357,249.0,-7.147551,64.0,6.310635
77,6,WAS,24.0,ATL,16.0,6,ATL,-8.266499,307.0,-4.672254,106.0,-0.72494,6,ATL,-0.199739,151.0,-1.354204,72.0,6.594811
102,1,HOU,9.0,BAL,25.0,1,BAL,-4.415663,169.0,0.155152,110.0,7.985488,1,BAL,-11.156192,242.0,-8.125071,72.0,2.796677
110,3,IND,22.0,BAL,19.0,3,BAL,-7.251347,202.0,2.10036,186.0,-2.607719,3,BAL,-8.418345,227.0,-4.538659,139.0,-0.047984
156,1,CIN,3.0,CLE,24.0,1,CLE,-10.871305,154.0,-0.349015,206.0,-6.493987,1,CLE,-19.882656,82.0,-4.316365,75.0,-13.301556


In [17]:
# merge away team with home stats
away_team = home_team.merge(away_team_df, left_on='away_team', right_on='team_offense_away')
#remove duplicates
away_team = away_team[(away_team['away_team'] ==  away_team['team_offense_away']) & (away_team['week_x_away'] == away_team['week'])]
away_team.head(10)

Unnamed: 0,week,away_team,away_score,home_team,home_score,week_x_home,team_offense_home,passing_epa_home,passing_yards_home,rushing_epa_home,...,rushing_epa_away,rushing_yards_away,receiving_epa_away,week_y_away,team_defense_away,passing_epa_def_away,passing_yards_def_away,rushing_epa_def_away,rushing_yards_def_away,receiving_epa_def_away
0,1,DET,21.0,KC,20.0,1,KC,-6.453613,226.0,-2.581925,...,-3.636585,118.0,6.054341,1,DET,-6.453613,226.0,-2.581925,90.0,-5.052831
11,6,DET,20.0,TB,6.0,6,TB,-8.300112,206.0,-4.832714,...,-7.840065,40.0,22.679295,6,DET,-8.300112,206.0,-4.832714,46.0,-7.366295
15,4,DET,34.0,GB,20.0,4,GB,-12.69318,246.0,0.423209,...,4.333835,211.0,6.932714,4,DET,-12.69318,246.0,0.423209,27.0,-5.993364
20,3,CHI,10.0,KC,41.0,3,KC,14.211285,303.0,0.524553,...,2.051218,116.0,-9.638733,3,CHI,14.211285,303.0,0.524553,153.0,14.669461
28,5,CHI,40.0,WAS,20.0,5,WAS,3.675323,388.0,-5.300707,...,8.263792,178.0,17.594566,5,CHI,3.675323,388.0,-5.300707,29.0,5.126959
31,2,CHI,17.0,TB,27.0,2,TB,13.258485,317.0,-1.234122,...,0.217876,67.0,-1.142673,2,CHI,13.258485,317.0,-1.234122,120.0,14.781831
41,6,DEN,8.0,KC,19.0,6,KC,4.125069,306.0,-9.408838,...,4.712027,115.0,-16.197714,6,DEN,4.125069,306.0,-9.408838,96.0,9.299211
45,4,DEN,31.0,CHI,28.0,4,CHI,7.142285,335.0,-0.285303,...,2.192693,97.0,16.564686,4,DEN,7.142285,335.0,-0.285303,171.0,21.83526
50,3,DEN,20.0,MIA,70.0,3,MIA,31.062447,376.0,12.792768,...,-5.999615,69.0,-5.733638,3,DEN,31.062447,376.0,12.792768,350.0,31.062447
54,1,CAR,10.0,ATL,24.0,1,ATL,-4.45045,115.0,-0.595772,...,-8.897445,154.0,-3.103,1,CAR,-4.45045,115.0,-0.595772,130.0,0.718799


In [18]:
# rename df
full_df = away_team.copy()

In [19]:
full_df.to_excel('training_nfl.xlsx', index=False)


In [20]:
# Create Columns for training
column_train_x = [
   'passing_epa_home','passing_yards_home','rushing_epa_home','rushing_yards_home','receiving_epa_home',
    'passing_epa_def_home','passing_yards_def_home','rushing_epa_def_home','rushing_yards_def_home','receiving_epa_def_home',
       'passing_epa_away','passing_yards_away','rushing_epa_away','rushing_yards_away','receiving_epa_away',
           'passing_epa_def_away','passing_yards_def_away','rushing_epa_def_away','rushing_yards_def_away','receiving_epa_def_away',


]




column_train_y = ['home_score','away_score']

In [23]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# model
# Split data into features (X) and target variable (y)
X = full_df[column_train_x]
y = full_df[column_train_y]

# Encode categorical variables using one-hot encoding if needed

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Create a Linear Regression model
model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

# Make predictions
predictions = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = mean_squared_error(y_test, predictions, squared=False)
r2 = r2_score(y_test, predictions)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R2): {r2}")


Mean Absolute Error (MAE): 4.235020110481663
Mean Squared Error (MSE): 30.336271832749055
Root Mean Squared Error (RMSE): 5.507229273753882
R-squared (R2): 0.6662738771336939


In [24]:
team_stats_per_game_off.head()

Unnamed: 0,week,team_offense,passing_epa_offense,passing_yards_offense,rushing_epa_offense,rushing_yards_offense,receiving_epa_offense
0,1,ARI,-9.239668,132.0,-11.179296,96.0,-2.79934
1,1,ATL,-4.45045,115.0,-0.595772,130.0,0.718799
2,1,BAL,-4.415663,169.0,0.155152,110.0,7.985488
3,1,BUF,-1.409397,236.0,-3.168568,97.0,5.10084
4,1,CAR,-10.528563,146.0,-8.897445,154.0,-3.103


In [37]:
### Create mean of these stats to create test data
#home team
mean_offense = team_stats_per_game_off.groupby('team_offense').mean()
mean_offense = mean_offense.drop('week', axis=1)
mean_offense = mean_offense.reset_index()

mean_offense.head()

Unnamed: 0,team_offense,passing_epa_offense,passing_yards_offense,rushing_epa_offense,rushing_yards_offense,receiving_epa_offense
0,ARI,-4.41059,202.5,0.188096,141.0,1.39677
1,ATL,-4.607017,233.666672,-3.324703,119.0,0.492473
2,BAL,0.036623,208.833328,-0.536992,144.833328,5.278067
3,BUF,9.227213,262.666656,2.322295,118.166664,10.637033
4,CAR,-7.374483,222.5,-2.244786,98.0,2.970957


In [36]:
#defense team
# Split the DataFrame into non-numerical and numerical columns
mean_defense = team_stats_per_game_def.groupby('team_defense').mean()
# Remove the week column
mean_defense = mean_defense.drop('week', axis=1)
mean_defense = mean_defense.reset_index()

mean_defense.head()


Unnamed: 0,team_defense,passing_epa_def,passing_yards_def,rushing_epa_def,rushing_yards_def,receiving_epa_def
0,ARI,4.453543,266.333344,1.171767,133.333328,10.682318
1,ATL,0.3869,191.166672,-5.158113,99.0,4.457527
2,BAL,-8.648625,197.666672,-3.817931,97.666664,-0.177976
3,BUF,-5.156078,215.333328,-1.881225,133.666672,2.43645
4,CAR,0.209295,214.0,3.305121,144.333328,4.945242


In [38]:
# Merge on defense vs offense per team
test_data_df = pd.merge(mean_defense, mean_offense, left_on='team_defense',right_on='team_offense', how='left')
test_data_df.head()

Unnamed: 0,team_defense,passing_epa_def,passing_yards_def,rushing_epa_def,rushing_yards_def,receiving_epa_def,team_offense,passing_epa_offense,passing_yards_offense,rushing_epa_offense,rushing_yards_offense,receiving_epa_offense
0,ARI,4.453543,266.333344,1.171767,133.333328,10.682318,ARI,-4.41059,202.5,0.188096,141.0,1.39677
1,ATL,0.3869,191.166672,-5.158113,99.0,4.457527,ATL,-4.607017,233.666672,-3.324703,119.0,0.492473
2,BAL,-8.648625,197.666672,-3.817931,97.666664,-0.177976,BAL,0.036623,208.833328,-0.536992,144.833328,5.278067
3,BUF,-5.156078,215.333328,-1.881225,133.666672,2.43645,BUF,9.227213,262.666656,2.322295,118.166664,10.637033
4,CAR,0.209295,214.0,3.305121,144.333328,4.945242,CAR,-7.374483,222.5,-2.244786,98.0,2.970957


In [39]:
# create DF for upcoming week
# create stats to show upcpoming games
grouped = nfl_data.groupby('week')

# Select the group corresponding to week 6
week_6_data = grouped.get_group(7)
get_teams =['home_team','away_team','home_score','away_score']
schedule_week_df = week_6_data[get_teams]
schedule_week_df.reset_index(drop=True, inplace=True)
schedule_week_df.head()

Unnamed: 0,home_team,away_team,home_score,away_score
0,NO,JAX,,
1,BAL,DET,,
2,CHI,LV,,
3,IND,CLE,,
4,NE,BUF,,


In [40]:
# create a copy of team_total_df for home team and away team for testing

# Make the first copy of the original DataFrame
home_team_test = test_data_df.copy()

# Make a second copy from the first copy
away_team_test = home_team_test.copy()

In [41]:
# Add '_home' to each column name
for column in home_team_test.columns:
    new_column_name_test = column + '_home'
    home_team_test.rename(columns={column: new_column_name_test}, inplace=True)
    
home_team_test.head()

Unnamed: 0,team_defense_home,passing_epa_def_home,passing_yards_def_home,rushing_epa_def_home,rushing_yards_def_home,receiving_epa_def_home,team_offense_home,passing_epa_offense_home,passing_yards_offense_home,rushing_epa_offense_home,rushing_yards_offense_home,receiving_epa_offense_home
0,ARI,4.453543,266.333344,1.171767,133.333328,10.682318,ARI,-4.41059,202.5,0.188096,141.0,1.39677
1,ATL,0.3869,191.166672,-5.158113,99.0,4.457527,ATL,-4.607017,233.666672,-3.324703,119.0,0.492473
2,BAL,-8.648625,197.666672,-3.817931,97.666664,-0.177976,BAL,0.036623,208.833328,-0.536992,144.833328,5.278067
3,BUF,-5.156078,215.333328,-1.881225,133.666672,2.43645,BUF,9.227213,262.666656,2.322295,118.166664,10.637033
4,CAR,0.209295,214.0,3.305121,144.333328,4.945242,CAR,-7.374483,222.5,-2.244786,98.0,2.970957


In [42]:
# Add '_away' to each column name
for column in away_team_test.columns:
    new_column_name_test_away = column + '_away'
    away_team_test.rename(columns={column: new_column_name_test_away}, inplace=True)
    
away_team_test.head()

Unnamed: 0,team_defense_away,passing_epa_def_away,passing_yards_def_away,rushing_epa_def_away,rushing_yards_def_away,receiving_epa_def_away,team_offense_away,passing_epa_offense_away,passing_yards_offense_away,rushing_epa_offense_away,rushing_yards_offense_away,receiving_epa_offense_away
0,ARI,4.453543,266.333344,1.171767,133.333328,10.682318,ARI,-4.41059,202.5,0.188096,141.0,1.39677
1,ATL,0.3869,191.166672,-5.158113,99.0,4.457527,ATL,-4.607017,233.666672,-3.324703,119.0,0.492473
2,BAL,-8.648625,197.666672,-3.817931,97.666664,-0.177976,BAL,0.036623,208.833328,-0.536992,144.833328,5.278067
3,BUF,-5.156078,215.333328,-1.881225,133.666672,2.43645,BUF,9.227213,262.666656,2.322295,118.166664,10.637033
4,CAR,0.209295,214.0,3.305121,144.333328,4.945242,CAR,-7.374483,222.5,-2.244786,98.0,2.970957


In [45]:
# Make df with points and stats for testing upcoming week

# merge home team with home stats
testing_home = schedule_week_df.merge(home_team_test, left_on='home_team', right_on='team_offense_home')
#remove duplicates
#testing_home = home_team[(home_team['home_team'] ==  home_team['team_offense_home']) & (home_team['week_x_home'] == home_team['week'])]
testing_home.head(1)

Unnamed: 0,home_team,away_team,home_score,away_score,team_defense_home,passing_epa_def_home,passing_yards_def_home,rushing_epa_def_home,rushing_yards_def_home,receiving_epa_def_home,team_offense_home,passing_epa_offense_home,passing_yards_offense_home,rushing_epa_offense_home,rushing_yards_offense_home,receiving_epa_offense_home
0,NO,JAX,,,NO,-7.62633,197.833328,-2.9569,96.333336,-2.518713,NO,-1.254757,237.5,-4.608746,95.833336,5.215531


In [69]:
# merge away team with home stats
testing_away = testing_home.merge(away_team_test, left_on='away_team', right_on='team_offense_away')
#remove duplicates
#away_team = away_team[(away_team['away_team'] ==  away_team['team_offense_away']) & (away_team['week_x_away'] == away_team['week'])]
testing_away.head(10)

Unnamed: 0,home_team,away_team,home_score,away_score,team_defense_home,passing_epa_def_home,passing_yards_def_home,rushing_epa_def_home,rushing_yards_def_home,receiving_epa_def_home,...,passing_yards_def_away,rushing_epa_def_away,rushing_yards_def_away,receiving_epa_def_away,team_offense_away,passing_epa_offense_away,passing_yards_offense_away,rushing_epa_offense_away,rushing_yards_offense_away,receiving_epa_offense_away
0,NO,JAX,,,NO,-7.62633,197.833328,-2.9569,96.333336,-2.518713,...,281.166656,-3.290178,75.333336,2.495816,JAX,-1.85745,241.166672,-4.086153,113.5,4.676511
1,BAL,DET,,,BAL,-8.648625,197.666672,-3.817931,97.666664,-0.177976,...,242.333328,-2.960432,64.666664,1.104093,DET,7.262212,269.666656,-2.930749,124.166664,9.979189
2,CHI,LV,,,CHI,9.498192,276.166656,-2.79149,89.5,11.845867,...,205.833328,0.768187,121.666664,4.188753,LV,-0.159963,236.5,-5.302732,73.5,6.095745
3,IND,CLE,,,IND,1.267793,265.166656,-2.734393,113.5,8.021579,...,143.800003,-4.404891,79.0,-3.873337,CLE,-11.188498,198.199997,-2.514973,147.0,-5.323103
4,NE,BUF,,,NE,1.666471,217.333328,-4.985253,104.0,7.553789,...,215.333328,-1.881225,133.666672,2.43645,BUF,9.227213,262.666656,2.322295,118.166664,10.637033
5,NYG,WAS,,,NYG,2.9166,221.833328,1.890979,147.5,4.007718,...,261.0,1.204172,129.0,9.049459,WAS,-3.569055,250.0,0.133346,87.833336,4.2215
6,TB,ATL,,,TB,-0.901837,265.0,-3.38691,83.800003,5.091319,...,191.166672,-5.158113,99.0,4.457527,ATL,-4.607017,233.666672,-3.324703,119.0,0.492473
7,LA,PIT,,,LA,-2.014671,219.166672,-0.333838,122.166664,2.89832,...,265.399994,0.003047,143.800003,3.693972,PIT,-6.535837,209.0,-4.810061,80.400002,-1.622306
8,SEA,ARI,,,SEA,-0.09448,281.200012,-2.798033,79.199997,7.170323,...,266.333344,1.171767,133.333328,10.682318,ARI,-4.41059,202.5,0.188096,141.0,1.39677
9,DEN,GB,,,DEN,9.249393,285.833344,1.430092,172.333328,15.101387,...,215.0,0.274906,143.399994,4.303432,GB,-1.999582,216.600006,-0.564284,81.599998,1.228456


In [80]:
# rename df
full_test = testing_away.copy()
#full_test = full_test.reset_index()

full_test = full_test.rename(columns={
    'passing_epa_offense_away': 'passing_epa_away',
    'passing_yards_offense_away': 'passing_yards_away',
    'receiving_epa_offense_away': 'receiving_epa_away',
    'rushing_epa_offense_away': 'rushing_epa_away',
    'rushing_yards_offense_away': 'rushing_yards_away',
     'passing_epa_offense_home': 'passing_epa_home',
    'passing_yards_offense_home': 'passing_yards_home',
    'receiving_epa_offense_home': 'receiving_epa_home',
    'rushing_epa_offense_home': 'rushing_epa_home',
    'rushing_yards_offense_home': 'rushing_yards_home',
})
full_test.dtypes

home_team                  object
away_team                  object
home_score                float64
away_score                float64
team_defense_home          object
passing_epa_def_home      float32
passing_yards_def_home    float32
rushing_epa_def_home      float32
rushing_yards_def_home    float32
receiving_epa_def_home    float32
team_offense_home          object
passing_epa_home          float32
passing_yards_home        float32
rushing_epa_home          float32
rushing_yards_home        float32
receiving_epa_home        float32
team_defense_away          object
passing_epa_def_away      float32
passing_yards_def_away    float32
rushing_epa_def_away      float32
rushing_yards_def_away    float32
receiving_epa_def_away    float32
team_offense_away          object
passing_epa_away          float32
passing_yards_away        float32
rushing_epa_away          float32
rushing_yards_away        float32
receiving_epa_away        float32
dtype: object

In [81]:
column_test_x = ['passing_epa_def_away', 'passing_yards_def_away',
       'rushing_epa_def_away', 'rushing_yards_def_away',
       'receiving_epa_def_away','passing_epa_away', 'passing_yards_away',
    'rushing_epa_away', 'rushing_yards_away','receiving_epa_away',
     'passing_epa_def_home', 'passing_yards_def_home',
       'rushing_epa_def_home', 'rushing_yards_def_home',
       'receiving_epa_def_home','passing_epa_home', 'passing_yards_home',
    'rushing_epa_home', 'rushing_yards_home',
       'receiving_epa_home',            
                ]

In [82]:
# Now, use the trained model to predict rb_yards for df_test
X_test = full_test[column_test_x]
y_pred = model.predict(X_test)

# The predictions are stored in y_pred
print("Predicted Scores:")
print(y_pred)

Predicted Scores:
[[22.781195 17.761145]
 [21.590487 19.379492]
 [21.915148 20.791338]
 [19.204367 18.544151]
 [16.876463 26.17446 ]
 [21.146936 22.420744]
 [21.153624 16.65793 ]
 [26.946722 18.029741]
 [27.130922 18.227472]
 [21.791609 23.566286]
 [27.643904 20.858118]
 [26.435663 26.643957]
 [18.343729 24.795095]]


Feature names must be in the same order as they were in fit.



In [84]:
# add predicted scores into df: 
predict_cols = ['home_team','away_team']
df_predictions = full_test[predict_cols]
scores_df = pd.DataFrame(y_pred, columns=['home_score', 'away_score'])
df_predictions = pd.concat([df_predictions, scores_df], axis=1)
#df_predictions = df_predictions.dropna()
df_predictions.head(35)

Unnamed: 0,home_team,away_team,home_score,away_score
0,NO,JAX,22.781195,17.761145
1,BAL,DET,21.590487,19.379492
2,CHI,LV,21.915148,20.791338
3,IND,CLE,19.204367,18.544151
4,NE,BUF,16.876463,26.174459
5,NYG,WAS,21.146936,22.420744
6,TB,ATL,21.153624,16.65793
7,LA,PIT,26.946722,18.029741
8,SEA,ARI,27.130922,18.227472
9,DEN,GB,21.791609,23.566286
