In [1]:
# Import required libraries.
from sportsipy.nfl.boxscore import Boxscores, Boxscore
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import hvplot.pandas

pd.set_option('display.max_columns', None)

In [2]:
def get_schedule(year):
    weeks = list(range(1,18))
    schedule_df = pd.DataFrame()
    for w in range(len(weeks)):
        date_string = str(weeks[w]) + '-' + str(year)
        week_scores = Boxscores(weeks[w],year)
        week_games_df = pd.DataFrame()
        for g in range(len(week_scores.games[date_string])):
            game = pd.DataFrame(week_scores.games[date_string][g], index = [0])[['away_name', 'away_abbr','home_name', 'home_abbr','winning_name', 'winning_abbr' ]]
            game['week'] = weeks[w]
            week_games_df = pd.concat([week_games_df,game])
        schedule_df = pd.concat([schedule_df, week_games_df]).reset_index().drop(columns = 'index') 
    return schedule_df

def display(y_pred,X_test):
    for g in range(len(y_pred)):
        #win_prob = np.round(y_pred[g],2)
        win_prob = int(y_pred[g] * 100)
        away_team = X_test.reset_index().drop(columns = 'index').loc[g,'away_name']
        home_team = X_test.reset_index().drop(columns = 'index').loc[g,'home_name']
        print(f'The {away_team} have a probability of {win_prob}% of beating the {home_team}.')

In [3]:
df = pd.read_csv(r"C:\Users\ander\OneDrive\Desktop\2021_week_2_through_14.csv")

# Preview dataset
df.tail()

Unnamed: 0,away_name,away_abbr,home_name,home_abbr,week,win_perc_dif,first_downs_dif,fumbles_dif,interceptions_dif,net_pass_yards_dif,pass_attempts_dif,pass_completions_dif,pass_touchdowns_dif,pass_yards_dif,penalties_dif,points_dif,rush_attempts_dif,rush_touchdowns_dif,rush_yards_dif,time_of_possession_dif,times_sacked_dif,total_yards_dif,turnovers_dif,yards_from_penalties_dif,yards_lost_from_sacks_dif,fourth_down_perc_dif,third_down_perc_dif,result,elo_dif,qb_dif
186,New York Giants,nyg,Los Angeles Chargers,sdg,14,-0.25,-4.0,0.666667,-0.083333,-60.083333,-3.333333,-3.5,-1.333333,-58.416667,-1.5,-8.583333,0.583333,-0.333333,-10.166667,-1.25,0.083333,-70.25,0.0,-16.5,1.666667,-0.119048,-0.080808,0.0,-110.229362,-126.445654
187,Detroit Lions,det,Denver Broncos,den,14,-0.409091,-0.916667,0.416667,0.0,-24.833333,1.416667,0.916667,-0.25,-20.333333,1.5,-2.833333,-1.666667,0.0,-8.166667,-165.0,-0.083333,-33.0,0.083333,9.333333,4.5,-0.195971,-0.052985,0.0,-173.770079,-0.964572
188,Buffalo Bills,buf,Tampa Bay Buccaneers,tam,14,-0.166667,-2.0,0.75,0.083333,-51.75,-5.75,-5.0,-0.666667,-49.5,0.333333,-3.416667,3.916667,0.0,24.083333,26.583333,0.333333,-27.666667,0.083333,-0.416667,2.25,-0.25,0.00393,0.0,-44.281063,-16.339024
189,Chicago Bears,chi,Green Bay Packers,gnb,14,-0.416667,-2.166667,0.166667,0.75,-74.583333,-5.25,-4.833333,-1.166667,-63.166667,1.75,-6.833333,2.333333,0.166667,18.416667,-189.166667,1.416667,-56.166667,0.75,10.25,11.416667,-0.088235,-0.075342,0.0,-239.644166,-177.084461
190,Los Angeles Rams,ram,Arizona Cardinals,crd,14,-0.166667,-0.25,-1.25,0.166667,44.416667,6.083333,1.833333,0.666667,38.833333,-1.416667,-0.583333,-6.833333,-0.916667,-26.25,-176.25,-0.75,18.166667,0.25,-14.083333,-5.583333,-0.2,-0.026152,0.0,-61.165557,-20.046215


In [4]:
# Import Logistic Regression & Sklearn modules
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [5]:
# Set prediction week and split dataset workbook between prediction games and completed games.
pred_week = 5
comp_games_df = df[df['week'] < pred_week]
pred_games_df = df[df['week'] == pred_week]

In [6]:
# Rename train & test dataframes. Split features and results.
train_df = comp_games_df
test_df = pred_games_df

X_train = train_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_train = train_df[['result']] 
X_test = test_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_test = test_df[['result']]

In [7]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the scaler to the features training dataset
X_scaler = scaler.fit(X_train)

# Fit the scaler to the features training dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
# Logistic Model
clf_unscaled = LogisticRegression(penalty='l1', dual=False, tol=0.001, C=1.0, fit_intercept=True, 
                   intercept_scaling=1, class_weight='balanced', random_state=None, 
                   solver='liblinear', max_iter=1000, multi_class='ovr', verbose=0)

clf_scaled = LogisticRegression(penalty='l1', dual=False, tol=0.001, C=1.0, fit_intercept=True, 
                   intercept_scaling=1, class_weight='balanced', random_state=None, 
                   solver='liblinear', max_iter=1000, multi_class='ovr', verbose=0)

clf_unscaled.fit(X_train, np.ravel(y_train.values))
clf_scaled.fit(X_train_scaled, np.ravel(y_train.values))

y_pred_unscaled = clf_unscaled.predict_proba(X_test)
y_pred_scaled = clf_scaled.predict_proba(X_test_scaled)

y_pred_unscaled = y_pred_unscaled[:,1]
y_pred_scaled = y_pred_scaled[:,1]

print("Logistic Regression - Unscaled\n")
display(y_pred_unscaled,test_df)

print("\nLogistic Regression - Scaled\n")
display(y_pred_scaled,test_df)

Logistic Regression - Unscaled

The Los Angeles Rams have a probability of 13% of beating the Seattle Seahawks.
The New York Jets have a probability of 97% of beating the Atlanta Falcons.
The Philadelphia Eagles have a probability of 58% of beating the Carolina Panthers.
The New Orleans Saints have a probability of 78% of beating the Washington Football Team.
The Miami Dolphins have a probability of 15% of beating the Tampa Bay Buccaneers.
The Denver Broncos have a probability of 95% of beating the Pittsburgh Steelers.
The Detroit Lions have a probability of 77% of beating the Minnesota Vikings.
The Tennessee Titans have a probability of 67% of beating the Jacksonville Jaguars.
The New England Patriots have a probability of 92% of beating the Houston Texans.
The Green Bay Packers have a probability of 6% of beating the Cincinnati Bengals.
The Cleveland Browns have a probability of 93% of beating the Los Angeles Chargers.
The Chicago Bears have a probability of 22% of beating the Las Ve

In [9]:

# import tensorflow libraries
import tensorflow as tf
from keras.layers import Dense
from keras.models import Sequential
from sklearn.model_selection import train_test_split

In [10]:
pred_week = 14
comp_games_df = df[df['week'] < pred_week]
pred_games_df = df[df['week'] == pred_week]

In [11]:
train_df = comp_games_df
test_df = pred_games_df

X_train = train_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_train = train_df[['result']] 
X_test = test_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_test = test_df[['result']]

In [12]:
X_train

Unnamed: 0,win_perc_dif,first_downs_dif,fumbles_dif,interceptions_dif,net_pass_yards_dif,pass_attempts_dif,pass_completions_dif,pass_touchdowns_dif,pass_yards_dif,penalties_dif,points_dif,rush_attempts_dif,rush_touchdowns_dif,rush_yards_dif,time_of_possession_dif,times_sacked_dif,total_yards_dif,turnovers_dif,yards_from_penalties_dif,yards_lost_from_sacks_dif,fourth_down_perc_dif,third_down_perc_dif,elo_dif,qb_dif
0,0.000000,4.000000,-2.000000,0.000000,121.000000,16.000000,8.000000,0.000000,132.000000,-5.000000,-3.000000,-7.000000,1.000000,-66.000000,55.000000,1.000000,55.000000,0.000000,-18.000000,11.000000,-0.666667,0.200000,-50.848658,100.316973
1,1.000000,-4.000000,-1.000000,-1.000000,29.000000,-13.000000,-9.000000,2.000000,45.000000,0.000000,13.000000,10.000000,-1.000000,15.000000,-23.000000,2.000000,44.000000,-2.000000,-20.000000,16.000000,0.666667,-0.240260,-51.742731,34.616025
2,0.000000,4.000000,-1.000000,0.000000,-119.000000,-14.000000,-9.000000,4.000000,-128.000000,0.000000,19.000000,12.000000,-1.000000,60.000000,196.000000,-1.000000,-59.000000,-1.000000,-2.000000,-9.000000,1.000000,0.214286,204.627654,54.983874
3,0.000000,10.000000,0.000000,1.000000,232.000000,24.000000,16.000000,1.000000,247.000000,5.000000,10.000000,0.000000,2.000000,7.000000,445.000000,1.000000,239.000000,1.000000,30.000000,15.000000,0.000000,0.133333,-62.282436,-14.386838
4,0.000000,-3.000000,0.000000,0.000000,50.000000,-9.000000,-9.000000,-1.000000,55.000000,-7.000000,9.000000,-3.000000,1.000000,-42.000000,-270.000000,0.000000,8.000000,2.000000,-10.000000,5.000000,0.000000,-0.128205,36.874709,-13.318243
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,-0.090909,1.454545,0.272727,0.090909,-72.181818,-4.000000,-2.818182,-0.090909,-71.636364,-2.454545,-2.727273,5.818182,-0.181818,36.454545,81.818182,-0.090909,-35.727273,0.363636,-21.000000,0.545455,0.058462,0.048097,15.185078,-68.820114
173,-0.454545,-2.636364,0.727273,0.000000,-81.181818,-0.545455,-3.181818,-1.636364,-83.727273,2.272727,-11.454545,-0.727273,0.272727,19.909091,-60.545455,0.272727,-61.272727,0.454545,13.909091,-2.545455,0.121429,-0.065264,-285.686464,-102.890676
174,0.227273,4.909091,-0.090909,0.545455,4.363636,-4.272727,-2.545455,0.000000,9.636364,-0.636364,3.545455,9.090909,0.454545,63.636364,258.090909,1.090909,68.000000,0.272727,5.000000,5.272727,0.384615,-0.060289,107.685364,27.419825
175,-0.090909,-5.454545,-1.000000,-0.363636,-66.727273,-9.000000,-5.181818,-0.909091,-58.272727,-1.272727,-4.818182,0.909091,0.090909,2.818182,32.272727,1.000000,-63.909091,-0.909091,-9.000000,8.454545,0.030303,-0.138340,-173.767258,-78.641705


In [13]:
number_input_features = X_train.shape[1]

number_output_neurons = 1

hidden_nodes_layer1 =  (number_input_features + 1) // 2
hidden_nodes_layer2 = (hidden_nodes_layer1 + 1) // 2
hidden_nodes_layer3 = (hidden_nodes_layer2 + 1) // 2

nn = Sequential()

nn.add(Dense(units=hidden_nodes_layer1, activation='relu', input_dim=number_input_features))
nn.add(Dense(units=hidden_nodes_layer2, activation='relu'))
nn.add(Dense(units=hidden_nodes_layer3, activation='relu'))

nn.add(Dense(units=number_output_neurons, activation='sigmoid'))

nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 12)                300       
                                                                 
 dense_1 (Dense)             (None, 6)                 78        
                                                                 
 dense_2 (Dense)             (None, 3)                 21        
                                                                 
 dense_3 (Dense)             (None, 1)                 4         
                                                                 
Total params: 403
Trainable params: 403
Non-trainable params: 0
_________________________________________________________________


In [14]:
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit the model using 50 epochs and the training data
fit_model = nn.fit(X_train, y_train, epochs=500, verbose=0)

In [15]:
pred_week = 5
comp_games_df = df[df['week'] < pred_week]
pred_games_df = df[df['week'] == pred_week]

In [16]:
train_df = comp_games_df
test_df = pred_games_df

X_train = train_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_train = train_df[['result']] 
X_test = test_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_test = test_df[['result']]

In [17]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

rf_model = RandomForestClassifier(n_estimators=100, random_state=1)

# Fitting the model
rf_fitted = rf_model.fit(X_train, np.ravel(y_train.values))

y_pred = rf_fitted.predict_proba(X_test)
y_pred = y_pred[:,1]
y = rf_fitted.predict(X_test)

display(y_pred, test_df)

The Los Angeles Rams have a probability of 65% of beating the Seattle Seahawks.
The New York Jets have a probability of 38% of beating the Atlanta Falcons.
The Philadelphia Eagles have a probability of 44% of beating the Carolina Panthers.
The New Orleans Saints have a probability of 46% of beating the Washington Football Team.
The Miami Dolphins have a probability of 30% of beating the Tampa Bay Buccaneers.
The Denver Broncos have a probability of 61% of beating the Pittsburgh Steelers.
The Detroit Lions have a probability of 42% of beating the Minnesota Vikings.
The Tennessee Titans have a probability of 72% of beating the Jacksonville Jaguars.
The New England Patriots have a probability of 74% of beating the Houston Texans.
The Green Bay Packers have a probability of 64% of beating the Cincinnati Bengals.
The Cleveland Browns have a probability of 53% of beating the Los Angeles Chargers.
The Chicago Bears have a probability of 33% of beating the Las Vegas Raiders.
The New York Giant