In [18]:
# Import required libraries.
from sportsipy.nfl.boxscore import Boxscores, Boxscore
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import hvplot.pandas

pd.set_option('display.max_columns', None)

In [19]:
def get_schedule(year):
    weeks = list(range(1,18))
    schedule_df = pd.DataFrame()
    for w in range(len(weeks)):
        date_string = str(weeks[w]) + '-' + str(year)
        week_scores = Boxscores(weeks[w],year)
        week_games_df = pd.DataFrame()
        for g in range(len(week_scores.games[date_string])):
            game = pd.DataFrame(week_scores.games[date_string][g], index = [0])[['away_name', 'away_abbr','home_name', 'home_abbr','winning_name', 'winning_abbr' ]]
            game['week'] = weeks[w]
            week_games_df = pd.concat([week_games_df,game])
        schedule_df = pd.concat([schedule_df, week_games_df]).reset_index().drop(columns = 'index') 
    return schedule_df

def display(y_pred,X_test):
    for g in range(len(y_pred)):
        #win_prob = np.round(y_pred[g],2)
        win_prob = int(y_pred[g] * 100)
        away_team = X_test.reset_index().drop(columns = 'index').loc[g,'away_name']
        home_team = X_test.reset_index().drop(columns = 'index').loc[g,'home_name']
        print(f'The {away_team} have a probability of {win_prob}% of beating the {home_team}.')

In [20]:
df = pd.read_csv(r"C:\Users\ander\OneDrive\Desktop\2021_week_2_through_14.csv")

# Preview dataset
df.tail()

Unnamed: 0,away_name,away_abbr,home_name,home_abbr,week,win_perc_dif,first_downs_dif,fumbles_dif,interceptions_dif,net_pass_yards_dif,pass_attempts_dif,pass_completions_dif,pass_touchdowns_dif,pass_yards_dif,penalties_dif,points_dif,rush_attempts_dif,rush_touchdowns_dif,rush_yards_dif,time_of_possession_dif,times_sacked_dif,total_yards_dif,turnovers_dif,yards_from_penalties_dif,yards_lost_from_sacks_dif,fourth_down_perc_dif,third_down_perc_dif,result,elo_dif,qb_dif
186,New York Giants,nyg,Los Angeles Chargers,sdg,14,-0.25,-4.0,0.666667,-0.083333,-60.083333,-3.333333,-3.5,-1.333333,-58.416667,-1.5,-8.583333,0.583333,-0.333333,-10.166667,-1.25,0.083333,-70.25,0.0,-16.5,1.666667,-0.119048,-0.080808,0.0,-110.229362,-126.445654
187,Detroit Lions,det,Denver Broncos,den,14,-0.409091,-0.916667,0.416667,0.0,-24.833333,1.416667,0.916667,-0.25,-20.333333,1.5,-2.833333,-1.666667,0.0,-8.166667,-165.0,-0.083333,-33.0,0.083333,9.333333,4.5,-0.195971,-0.052985,0.0,-173.770079,-0.964572
188,Buffalo Bills,buf,Tampa Bay Buccaneers,tam,14,-0.166667,-2.0,0.75,0.083333,-51.75,-5.75,-5.0,-0.666667,-49.5,0.333333,-3.416667,3.916667,0.0,24.083333,26.583333,0.333333,-27.666667,0.083333,-0.416667,2.25,-0.25,0.00393,0.0,-44.281063,-16.339024
189,Chicago Bears,chi,Green Bay Packers,gnb,14,-0.416667,-2.166667,0.166667,0.75,-74.583333,-5.25,-4.833333,-1.166667,-63.166667,1.75,-6.833333,2.333333,0.166667,18.416667,-189.166667,1.416667,-56.166667,0.75,10.25,11.416667,-0.088235,-0.075342,0.0,-239.644166,-177.084461
190,Los Angeles Rams,ram,Arizona Cardinals,crd,14,-0.166667,-0.25,-1.25,0.166667,44.416667,6.083333,1.833333,0.666667,38.833333,-1.416667,-0.583333,-6.833333,-0.916667,-26.25,-176.25,-0.75,18.166667,0.25,-14.083333,-5.583333,-0.2,-0.026152,0.0,-61.165557,-20.046215


In [21]:
# Import Logistic Regression & Sklearn modules
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [22]:
# Set prediction week and split dataset workbook between prediction games and completed games.
pred_week = 14
comp_games_df = df[df['week'] < pred_week]
pred_games_df = df[df['week'] == pred_week]

In [23]:
# Rename train & test dataframes. Split features and results.
train_df = comp_games_df
test_df = pred_games_df

X_train = train_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_train = train_df[['result']] 
X_test = test_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_test = test_df[['result']]

In [24]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the scaler to the features training dataset
X_scaler = scaler.fit(X_train)

# Fit the scaler to the features training dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [25]:
# Logistic Model
clf_unscaled = LogisticRegression(penalty='l1', dual=False, tol=0.001, C=1.0, fit_intercept=True, 
                   intercept_scaling=1, class_weight='balanced', random_state=None, 
                   solver='liblinear', max_iter=1000, multi_class='ovr', verbose=0)

clf_scaled = LogisticRegression(penalty='l1', dual=False, tol=0.001, C=1.0, fit_intercept=True, 
                   intercept_scaling=1, class_weight='balanced', random_state=None, 
                   solver='liblinear', max_iter=1000, multi_class='ovr', verbose=0)

clf_unscaled.fit(X_train, np.ravel(y_train.values))
clf_scaled.fit(X_train_scaled, np.ravel(y_train.values))

y_pred_unscaled = clf_unscaled.predict_proba(X_test)
y_pred_scaled = clf_scaled.predict_proba(X_test_scaled)

y_pred_unscaled = y_pred_unscaled[:,1]
y_pred_scaled = y_pred_scaled[:,1]

print("Logistic Regression - Unscaled\n")
display(y_pred_unscaled,test_df)

print("\nLogistic Regression - Scaled\n")
display(y_pred_scaled,test_df)

Logistic Regression - Unscaled

The Pittsburgh Steelers have a probability of 53% of beating the Minnesota Vikings.
The San Francisco 49ers have a probability of 28% of beating the Cincinnati Bengals.
The Atlanta Falcons have a probability of 56% of beating the Carolina Panthers.
The Dallas Cowboys have a probability of 66% of beating the Washington Football Team.
The Jacksonville Jaguars have a probability of 18% of beating the Tennessee Titans.
The New Orleans Saints have a probability of 43% of beating the New York Jets.
The Las Vegas Raiders have a probability of 14% of beating the Kansas City Chiefs.
The Seattle Seahawks have a probability of 71% of beating the Houston Texans.
The Baltimore Ravens have a probability of 69% of beating the Cleveland Browns.
The New York Giants have a probability of 31% of beating the Los Angeles Chargers.
The Detroit Lions have a probability of 22% of beating the Denver Broncos.
The Buffalo Bills have a probability of 40% of beating the Tampa Bay Bu