In [20]:
#!pip install scikit-optimize
#!pip install eli5
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mpl_toolkits
from collections import deque
from functools import reduce
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn import ensemble
from sklearn.metrics import roc_curve, auc
import scipy.stats as st
import math
import statistics

from sklearn.model_selection import GridSearchCV

%matplotlib inline

In [2]:
# Load the games-and-odds table. Seasons before 2000 and the 2021 season are
# left out, and the stadium / weather columns are discarded.
global_df = pd.read_csv("nfl_games_and_bets.csv")
global_df = global_df[
    ~((global_df.schedule_season == 2021) | (global_df.schedule_season < 2000))
]
global_df = global_df.drop(
    columns=[
        'stadium',
        'weather_temperature',
        'weather_wind_mph',
        'weather_humidity',
        'weather_detail',
    ]
)

# Relocated / renamed franchises are folded into their current name.
old_to_new_team_name = {
    "San Diego Chargers": "Los Angeles Chargers",
    "St. Louis Rams": "Los Angeles Rams",
    "Washington Redskins": "Washington Football Team",
    "Oakland Raiders": "Las Vegas Raiders",
}
global_df = global_df.replace(
    {"team_away": old_to_new_team_name, "team_home": old_to_new_team_name}
)

# Short code <-> full team name. The home/away columns are rewritten to the
# short code so they can be compared directly with team_favorite_id.
short_form_to_team_name = {
    "GB": "Green Bay Packers", "HOU": "Houston Texans", "KC": "Kansas City Chiefs", "BUF": "Buffalo Bills",
    "TEN": "Tennessee Titans", "NO": "New Orleans Saints", "SEA": "Seattle Seahawks", "MIN": "Minnesota Vikings",
    "TB": "Tampa Bay Buccaneers", "LVR": "Las Vegas Raiders", "BAL": "Baltimore Ravens", "LAC": "Los Angeles Chargers",
    "IND": "Indianapolis Colts", "DET": "Detroit Lions", "CLE": "Cleveland Browns", "JAX": "Jacksonville Jaguars",
    "MIA": "Miami Dolphins", "ARI": "Arizona Cardinals", "PIT": "Pittsburgh Steelers", "CHI": "Chicago Bears",
    "ATL": "Atlanta Falcons", "CAR": "Carolina Panthers", "LAR": "Los Angeles Rams", "CIN": "Cincinnati Bengals",
    "DAL": "Dallas Cowboys", "SF": "San Francisco 49ers", "NYG": "New York Giants", "WAS": "Washington Football Team",
    "DEN": "Denver Broncos", "PHI": "Philadelphia Eagles", "NYJ": "New York Jets", "NE": "New England Patriots",
}
team_name_to_short_form = dict(
    zip(short_form_to_team_name.values(), short_form_to_team_name.keys())
)

global_df = global_df.replace(
    {'team_away': team_name_to_short_form, "team_home": team_name_to_short_form}
)

# Note: 'PICK' when spread == 0
# Determine if teams are within the same division

# Division codes are two letters: the conference (A = AFC, N = NFC) followed by
# the region (W = West, and likewise S, N, E).
team_to_division = {
    "ARI": "NW", "LAR": "NW", "SF": "NW", "SEA": "NW",
    "CAR": "NS", "TB": "NS", "NO": "NS", "ATL": "NS",
    "GB": "NN", "CHI": "NN", "MIN": "NN", "DET": "NN",
    "WAS": "NE", "DAL": "NE", "PHI": "NE", "NYG": "NE",
    "TEN": "AS", "HOU": "AS", "IND": "AS", "JAX": "AS",
    "BUF": "AE", "MIA": "AE", "NE": "AE", "NYJ": "AE",
    "BAL": "AN", "PIT": "AN", "CLE": "AN", "CIN": "AN",
    "LVR": "AW", "DEN": "AW", "KC": "AW", "LAC": "AW",
}

# Series.map() turns an unknown team into NaN instead of raising, which would
# silently mark the game as non-divisional. Check up front so an unmapped team
# still fails with a KeyError, as a direct dict lookup would.
teams_seen = pd.concat([global_df["team_home"], global_df["team_away"]]).unique()
unmapped_teams = [team for team in teams_seen if team not in team_to_division]
if unmapped_teams:
    raise KeyError(f"No division mapping for team(s): {unmapped_teams}")

# Create auxiliary columns to make calculations easier.
# Everything is computed column-wise and attached to a *new* frame, so
# `global_df` itself is left untouched (no temporary division columns leak
# into it).
global_df2 = global_df.assign(
    # True when both teams belong to the same division.
    intra_division=(
        global_df["team_home"].map(team_to_division)
        == global_df["team_away"].map(team_to_division)
    ),
    # Final margin seen from each side (positive = that side won).
    home_point_diff=global_df["score_home"] - global_df["score_away"],
    away_point_diff=global_df["score_away"] - global_df["score_home"],
    # spread_favorite re-expressed from the home team's side: the sign is
    # flipped when the favourite is the away team, otherwise kept as is.
    home_spread=global_df["spread_favorite"].where(
        global_df["team_favorite_id"] != global_df["team_away"],
        -global_df["spread_favorite"],
    ),
)

# Both names refer to the same enriched frame.
global_df3 = global_df2

# Recent-form features, built in one pass over the games in row order.
# For every game we record, for both teams, the point differential of their
# previous game (`*_last_1`) and the summed differential of their previous
# three games (`*_last_3`), *before* the current game is added to the history.
# A team with no history yet counts as three games with a differential of 0.
# History is not reset between seasons.
# NOTE(review): this assumes the rows are in chronological order - confirm
# against the CSV.

# team code -> point differentials of its three most recent games, newest
# first. maxlen=3 makes appendleft() drop the oldest entry automatically.
team_to_games = {}

home_last_3_values = []
away_last_3_values = []
home_last_1_values = []
away_last_1_values = []

for game in global_df3.itertuples(index=False):
    home_recent = team_to_games.setdefault(game.team_home, deque([0, 0, 0], maxlen=3))
    away_recent = team_to_games.setdefault(game.team_away, deque([0, 0, 0], maxlen=3))

    # Read the home team's form first, then push this game's result.
    home_last_3_values.append(sum(home_recent))
    home_last_1_values.append(home_recent[0])
    home_recent.appendleft(game.home_point_diff)

    away_last_3_values.append(sum(away_recent))
    away_last_1_values.append(away_recent[0])
    away_recent.appendleft(game.away_point_diff)

# One bulk assignment per column instead of one `.at` write per cell.
# The columns are added to `global_df3` in place on purpose, so every name that
# refers to this frame sees them.
global_df3['home_last_3'] = np.asarray(home_last_3_values, dtype=float)
global_df3['away_last_3'] = np.asarray(away_last_3_values, dtype=float)
global_df3['home_last_1'] = np.asarray(home_last_1_values, dtype=float)
global_df3['away_last_1'] = np.asarray(away_last_1_values, dtype=float)

global_df_final = global_df3

global_df_final




Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium_neutral,intra_division,home_point_diff,away_point_diff,home_spread,home_last_3,away_last_3,home_last_1,away_last_1
4853,9/3/2000,2000,1,False,ATL,36.0,28.0,SF,ATL,-6.5,46.5,False,False,8.0,-8.0,-6.5,0.0,0.0,0.0,0.0
4854,9/3/2000,2000,1,False,BUF,16.0,13.0,TEN,BUF,-1.0,40.0,False,False,3.0,-3.0,-1.0,0.0,0.0,0.0,0.0
4855,9/3/2000,2000,1,False,CLE,7.0,27.0,JAX,JAX,-10.5,38.5,False,False,-20.0,20.0,10.5,0.0,0.0,0.0,0.0
4856,9/3/2000,2000,1,False,DAL,14.0,41.0,PHI,DAL,-6.0,39.5,False,True,-27.0,27.0,-6.0,0.0,0.0,0.0,0.0
4857,9/3/2000,2000,1,False,GB,16.0,20.0,NYJ,GB,-2.5,44.0,False,False,-4.0,4.0,-2.5,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10441,1/17/2021,2020,Division,True,KC,22.0,17.0,CLE,KC,-8.0,56.0,False,False,5.0,-5.0,-8.0,-11.0,6.0,-17.0,11.0
10442,1/17/2021,2020,Division,True,NO,20.0,30.0,TB,NO,-2.5,53.0,False,True,-10.0,10.0,-2.5,57.0,65.0,12.0,8.0
10443,1/24/2021,2020,Conference,True,GB,26.0,31.0,TB,GB,-3.0,53.0,False,False,-5.0,5.0,-3.0,59.0,35.0,14.0,10.0
10444,1/24/2021,2020,Conference,True,KC,38.0,24.0,BUF,KC,-3.0,55.0,False,False,14.0,-14.0,-3.0,-9.0,47.0,5.0,14.0


In [88]:
def _summary_stats(won, loss, sample_set):
    """Compute the figures reported by ``print_stats`` and ``print_basic_stats``.

    Args:
        won: Number of winning bets.
        loss: Number of losing bets.
        sample_set: Sequence of per-bet outcomes (1 for a win, 0 for a loss).

    Returns:
        Tuple ``(std_dev, mean, z_score, confidence, win_percent)``.
        A figure that is undefined for the given input is returned as NaN
        instead of raising: the win percentage when there are no decided bets,
        the mean of an empty sample, the standard deviation of fewer than two
        samples, and the z-score / confidence when the standard deviation is
        missing or zero.
    """
    nan = float("nan")
    sample_size = len(sample_set)

    decided = won + loss
    win_percent = won / decided * 100 if decided else nan

    mean = statistics.mean(sample_set) if sample_size > 0 else nan
    # statistics.stdev() needs at least two data points.
    std_dev = statistics.stdev(sample_set) if sample_size > 1 else nan

    if std_dev > 0:  # False for both 0 and NaN
        standard_error = std_dev / math.sqrt(sample_size)
        # expected mean should be 0.5, if we have 50% win/loss
        z_score = (mean - 0.5) / standard_error
        # Standard normal CDF evaluated at |z|.
        confidence = st.norm.cdf(abs(z_score))
    else:
        z_score = nan
        confidence = nan

    return std_dev, mean, z_score, confidence, win_percent


def print_stats(money, won, loss, push, sample_set):
    """Print the full result summary of a betting strategy.

    Prints the ending bankroll, the win-loss-push record, the win percentage,
    the sample size / standard deviation / mean of ``sample_set``, the z-score
    of the mean against 0.5 and the normal CDF at the absolute z-score.
    Figures that cannot be computed are printed as ``nan``.

    Args:
        money: Ending bankroll.
        won: Number of winning bets.
        loss: Number of losing bets.
        push: Number of pushed bets.
        sample_set: Sequence of per-bet outcomes (1 for a win, 0 for a loss).
    """
    std_dev, mean, z_score, confidence, win_percent = _summary_stats(won, loss, sample_set)

    print ("Ending money is %d" %money)
    print ("Record: %d-%d-%d" %(won, loss, push))
    print ("Win Percent: %f" %win_percent)
    print ("Sample size %d, STD %f, mean %f" %(len(sample_set), std_dev, mean))
    print ("Z-Score %f" %z_score)
    print ("Confidence %f" %confidence)


def print_basic_stats(money, won, loss, push, sample_set):
    """Print the short result summary of a betting strategy.

    Same as ``print_stats`` without the sample-size and z-score lines: ending
    bankroll, win-loss-push record, win percentage and confidence. Figures that
    cannot be computed are printed as ``nan``.

    Args:
        money: Ending bankroll.
        won: Number of winning bets.
        loss: Number of losing bets.
        push: Number of pushed bets.
        sample_set: Sequence of per-bet outcomes (1 for a win, 0 for a loss).
    """
    _, _, _, confidence, win_percent = _summary_stats(won, loss, sample_set)

    print ("Ending money is %d" %money)
    print ("Record: %d-%d-%d" %(won, loss, push))
    print ("Win Percent: %f" %win_percent)
    print ("Confidence %f" %confidence)


def _settle_bet(backing_home, row, money, won, loss, push, sample_set, year_to_record):
    """Settle one bet against the spread and return the updated running totals.

    Shared implementation of ``bet_home`` and ``bet_away``; see those functions
    for the meaning of the arguments. ``backing_home`` is True when the bet is
    on the home team and False when it is on the away team.
    """
    if row is None:
        # Legacy call style: the game is taken from the notebook-level loop
        # variable `row`.
        try:
            row = globals()["row"]
        except KeyError:
            raise NameError(
                "no game to settle: pass row=... or call from inside a "
                "notebook-level `for row in ...` loop"
            ) from None

    # A losing bet costs 300, a winning bet pays 273.
    # NOTE(review): 273 is roughly 300 / 1.1, presumably -110 odds - confirm.
    stake = 300
    payout = 273

    # > 0: the away team beat the spread; < 0: the home team did; 0: push.
    away_cover_margin = row.away_point_diff - row.home_spread

    if math.isnan(away_cover_margin):
        # Score or spread missing: the bet cannot be graded, so nothing is
        # recorded (previously this fell through to the final `else` branch).
        return money, won, loss, push, sample_set, year_to_record

    season = row.schedule_season
    if away_cover_margin == 0:
        push += 1
    elif (away_cover_margin < 0) == backing_home:
        money = money + payout
        won += 1
        sample_set.append(1)
        year_to_record[season] = year_to_record.get(season, 0) + 1
    else:
        money = money - stake
        loss += 1
        sample_set.append(0)
        year_to_record[season] = year_to_record.get(season, 0) - 1
    return money, won, loss, push, sample_set, year_to_record


def bet_home(money, won, loss, push, sample_set, year_to_record, row=None):
    """Bet on the home team against the spread for one game.

    Args:
        money: Current bankroll.
        won: Winning bets so far.
        loss: Losing bets so far.
        push: Pushed bets so far.
        sample_set: List of per-bet outcomes; 1 (win) or 0 (loss) is appended
            in place. Pushes append nothing.
        year_to_record: Dict mapping season to wins minus losses; updated in
            place. Pushes leave it unchanged.
        row: The game to settle - any object with ``away_point_diff``,
            ``home_spread`` and ``schedule_season`` attributes. When omitted,
            the notebook-level variable ``row`` is used, which keeps the
            original six-argument calls working.

    Returns:
        ``(money, won, loss, push, sample_set, year_to_record)`` after the bet.
        A game whose margin is NaN is skipped and the totals come back
        unchanged.

    Raises:
        NameError: If ``row`` is omitted and no notebook-level ``row`` exists.
    """
    return _settle_bet(True, row, money, won, loss, push, sample_set, year_to_record)


def bet_away(money, won, loss, push, sample_set, year_to_record, row=None):
    """Bet on the away team against the spread for one game.

    Mirror image of ``bet_home``: same arguments, same return value, with the
    win and loss outcomes swapped.
    """
    return _settle_bet(False, row, money, won, loss, push, sample_set, year_to_record)

def key_number_away(number):
    """Return True when ``number`` is exactly -2.5, -3, -6.5 or -7."""
    return number in (-2.5, -3, -6.5, -7)

def set_vars():
    """Return the starting state for a strategy back-test.

    Returns:
        ``(money, won, loss, push, sample_set, year_to_record)``: a bankroll of
        10000, zeroed win/loss/push counters, and a fresh empty list and dict
        on every call.
    """
    return 10000, 0, 0, 0, [], {}




In [91]:
# Each strategy is (label, filter, bet):
#   label  - heading printed above the results
#   filter - takes one game (an itertuples row) and says whether to bet on it
#   bet    - bet_home or bet_away, i.e. which side is backed
# home_spread < 0 means the home team is favoured, so the road team is the
# underdog; home_spread > 0 means the home team is the underdog.
STRATEGIES = [
    (
        "Picking the road division underdog when they most recently lost between 14 and 18 points",
        lambda game: -18 <= game.away_last_1 <= -14 and game.intra_division and game.home_spread < 0,
        bet_away,
    ),
    (
        "Picking the road underdog when they most recently lost between 14 and 18 points",
        lambda game: -18 <= game.away_last_1 <= -14 and game.home_spread < 0,
        bet_away,
    ),
    (
        "Picking the road underdog",
        lambda game: game.home_spread < 0,
        bet_away,
    ),
    (
        "Picking the home underdog",
        lambda game: game.home_spread > 0,
        bet_home,
    ),
    (
        "Picking the home favourite",
        lambda game: game.home_spread < 0,
        bet_home,
    ),
    (
        "Picking the home underdog when they most recently lost between 14 and 18 points",
        lambda game: game.home_spread > 0 and -18 <= game.home_last_1 <= -14,
        bet_home,
    ),
    (
        "Picking the home division underdog when they most recently lost between 14 and 18 points",
        lambda game: game.home_spread > 0 and -18 <= game.home_last_1 <= -14 and game.intra_division,
        bet_home,
    ),
    (
        # Label corrected: the filter includes a spread of exactly 7.
        "Picking the road underdog when the spread is >=7",
        lambda game: game.home_spread <= -7,
        bet_away,
    ),
    (
        # Label corrected: the filter includes a spread of exactly 14.
        "Picking the road underdog when the spread is >=14",
        lambda game: game.home_spread <= -14,
        bet_away,
    ),
    (
        # Label corrected to describe the filter that is actually applied: it
        # looks at the summed point differential of the last 3 games and does
        # not check the spread, so the road team is not necessarily the
        # underdog.
        # NOTE(review): if "road underdog" was intended, add
        # `game.home_spread < 0` to the filter instead.
        "Picking the division road team when their combined point differential over the last 3 games is -7 or worse",
        lambda game: game.away_last_3 <= -7 and game.intra_division,
        bet_away,
    ),
    (
        "Picking the road underdog when the OU is less than or equal to 42",
        lambda game: game.home_spread < 0 and game.over_under_line <= 42,
        bet_away,
    ),
    (
        "Picking the road team when the OU is less than or equal to 42",
        lambda game: game.over_under_line <= 42,
        bet_away,
    ),
    (
        "Picking the road underdog on non-key numbers",
        lambda game: game.home_spread < 0 and not key_number_away(game.home_spread),
        bet_away,
    ),
]

for label, should_bet, place_bet in STRATEGIES:
    money, won, loss, push, sample_set, year_to_record = set_vars()

    # The loop variable has to be called `row` and live at notebook level:
    # bet_home / bet_away, when called with these six arguments, read the
    # current game from the global `row`.
    for row in global_df_final.itertuples():
        if should_bet(row):
            money, won, loss, push, sample_set, year_to_record = place_bet(
                money, won, loss, push, sample_set, year_to_record
            )

    print("\n" + label)
    print_basic_stats(money, won, loss, push, sample_set)
    print(year_to_record)


Picking the road division underdog when they most recently lost between 14 and 18 points
Ending money is 17083
Record: 71-41-4
Win Percent: 63.392857
Confidence 0.998300
{2000: 1, 2001: -1, 2002: 3, 2003: 1, 2004: -1, 2005: -2, 2006: 3, 2007: 0, 2008: 0, 2009: 3, 2010: 4, 2011: 4, 2012: 3, 2013: 0, 2014: 3, 2015: 0, 2016: 0, 2017: 2, 2018: -2, 2019: 4, 2020: 5}

Picking the road underdog when they most recently lost between 14 and 18 points
Ending money is 20302
Record: 174-124-11
Win Percent: 58.389262
Confidence 0.998322
{2000: 10, 2001: -5, 2002: 5, 2003: -2, 2004: 4, 2005: -4, 2006: 6, 2007: 1, 2008: 1, 2009: 2, 2010: 7, 2011: 1, 2012: 7, 2013: 2, 2014: -1, 2015: 7, 2016: -3, 2017: 4, 2018: 1, 2019: 6, 2020: 1}

Picking the road underdog
Ending money is 3145
Record: 1865-1720-106
Win Percent: 52.022315
Confidence 0.992311
{2000: 11, 2001: 7, 2002: 14, 2003: 0, 2004: 8, 2005: -22, 2006: 18, 2007: -4, 2008: 14, 2009: 11, 2010: 7, 2011: 7, 2012: 11, 2013: -7, 2014: 8, 2015: 20, 2016:

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=f0efbe77-01fa-4860-b5ee-e7eac30d44e8' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>