In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import os

In [2]:
nfl_df = pd.read_csv("raw_data/spreadspoke_scores.csv")

In [3]:
nfl_df.dtypes

schedule_date           object
schedule_season          int64
schedule_week           object
schedule_playoff          bool
team_home               object
score_home             float64
score_away             float64
team_away               object
team_favorite_id        object
spread_favorite        float64
over_under_line         object
stadium                 object
stadium_neutral           bool
weather_temperature    float64
weather_wind_mph       float64
weather_humidity        object
weather_detail          object
dtype: object

In [4]:
nfl_df.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
0,9/2/1966,1966,1,False,Miami Dolphins,14.0,23.0,Oakland Raiders,,,,Orange Bowl,False,83.0,6.0,71,
1,9/3/1966,1966,1,False,Houston Oilers,45.0,7.0,Denver Broncos,,,,Rice Stadium,False,81.0,7.0,70,
2,9/4/1966,1966,1,False,San Diego Chargers,27.0,7.0,Buffalo Bills,,,,Balboa Stadium,False,70.0,7.0,82,
3,9/9/1966,1966,2,False,Miami Dolphins,14.0,19.0,New York Jets,,,,Orange Bowl,False,82.0,11.0,78,
4,9/10/1966,1966,1,False,Green Bay Packers,24.0,3.0,Baltimore Colts,,,,Lambeau Field,False,64.0,8.0,62,


In [5]:
nfl_df.isnull().sum()

schedule_date              0
schedule_season            0
schedule_week              0
schedule_playoff           0
team_home                  0
score_home               137
score_away               137
team_away                  0
team_favorite_id        2616
spread_favorite         2616
over_under_line         2626
stadium                    0
stadium_neutral            0
weather_temperature      926
weather_wind_mph         926
weather_humidity        4546
weather_detail         10223
dtype: int64

In [6]:
#drop rows that don't have any scores
nfl_df_drop_scores = nfl_df.dropna(subset = ['score_home'])

In [7]:
nfl_df_drop_scores

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
0,9/2/1966,1966,1,False,Miami Dolphins,14.0,23.0,Oakland Raiders,,,,Orange Bowl,False,83.0,6.0,71,
1,9/3/1966,1966,1,False,Houston Oilers,45.0,7.0,Denver Broncos,,,,Rice Stadium,False,81.0,7.0,70,
2,9/4/1966,1966,1,False,San Diego Chargers,27.0,7.0,Buffalo Bills,,,,Balboa Stadium,False,70.0,7.0,82,
3,9/9/1966,1966,2,False,Miami Dolphins,14.0,19.0,New York Jets,,,,Orange Bowl,False,82.0,11.0,78,
4,9/10/1966,1966,1,False,Green Bay Packers,24.0,3.0,Baltimore Colts,,,,Lambeau Field,False,64.0,8.0,62,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12792,11/1/2020,2020,8,False,Kansas City Chiefs,35.0,9.0,New York Jets,KC,-19.5,49,Arrowhead Stadium,False,,,,
12793,11/1/2020,2020,8,False,Miami Dolphins,28.0,17.0,Los Angeles Rams,LAR,-3.5,45.5,Hard Rock Stadium,False,,,,
12794,11/1/2020,2020,8,False,Philadelphia Eagles,23.0,9.0,Dallas Cowboys,PHI,-11.5,42.5,Lincoln Financial Field,False,,,,
12795,11/1/2020,2020,8,False,Seattle Seahawks,37.0,27.0,San Francisco 49ers,SEA,-3.0,53.5,CenturyLink Field,False,,,,


In [8]:
nfl_df_drop_scores.isnull().sum()

schedule_date              0
schedule_season            0
schedule_week              0
schedule_playoff           0
team_home                  0
score_home                 0
score_away                 0
team_away                  0
team_favorite_id        2479
spread_favorite         2479
over_under_line         2489
stadium                    0
stadium_neutral            0
weather_temperature      825
weather_wind_mph         825
weather_humidity        4409
weather_detail         10122
dtype: int64

In [9]:
#drop rows that don't have any spread_favorite
nfl_df_drop_spread = nfl_df_drop_scores.dropna(subset = ['spread_favorite'])

In [10]:
nfl_df_drop_spread

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
170,1/15/1967,1966,Superbowl,True,Green Bay Packers,35.0,10.0,Kansas City Chiefs,GB,-14.0,,Los Angeles Memorial Coliseum,True,54.0,7.0,90,
350,1/14/1968,1967,Superbowl,True,Green Bay Packers,33.0,14.0,Oakland Raiders,GB,-13.5,43,Orange Bowl,True,60.0,12.0,74,
538,1/12/1969,1968,Superbowl,True,Baltimore Colts,7.0,16.0,New York Jets,IND,-18.0,40,Orange Bowl,True,66.0,12.0,80,
727,1/11/1970,1969,Superbowl,True,Kansas City Chiefs,23.0,7.0,Minnesota Vikings,MIN,-12.0,39,Tulane Stadium,True,55.0,14.0,84,
916,1/17/1971,1970,Superbowl,True,Baltimore Colts,16.0,13.0,Dallas Cowboys,IND,-2.5,36,Orange Bowl,True,59.0,11.0,60,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12792,11/1/2020,2020,8,False,Kansas City Chiefs,35.0,9.0,New York Jets,KC,-19.5,49,Arrowhead Stadium,False,,,,
12793,11/1/2020,2020,8,False,Miami Dolphins,28.0,17.0,Los Angeles Rams,LAR,-3.5,45.5,Hard Rock Stadium,False,,,,
12794,11/1/2020,2020,8,False,Philadelphia Eagles,23.0,9.0,Dallas Cowboys,PHI,-11.5,42.5,Lincoln Financial Field,False,,,,
12795,11/1/2020,2020,8,False,Seattle Seahawks,37.0,27.0,San Francisco 49ers,SEA,-3.0,53.5,CenturyLink Field,False,,,,


In [11]:
nfl_df_drop_spread.isnull().sum()

schedule_date             0
schedule_season           0
schedule_week             0
schedule_playoff          0
team_home                 0
score_home                0
score_away                0
team_away                 0
team_favorite_id          0
spread_favorite           0
over_under_line          10
stadium                   0
stadium_neutral           0
weather_temperature     595
weather_wind_mph        595
weather_humidity       4019
weather_detail         7801
dtype: int64

In [12]:
#drop remaining over_under_line
nfl_df_drop_over_under = nfl_df_drop_spread.dropna(subset = ['over_under_line'])

In [13]:
nfl_df_drop_over_under

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
350,1/14/1968,1967,Superbowl,True,Green Bay Packers,33.0,14.0,Oakland Raiders,GB,-13.5,43,Orange Bowl,True,60.0,12.0,74,
538,1/12/1969,1968,Superbowl,True,Baltimore Colts,7.0,16.0,New York Jets,IND,-18.0,40,Orange Bowl,True,66.0,12.0,80,
727,1/11/1970,1969,Superbowl,True,Kansas City Chiefs,23.0,7.0,Minnesota Vikings,MIN,-12.0,39,Tulane Stadium,True,55.0,14.0,84,
916,1/17/1971,1970,Superbowl,True,Baltimore Colts,16.0,13.0,Dallas Cowboys,IND,-2.5,36,Orange Bowl,True,59.0,11.0,60,
1105,1/16/1972,1971,Superbowl,True,Dallas Cowboys,24.0,3.0,Miami Dolphins,DAL,-6.0,34,Tulane Stadium,True,34.0,18.0,40,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12792,11/1/2020,2020,8,False,Kansas City Chiefs,35.0,9.0,New York Jets,KC,-19.5,49,Arrowhead Stadium,False,,,,
12793,11/1/2020,2020,8,False,Miami Dolphins,28.0,17.0,Los Angeles Rams,LAR,-3.5,45.5,Hard Rock Stadium,False,,,,
12794,11/1/2020,2020,8,False,Philadelphia Eagles,23.0,9.0,Dallas Cowboys,PHI,-11.5,42.5,Lincoln Financial Field,False,,,,
12795,11/1/2020,2020,8,False,Seattle Seahawks,37.0,27.0,San Francisco 49ers,SEA,-3.0,53.5,CenturyLink Field,False,,,,


In [14]:
nfl_df_drop_over_under.isnull().sum()

schedule_date             0
schedule_season           0
schedule_week             0
schedule_playoff          0
team_home                 0
score_home                0
score_away                0
team_away                 0
team_favorite_id          0
spread_favorite           0
over_under_line           0
stadium                   0
stadium_neutral           0
weather_temperature     595
weather_wind_mph        595
weather_humidity       4019
weather_detail         7791
dtype: int64

In [15]:
#drop rows that don't have weather_temperature
nfl_df_drop_weather = nfl_df_drop_over_under.dropna(subset = ['weather_temperature'])

In [16]:
nfl_df_drop_weather

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
350,1/14/1968,1967,Superbowl,True,Green Bay Packers,33.0,14.0,Oakland Raiders,GB,-13.5,43,Orange Bowl,True,60.0,12.0,74,
538,1/12/1969,1968,Superbowl,True,Baltimore Colts,7.0,16.0,New York Jets,IND,-18.0,40,Orange Bowl,True,66.0,12.0,80,
727,1/11/1970,1969,Superbowl,True,Kansas City Chiefs,23.0,7.0,Minnesota Vikings,MIN,-12.0,39,Tulane Stadium,True,55.0,14.0,84,
916,1/17/1971,1970,Superbowl,True,Baltimore Colts,16.0,13.0,Dallas Cowboys,IND,-2.5,36,Orange Bowl,True,59.0,11.0,60,
1105,1/16/1972,1971,Superbowl,True,Dallas Cowboys,24.0,3.0,Miami Dolphins,DAL,-6.0,34,Tulane Stadium,True,34.0,18.0,40,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12770,10/25/2020,2020,7,False,Arizona Cardinals,37.0,34.0,Seattle Seahawks,SEA,-3.5,56,University of Phoenix Stadium,False,72.0,0.0,,DOME
12771,10/25/2020,2020,7,False,Atlanta Falcons,22.0,23.0,Detroit Lions,ATL,-2.0,55,Mercedes-Benz Stadium,False,72.0,0.0,,DOME
12774,10/25/2020,2020,7,False,Houston Texans,20.0,35.0,Green Bay Packers,GB,-3.0,57,NRG Stadium,False,72.0,0.0,,DOME
12778,10/25/2020,2020,7,False,New Orleans Saints,27.0,24.0,Carolina Panthers,NO,-6.5,50,Mercedes-Benz Superdome,False,72.0,0.0,,DOME


In [17]:
nfl_df_drop_weather.isnull().sum()

schedule_date             0
schedule_season           0
schedule_week             0
schedule_playoff          0
team_home                 0
score_home                0
score_away                0
team_away                 0
team_favorite_id          0
spread_favorite           0
over_under_line           0
stadium                   0
stadium_neutral           0
weather_temperature       0
weather_wind_mph          0
weather_humidity       3424
weather_detail         7196
dtype: int64

In [18]:
nfl_df_clean = nfl_df_drop_weather

In [19]:
nfl_df_clean['team_home'].value_counts()

Minnesota Vikings       341
New Orleans Saints      338
Dallas Cowboys          334
Atlanta Falcons         333
New England Patriots    328
Detroit Lions           327
Seattle Seahawks        321
Kansas City Chiefs      319
Denver Broncos          319
Green Bay Packers       318
Philadelphia Eagles     318
Pittsburgh Steelers     318
San Francisco 49ers     318
Chicago Bears           316
Washington Redskins     316
New York Giants         316
Miami Dolphins          316
Buffalo Bills           315
Cincinnati Bengals      315
Tampa Bay Buccaneers    314
New York Jets           314
Indianapolis Colts      302
San Diego Chargers      299
Cleveland Browns        290
Oakland Raiders         217
Arizona Cardinals       215
Carolina Panthers       195
Jacksonville Jaguars    193
Baltimore Ravens        185
St. Louis Rams          175
Tennessee Titans        160
Houston Texans          152
Los Angeles Rams        150
Houston Oilers          144
Los Angeles Raiders      98
St. Louis Cardinals 

In [20]:
#merge team names
nfl_df_clean_home = nfl_df_clean.replace({"San Diego Chargers": "Los Angeles Chargers", 
                                          "Tennessee Oilers": "Tennessee Titans", 
                                          "Baltimore Colts": "Indianapolis Colts",
                                          "Phoenix Cardinals": "Arizona Cardinals",
                                          "St. Louis Cardinals": "Arizona Cardinals",
                                          "Los Angeles Raiders": "Las Vegas Raiders",
                                          "Los Angeles Rams": "St. Louis Rams",
                                          "Oakland Raiders": "Las Vegas Raiders",
                                          "Houston Oilers": "Tennessee Titans",
                                          "Washington Redskins": "Washington Football Team"
                                         })
nfl_df_clean_home['team_home'].value_counts()

Minnesota Vikings           341
Indianapolis Colts          340
New Orleans Saints          338
Dallas Cowboys              334
Atlanta Falcons             333
Arizona Cardinals           329
New England Patriots        328
Detroit Lions               327
St. Louis Rams              325
Seattle Seahawks            321
Tennessee Titans            320
Kansas City Chiefs          319
Denver Broncos              319
Green Bay Packers           318
San Francisco 49ers         318
Pittsburgh Steelers         318
Philadelphia Eagles         318
New York Giants             316
Chicago Bears               316
Washington Football Team    316
Miami Dolphins              316
Los Angeles Chargers        315
Cincinnati Bengals          315
Buffalo Bills               315
Las Vegas Raiders           315
New York Jets               314
Tampa Bay Buccaneers        314
Cleveland Browns            290
Carolina Panthers           195
Jacksonville Jaguars        193
Baltimore Ravens            185
Houston 

In [21]:
nfl_df_clean_home['team_favorite_id'].value_counts()

PIT     425
NE      412
DEN     405
DAL     405
SF      387
GB      368
MIN     367
PHI     361
MIA     342
NO      339
SEA     322
NYG     320
LAC     314
KC      308
WAS     307
IND     306
LAR     306
ATL     302
LVR     301
TEN     298
CHI     296
BUF     290
NYJ     277
CIN     255
DET     238
TB      232
ARI     215
BAL     209
CLE     197
CAR     187
JAX     158
PICK    142
HOU     122
Name: team_favorite_id, dtype: int64

In [22]:
#replace truncated team id with full team name
nfl_df_clean_home = nfl_df_clean.replace({"PIT": "Pittsburgh Steelers",
                                          "NE": "New England Patriots",
                                          "DEN": "Denver Broncos",
                                          "DAL": "Dalas Cowboys",
                                          "SF": "San Francisco 49ers",
                                          "GB": "Green Bay Packers",
                                          "MIN": "Minnesota Vikings",
                                          "PHI": "Philadelphia Eagles",
                                          "MIA": "Miami Dolphins",
                                          "NO": "New Orleans Saints",
                                          "SEA": "Seattle Seahawks",
                                          "NYG": "New York Giants",
                                          "LAC": "Los Angeles Chargers",
                                          "KC": "Kansas City Chiefs",
                                          "WAS": "Washington Football Team",
                                          "IND": "Indianapolis Colts",
                                          "LAR": "Los Angeles Rams",
                                          "ATL": "Atlanta Falcons",
                                          "LVR": "Las Vegas Raiders",
                                          "TEN": "Tennessee Titans",
                                          "CHI": "Chicago Bears",
                                          "Buf": "Buffalo Bills",
                                          "NYJ": "New York Jets",
                                          "CIN": "Cincinnati Bengals",
                                          "DET": "Detroit Lions",
                                          "TB": "Tampa Bay Buccaneers",
                                          "ARI": "Arizona Cardinals",
                                          "BAL": "Baltimore Ravens",
                                          "CLE": "Cleveland Browns",
                                          "CAR": "Carolina Panthers",
                                          "JAX": "Jacksonville Jaguars",
                                          "HOU": "Houston Texans"
                                         })

In [23]:
#create total score column
nfl_df_clean_home['total_score'] = nfl_df_clean_home['score_home'] + nfl_df_clean_home['score_away']
nfl_df_clean_home

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,total_score
350,1/14/1968,1967,Superbowl,True,Green Bay Packers,33.0,14.0,Oakland Raiders,Green Bay Packers,-13.5,43,Orange Bowl,True,60.0,12.0,74,,47.0
538,1/12/1969,1968,Superbowl,True,Baltimore Colts,7.0,16.0,New York Jets,Indianapolis Colts,-18.0,40,Orange Bowl,True,66.0,12.0,80,,23.0
727,1/11/1970,1969,Superbowl,True,Kansas City Chiefs,23.0,7.0,Minnesota Vikings,Minnesota Vikings,-12.0,39,Tulane Stadium,True,55.0,14.0,84,,30.0
916,1/17/1971,1970,Superbowl,True,Baltimore Colts,16.0,13.0,Dallas Cowboys,Indianapolis Colts,-2.5,36,Orange Bowl,True,59.0,11.0,60,,29.0
1105,1/16/1972,1971,Superbowl,True,Dallas Cowboys,24.0,3.0,Miami Dolphins,Dalas Cowboys,-6.0,34,Tulane Stadium,True,34.0,18.0,40,,27.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12770,10/25/2020,2020,7,False,Arizona Cardinals,37.0,34.0,Seattle Seahawks,Seattle Seahawks,-3.5,56,University of Phoenix Stadium,False,72.0,0.0,,DOME,71.0
12771,10/25/2020,2020,7,False,Atlanta Falcons,22.0,23.0,Detroit Lions,Atlanta Falcons,-2.0,55,Mercedes-Benz Stadium,False,72.0,0.0,,DOME,45.0
12774,10/25/2020,2020,7,False,Houston Texans,20.0,35.0,Green Bay Packers,Green Bay Packers,-3.0,57,NRG Stadium,False,72.0,0.0,,DOME,55.0
12778,10/25/2020,2020,7,False,New Orleans Saints,27.0,24.0,Carolina Panthers,New Orleans Saints,-6.5,50,Mercedes-Benz Superdome,False,72.0,0.0,,DOME,51.0


In [24]:
nfl_df_clean_home.dtypes

schedule_date           object
schedule_season          int64
schedule_week           object
schedule_playoff          bool
team_home               object
score_home             float64
score_away             float64
team_away               object
team_favorite_id        object
spread_favorite        float64
over_under_line         object
stadium                 object
stadium_neutral           bool
weather_temperature    float64
weather_wind_mph       float64
weather_humidity        object
weather_detail          object
total_score            float64
dtype: object

In [25]:
#change over_under_line column to a float dtype
nfl_df_clean_home['over_under_line'] = pd.to_numeric(nfl_df_clean_home['over_under_line'],errors='coerce')

In [26]:
#calc games taht were over or under total score and add new column
for index, row in nfl_df_clean_home.iterrows():
    total = row[17]
    line = row[10]
    if total > line:
        nfl_df_clean_home.loc[index,'over_under'] = 'over'
    elif total < line:
        nfl_df_clean_home.loc[index,'over_under'] = 'under'
    else:
        nfl_df_clean_home.loc[index,'over_under'] = 'push'

nfl_df_clean_home

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,total_score,over_under
350,1/14/1968,1967,Superbowl,True,Green Bay Packers,33.0,14.0,Oakland Raiders,Green Bay Packers,-13.5,43.0,Orange Bowl,True,60.0,12.0,74,,47.0,over
538,1/12/1969,1968,Superbowl,True,Baltimore Colts,7.0,16.0,New York Jets,Indianapolis Colts,-18.0,40.0,Orange Bowl,True,66.0,12.0,80,,23.0,under
727,1/11/1970,1969,Superbowl,True,Kansas City Chiefs,23.0,7.0,Minnesota Vikings,Minnesota Vikings,-12.0,39.0,Tulane Stadium,True,55.0,14.0,84,,30.0,under
916,1/17/1971,1970,Superbowl,True,Baltimore Colts,16.0,13.0,Dallas Cowboys,Indianapolis Colts,-2.5,36.0,Orange Bowl,True,59.0,11.0,60,,29.0,under
1105,1/16/1972,1971,Superbowl,True,Dallas Cowboys,24.0,3.0,Miami Dolphins,Dalas Cowboys,-6.0,34.0,Tulane Stadium,True,34.0,18.0,40,,27.0,under
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12770,10/25/2020,2020,7,False,Arizona Cardinals,37.0,34.0,Seattle Seahawks,Seattle Seahawks,-3.5,56.0,University of Phoenix Stadium,False,72.0,0.0,,DOME,71.0,over
12771,10/25/2020,2020,7,False,Atlanta Falcons,22.0,23.0,Detroit Lions,Atlanta Falcons,-2.0,55.0,Mercedes-Benz Stadium,False,72.0,0.0,,DOME,45.0,under
12774,10/25/2020,2020,7,False,Houston Texans,20.0,35.0,Green Bay Packers,Green Bay Packers,-3.0,57.0,NRG Stadium,False,72.0,0.0,,DOME,55.0,under
12778,10/25/2020,2020,7,False,New Orleans Saints,27.0,24.0,Carolina Panthers,New Orleans Saints,-6.5,50.0,Mercedes-Benz Superdome,False,72.0,0.0,,DOME,51.0,over


In [27]:
#calc performance against the spread
for index, row in nfl_df_clean_home.iterrows():
    home_team = row[4]
    away_team = row[7]
    fav_team = row[8]
    home_score = row[5]
    away_score = row[6]
    spread = row[9]
    if home_team == fav_team:
        fav_score = home_score + spread
        diff = fav_score - away_score
    elif away_team == fav_team:
        fav_score = away_score + spread
        diff = fav_score - home_score
    
    nfl_df_clean_home.loc[index,'spread_diff'] = diff
    if diff > 0:
        nfl_df_clean_home.loc[index,'cover'] = 'yes'
    elif diff < 0:
        nfl_df_clean_home.loc[index,'cover'] = 'no'
    else:
        nfl_df_clean_home.loc[index,'cover'] = 'push'

In [28]:
nfl_df_clean_home

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,...,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,total_score,over_under,spread_diff,cover
350,1/14/1968,1967,Superbowl,True,Green Bay Packers,33.0,14.0,Oakland Raiders,Green Bay Packers,-13.5,...,Orange Bowl,True,60.0,12.0,74,,47.0,over,5.5,yes
538,1/12/1969,1968,Superbowl,True,Baltimore Colts,7.0,16.0,New York Jets,Indianapolis Colts,-18.0,...,Orange Bowl,True,66.0,12.0,80,,23.0,under,5.5,yes
727,1/11/1970,1969,Superbowl,True,Kansas City Chiefs,23.0,7.0,Minnesota Vikings,Minnesota Vikings,-12.0,...,Tulane Stadium,True,55.0,14.0,84,,30.0,under,-28.0,no
916,1/17/1971,1970,Superbowl,True,Baltimore Colts,16.0,13.0,Dallas Cowboys,Indianapolis Colts,-2.5,...,Orange Bowl,True,59.0,11.0,60,,29.0,under,-28.0,no
1105,1/16/1972,1971,Superbowl,True,Dallas Cowboys,24.0,3.0,Miami Dolphins,Dalas Cowboys,-6.0,...,Tulane Stadium,True,34.0,18.0,40,,27.0,under,-28.0,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12770,10/25/2020,2020,7,False,Arizona Cardinals,37.0,34.0,Seattle Seahawks,Seattle Seahawks,-3.5,...,University of Phoenix Stadium,False,72.0,0.0,,DOME,71.0,over,-6.5,no
12771,10/25/2020,2020,7,False,Atlanta Falcons,22.0,23.0,Detroit Lions,Atlanta Falcons,-2.0,...,Mercedes-Benz Stadium,False,72.0,0.0,,DOME,45.0,under,-3.0,no
12774,10/25/2020,2020,7,False,Houston Texans,20.0,35.0,Green Bay Packers,Green Bay Packers,-3.0,...,NRG Stadium,False,72.0,0.0,,DOME,55.0,under,12.0,yes
12778,10/25/2020,2020,7,False,New Orleans Saints,27.0,24.0,Carolina Panthers,New Orleans Saints,-6.5,...,Mercedes-Benz Superdome,False,72.0,0.0,,DOME,51.0,over,-3.5,no


In [29]:
#remove games before the 1979 season
nfl_df_1979_bool = nfl_df_clean_home.loc[:,'schedule_season']>=1979
nfl_df_1979 = nfl_df_clean_home[nfl_df_1979_bool]
nfl_df_1979

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,...,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,total_score,over_under,spread_diff,cover
2501,9/1/1979,1979,1,False,Tampa Bay Buccaneers,31.0,16.0,Detroit Lions,Tampa Bay Buccaneers,-3.0,...,Houlihan's Stadium,False,79.0,9.0,87,,47.0,over,12.0,yes
2502,9/2/1979,1979,1,False,Buffalo Bills,7.0,9.0,Miami Dolphins,Miami Dolphins,-5.0,...,Ralph Wilson Stadium,False,74.0,15.0,74,,16.0,under,-3.0,no
2503,9/2/1979,1979,1,False,Chicago Bears,6.0,3.0,Green Bay Packers,Chicago Bears,-3.0,...,Soldier Field,False,78.0,11.0,68,,9.0,under,0.0,push
2504,9/2/1979,1979,1,False,Denver Broncos,10.0,0.0,Cincinnati Bengals,Denver Broncos,-3.0,...,Mile High Stadium,False,69.0,6.0,38,,10.0,under,7.0,yes
2505,9/2/1979,1979,1,False,Kansas City Chiefs,14.0,0.0,Baltimore Colts,Kansas City Chiefs,-1.0,...,Arrowhead Stadium,False,76.0,8.0,71,,14.0,under,13.0,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12770,10/25/2020,2020,7,False,Arizona Cardinals,37.0,34.0,Seattle Seahawks,Seattle Seahawks,-3.5,...,University of Phoenix Stadium,False,72.0,0.0,,DOME,71.0,over,-6.5,no
12771,10/25/2020,2020,7,False,Atlanta Falcons,22.0,23.0,Detroit Lions,Atlanta Falcons,-2.0,...,Mercedes-Benz Stadium,False,72.0,0.0,,DOME,45.0,under,-3.0,no
12774,10/25/2020,2020,7,False,Houston Texans,20.0,35.0,Green Bay Packers,Green Bay Packers,-3.0,...,NRG Stadium,False,72.0,0.0,,DOME,55.0,under,12.0,yes
12778,10/25/2020,2020,7,False,New Orleans Saints,27.0,24.0,Carolina Panthers,New Orleans Saints,-6.5,...,Mercedes-Benz Superdome,False,72.0,0.0,,DOME,51.0,over,-3.5,no


In [30]:
nfl_df_1979_absolute = nfl_df_1979['spread_diff'].abs()
nfl_df_1979

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,...,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,total_score,over_under,spread_diff,cover
2501,9/1/1979,1979,1,False,Tampa Bay Buccaneers,31.0,16.0,Detroit Lions,Tampa Bay Buccaneers,-3.0,...,Houlihan's Stadium,False,79.0,9.0,87,,47.0,over,12.0,yes
2502,9/2/1979,1979,1,False,Buffalo Bills,7.0,9.0,Miami Dolphins,Miami Dolphins,-5.0,...,Ralph Wilson Stadium,False,74.0,15.0,74,,16.0,under,-3.0,no
2503,9/2/1979,1979,1,False,Chicago Bears,6.0,3.0,Green Bay Packers,Chicago Bears,-3.0,...,Soldier Field,False,78.0,11.0,68,,9.0,under,0.0,push
2504,9/2/1979,1979,1,False,Denver Broncos,10.0,0.0,Cincinnati Bengals,Denver Broncos,-3.0,...,Mile High Stadium,False,69.0,6.0,38,,10.0,under,7.0,yes
2505,9/2/1979,1979,1,False,Kansas City Chiefs,14.0,0.0,Baltimore Colts,Kansas City Chiefs,-1.0,...,Arrowhead Stadium,False,76.0,8.0,71,,14.0,under,13.0,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12770,10/25/2020,2020,7,False,Arizona Cardinals,37.0,34.0,Seattle Seahawks,Seattle Seahawks,-3.5,...,University of Phoenix Stadium,False,72.0,0.0,,DOME,71.0,over,-6.5,no
12771,10/25/2020,2020,7,False,Atlanta Falcons,22.0,23.0,Detroit Lions,Atlanta Falcons,-2.0,...,Mercedes-Benz Stadium,False,72.0,0.0,,DOME,45.0,under,-3.0,no
12774,10/25/2020,2020,7,False,Houston Texans,20.0,35.0,Green Bay Packers,Green Bay Packers,-3.0,...,NRG Stadium,False,72.0,0.0,,DOME,55.0,under,12.0,yes
12778,10/25/2020,2020,7,False,New Orleans Saints,27.0,24.0,Carolina Panthers,New Orleans Saints,-6.5,...,Mercedes-Benz Superdome,False,72.0,0.0,,DOME,51.0,over,-3.5,no


In [31]:
#create absolute value spread diff column
nfl_df_1979['spread_diff_absolute'] = nfl_df_1979['spread_diff'].abs()
nfl_df_1979

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nfl_df_1979['spread_diff_absolute'] = nfl_df_1979['spread_diff'].abs()


Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,...,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,total_score,over_under,spread_diff,cover,spread_diff_absolute
2501,9/1/1979,1979,1,False,Tampa Bay Buccaneers,31.0,16.0,Detroit Lions,Tampa Bay Buccaneers,-3.0,...,False,79.0,9.0,87,,47.0,over,12.0,yes,12.0
2502,9/2/1979,1979,1,False,Buffalo Bills,7.0,9.0,Miami Dolphins,Miami Dolphins,-5.0,...,False,74.0,15.0,74,,16.0,under,-3.0,no,3.0
2503,9/2/1979,1979,1,False,Chicago Bears,6.0,3.0,Green Bay Packers,Chicago Bears,-3.0,...,False,78.0,11.0,68,,9.0,under,0.0,push,0.0
2504,9/2/1979,1979,1,False,Denver Broncos,10.0,0.0,Cincinnati Bengals,Denver Broncos,-3.0,...,False,69.0,6.0,38,,10.0,under,7.0,yes,7.0
2505,9/2/1979,1979,1,False,Kansas City Chiefs,14.0,0.0,Baltimore Colts,Kansas City Chiefs,-1.0,...,False,76.0,8.0,71,,14.0,under,13.0,yes,13.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12770,10/25/2020,2020,7,False,Arizona Cardinals,37.0,34.0,Seattle Seahawks,Seattle Seahawks,-3.5,...,False,72.0,0.0,,DOME,71.0,over,-6.5,no,6.5
12771,10/25/2020,2020,7,False,Atlanta Falcons,22.0,23.0,Detroit Lions,Atlanta Falcons,-2.0,...,False,72.0,0.0,,DOME,45.0,under,-3.0,no,3.0
12774,10/25/2020,2020,7,False,Houston Texans,20.0,35.0,Green Bay Packers,Green Bay Packers,-3.0,...,False,72.0,0.0,,DOME,55.0,under,12.0,yes,12.0
12778,10/25/2020,2020,7,False,New Orleans Saints,27.0,24.0,Carolina Panthers,New Orleans Saints,-6.5,...,False,72.0,0.0,,DOME,51.0,over,-3.5,no,3.5


In [32]:
nfl_df_1979['game_score_diff_abs'] = (nfl_df_1979['score_home'] - nfl_df_1979['score_away']).abs()
nfl_df_1979

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nfl_df_1979['game_score_diff_abs'] = (nfl_df_1979['score_home'] - nfl_df_1979['score_away']).abs()


Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,...,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,total_score,over_under,spread_diff,cover,spread_diff_absolute,game_score_diff_abs
2501,9/1/1979,1979,1,False,Tampa Bay Buccaneers,31.0,16.0,Detroit Lions,Tampa Bay Buccaneers,-3.0,...,79.0,9.0,87,,47.0,over,12.0,yes,12.0,15.0
2502,9/2/1979,1979,1,False,Buffalo Bills,7.0,9.0,Miami Dolphins,Miami Dolphins,-5.0,...,74.0,15.0,74,,16.0,under,-3.0,no,3.0,2.0
2503,9/2/1979,1979,1,False,Chicago Bears,6.0,3.0,Green Bay Packers,Chicago Bears,-3.0,...,78.0,11.0,68,,9.0,under,0.0,push,0.0,3.0
2504,9/2/1979,1979,1,False,Denver Broncos,10.0,0.0,Cincinnati Bengals,Denver Broncos,-3.0,...,69.0,6.0,38,,10.0,under,7.0,yes,7.0,10.0
2505,9/2/1979,1979,1,False,Kansas City Chiefs,14.0,0.0,Baltimore Colts,Kansas City Chiefs,-1.0,...,76.0,8.0,71,,14.0,under,13.0,yes,13.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12770,10/25/2020,2020,7,False,Arizona Cardinals,37.0,34.0,Seattle Seahawks,Seattle Seahawks,-3.5,...,72.0,0.0,,DOME,71.0,over,-6.5,no,6.5,3.0
12771,10/25/2020,2020,7,False,Atlanta Falcons,22.0,23.0,Detroit Lions,Atlanta Falcons,-2.0,...,72.0,0.0,,DOME,45.0,under,-3.0,no,3.0,1.0
12774,10/25/2020,2020,7,False,Houston Texans,20.0,35.0,Green Bay Packers,Green Bay Packers,-3.0,...,72.0,0.0,,DOME,55.0,under,12.0,yes,12.0,15.0
12778,10/25/2020,2020,7,False,New Orleans Saints,27.0,24.0,Carolina Panthers,New Orleans Saints,-6.5,...,72.0,0.0,,DOME,51.0,over,-3.5,no,3.5,3.0


In [33]:
#copy the df so I can run some analysis on it but still have an original to reference if needed
nfl_df_analysis = nfl_df_1979
nfl_df_analysis

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,...,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,total_score,over_under,spread_diff,cover,spread_diff_absolute,game_score_diff_abs
2501,9/1/1979,1979,1,False,Tampa Bay Buccaneers,31.0,16.0,Detroit Lions,Tampa Bay Buccaneers,-3.0,...,79.0,9.0,87,,47.0,over,12.0,yes,12.0,15.0
2502,9/2/1979,1979,1,False,Buffalo Bills,7.0,9.0,Miami Dolphins,Miami Dolphins,-5.0,...,74.0,15.0,74,,16.0,under,-3.0,no,3.0,2.0
2503,9/2/1979,1979,1,False,Chicago Bears,6.0,3.0,Green Bay Packers,Chicago Bears,-3.0,...,78.0,11.0,68,,9.0,under,0.0,push,0.0,3.0
2504,9/2/1979,1979,1,False,Denver Broncos,10.0,0.0,Cincinnati Bengals,Denver Broncos,-3.0,...,69.0,6.0,38,,10.0,under,7.0,yes,7.0,10.0
2505,9/2/1979,1979,1,False,Kansas City Chiefs,14.0,0.0,Baltimore Colts,Kansas City Chiefs,-1.0,...,76.0,8.0,71,,14.0,under,13.0,yes,13.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12770,10/25/2020,2020,7,False,Arizona Cardinals,37.0,34.0,Seattle Seahawks,Seattle Seahawks,-3.5,...,72.0,0.0,,DOME,71.0,over,-6.5,no,6.5,3.0
12771,10/25/2020,2020,7,False,Atlanta Falcons,22.0,23.0,Detroit Lions,Atlanta Falcons,-2.0,...,72.0,0.0,,DOME,45.0,under,-3.0,no,3.0,1.0
12774,10/25/2020,2020,7,False,Houston Texans,20.0,35.0,Green Bay Packers,Green Bay Packers,-3.0,...,72.0,0.0,,DOME,55.0,under,12.0,yes,12.0,15.0
12778,10/25/2020,2020,7,False,New Orleans Saints,27.0,24.0,Carolina Panthers,New Orleans Saints,-6.5,...,72.0,0.0,,DOME,51.0,over,-3.5,no,3.5,3.0


In [47]:
#save the clean version to a new csv file so I can do analysis
nfl_df_1979.to_csv("spreadspoke_scores_for_analysis.csv")