## Part 1 - Import and Explore Data 

In [1]:
#Import Libraries
import pandas as pd
import numpy as np
import datetime
import statsmodels.formula.api as sm

In [3]:
#Import NFL_Game dataset
NFL_Game=pd.read_csv("Assignment Data/Week 4/nfl_game.csv")
display(NFL_Game.head())

Unnamed: 0,game_id,stadium,date,score,weather_temperature,weather_wind_mph,weather_humidity,score_diff,home,win,...,stadium_close,stadium_type,stadium_age,conference,division,season,team_division,team_division_pre2002,team,stadium_neutral
0,1,Orange Bowl,9/2/1966,14,83,6,71,-9,1,0,...,2008.0,outdoor,29,NFC,NFC West,1966,AFC East,AFC East,Miami Dolphins,0
1,1,Orange Bowl,9/2/1966,23,83,6,71,9,0,1,...,2008.0,outdoor,29,AFC,AFC Central,1966,AFC West,AFC West,Oakland Raiders,0
2,2,Rice Stadium,9/3/1966,45,81,7,70,38,1,1,...,,outdoor,16,NFC,NFC West,1966,,AFC Central,Houston Oilers,0
3,2,Rice Stadium,9/3/1966,7,81,7,70,-38,0,0,...,,outdoor,16,NFC,NFC West,1966,AFC West,AFC West,Denver Broncos,0
4,3,Balboa Stadium,9/4/1966,27,70,7,82,20,1,1,...,,outdoor,52,NFC,NFC West,1966,AFC West,AFC West,San Diego Chargers,0


In [5]:
NFL_Game['date'] = pd.to_datetime(NFL_Game['date'], format='%m/%d/%Y')
print(NFL_Game['date'].describe())


count                   24314
unique                   2065
top       2017-01-01 00:00:00
freq                       32
first     1966-09-02 00:00:00
last      2019-12-29 00:00:00
Name: date, dtype: object


  


In [7]:
home_score_stats = NFL_Game[NFL_Game['home'] == 1]['score'].describe()
away_score_stats = NFL_Game[NFL_Game['home'] == 0]['score'].describe()

print("Home Game Score Summary:\n", home_score_stats)
print("\nAway Game Score Summary:\n", away_score_stats)

Home Game Score Summary:
 count    12157.000000
mean        22.254997
std         10.533005
min          0.000000
25%         14.000000
50%         21.000000
75%         29.000000
max         72.000000
Name: score, dtype: float64

Away Game Score Summary:
 count    12157.000000
mean        19.643004
std         10.166614
min          0.000000
25%         13.000000
50%         20.000000
75%         27.000000
max         62.000000
Name: score, dtype: float64


In [8]:
corr_win_home = NFL_Game['win'].corr(NFL_Game['home'])
print("Correlation between win and home:", corr_win_home)


Correlation between win and home: 0.14790211753177582


In [9]:
corr_score_diff_home = NFL_Game['score_diff'].corr(NFL_Game['home'])
print("Correlation between score_diff and home:", corr_score_diff_home)


Correlation between score_diff and home: 0.1725059685987271


In [10]:
corr_score_temp = NFL_Game['score'].corr(NFL_Game['weather_temperature'])
corr_score_humidity = NFL_Game['score'].corr(NFL_Game['weather_humidity'])
corr_score_wind = NFL_Game['score'].corr(NFL_Game['weather_wind_mph'])

print("Correlation between score and weather_temperature:", corr_score_temp)
print("Correlation between score and weather_humidity:", corr_score_humidity)
print("Correlation between score and weather_wind_mph:", corr_score_wind)


Correlation between score and weather_temperature: 0.033616900808932
Correlation between score and weather_humidity: -0.03278832087607434
Correlation between score and weather_wind_mph: -0.07895602955806053


## Part 2 - Regression Analysis 1 - Test of Home Game Advantage

In [11]:
# Regression Model 1: home as the independent variable
reg1_1 = sm.ols(formula='score_diff ~ home', data=NFL_Game).fit()

# Display results
print(reg1_1.summary())


                            OLS Regression Results                            
Dep. Variable:             score_diff   R-squared:                       0.030
Model:                            OLS   Adj. R-squared:                  0.030
Method:                 Least Squares   F-statistic:                     745.7
Date:                Sun, 15 Dec 2024   Prob (F-statistic):          9.64e-162
Time:                        16:47:29   Log-Likelihood:            -1.0020e+05
No. Observations:               24314   AIC:                         2.004e+05
Df Residuals:                   24312   BIC:                         2.004e+05
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.6120      0.135    -19.309      0.0

In [12]:
# Regression Model 2: home, stadium_capacity, stadium_neutral, and interaction between home and stadium_neutral
reg1_2 = sm.ols(formula='score_diff ~ home + stadium_capacity + stadium_neutral + home:stadium_neutral', data=NFL_Game).fit()

# Display results
print(reg1_2.summary())


                            OLS Regression Results                            
Dep. Variable:             score_diff   R-squared:                       0.031
Model:                            OLS   Adj. R-squared:                  0.030
Method:                 Least Squares   F-statistic:                     191.7
Date:                Sun, 15 Dec 2024   Prob (F-statistic):          3.99e-162
Time:                        16:47:37   Log-Likelihood:            -1.0019e+05
No. Observations:               24314   AIC:                         2.004e+05
Df Residuals:                   24309   BIC:                         2.004e+05
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
Intercept               -2.6386 

In [13]:
# Regression Model 3: home, stadium_capacity, stadium_neutral, interaction between home and stadium_neutral, and team and opponent
reg1_3 = sm.ols(formula='score_diff ~ home + stadium_capacity + stadium_neutral + home:stadium_neutral + team + opponent', data=NFL_Game).fit()

# Display results
print(reg1_3.summary())


                            OLS Regression Results                            
Dep. Variable:             score_diff   R-squared:                       0.066
Model:                            OLS   Adj. R-squared:                  0.062
Method:                 Least Squares   F-statistic:                     20.23
Date:                Sun, 15 Dec 2024   Prob (F-statistic):          7.02e-288
Time:                        16:47:43   Log-Likelihood:                -99748.
No. Observations:               24314   AIC:                         1.997e+05
Df Residuals:                   24229   BIC:                         2.004e+05
Df Model:                          84                                         
Covariance Type:            nonrobust                                         
                                       coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------------
Intercep

## Part 3 - Regression Analysis 2 -- Impact of Outside Factors on Scores