In [1]:
# Importing necessary packages
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import chisquare

In [2]:
# Uploading DataFrame and inspecting its contents
df = pd.DataFrame()
df = pd.read_csv('/Users/dwreiter/Desktop/Work/Springboard/NFL Capstone Project/Data Wrangling/nfl_df.csv', delimiter='\t', index_col=0)
df.head()

Unnamed: 0,Date,Home Field Goal Attempts,Home First Downs,Home Fourth Down Attempts,Home Fourth Down Successes,Home Fumble TDs,Home Fumbles,Home Fumbles Lost,Home Fumbles Recovered,Home Goal To Go Attempts,...,Road Time of Possession,Road Total TDs,Road Total Yds,Road Touchbacks,Road Two Point Conversion Attempts,Road Two Point Conversion Successes,Road Win,Road Wins,Season,Week Number
0,"on December 20, 2015",1,19,2,0,0,2,2,0,0,...,2248,5,506,3,0,0,True,1,nfl-2015-2016,15
1,"on November 29, 2015",2,17,2,1,0,2,1,0,0,...,2247,2,350,1,0,0,True,1,nfl-2015-2016,12
2,"on December 27, 2015",1,19,0,0,2,2,1,3,3,...,1784,1,265,0,1,1,False,0,nfl-2015-2016,16
3,"on November 22, 2015",2,21,0,0,0,2,0,1,1,...,1875,4,415,3,0,0,False,0,nfl-2015-2016,11
4,"on January 3, 2016",1,16,1,0,0,0,0,0,0,...,2197,4,382,7,0,0,True,1,nfl-2015-2016,17


** For each test below:
<br>
Ho = Statistic has no impact on a team's chance of winning
<br>
Ha = Statistic has an impact on a team's chance of winning
<br>
α = 0.05**

In [3]:
# Running a t-test for Road Rushing Attempts and Road Win
print('Average amount of Road Rushing Attempts in Road Win: ' + str(df.loc[df['Road Win'], 'Road Rushing Atts'].mean()))
print('Average amount of Road Rushing Attempts in Road Loss: ' + str(df.loc[df['Road Loss'], 'Road Rushing Atts'].mean()))

t_stat, p = stats.ttest_ind(df.loc[df['Road Win'], 'Road Rushing Atts'], df.loc[df['Road Loss'], 'Road Rushing Atts'], equal_var=False)
print('t-statistic: ' + str(t_stat))
print('p-value: ' + str(p))

Average amount of Road Rushing Attempts in Road Win: 30.4440677966
Average amount of Road Rushing Attempts in Road Loss: 22.1288659794
t-statistic: 17.2698667801
p-value: 5.89954574468e-55


** Because of how low the p-value is, we can reject the null hypothesis and say Road Rushing Attempts does have an impact on a Road Team's chance of winning.**

In [4]:
# Running a t-test for Road Time of Possession and Road Win
print('Average amount of Road Time of Possession in Road Win: ' + str(df.loc[df['Road Win'], 'Road Time of Possession'].mean()))
print('Average amount of Road Time of Possession in Road Loss: ' + str(df.loc[df['Road Loss'], 'Road Time of Possession'].mean()))

t_stat, p = stats.ttest_ind(df.loc[df['Road Win'], 'Road Time of Possession'], df.loc[df['Road Loss'], 'Road Time of Possession'], equal_var=False)
print('t-statistic: ' + str(t_stat))
print('p-value: ' + str(p))

Average amount of Road Time of Possession in Road Win: 1919.11864407
Average amount of Road Time of Possession in Road Loss: 1702.22164948
t-statistic: 11.5380986752
p-value: 4.16229517181e-28


** Because of how low the p-value is, we can reject the null hypothesis and say Road Time of Possession does have an impact on a Road Team's chance of winning.**

In [5]:
# Running a chi-squared test for Road Passing First Downs and Road Win
RPFD_RW_CT = pd.crosstab(df['Road Passing First Downs'], df['Road Win'])
print(RPFD_RW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(RPFD_RW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

Road Win                  False  True 
Road Passing First Downs              
1                             1      0
2                             2      0
3                             4      0
4                             6      1
5                             7      5
6                            11     11
7                            20     15
8                            30     17
9                            39     27
10                           39     25
11                           39     35
12                           35     27
13                           37     25
14                           29     29
15                           17     28
16                           22     16
17                           23      7
18                           11     10
19                           10      7
20                            5      6
21                            1      4
22                            2      0
24                            1      0
chi-squared: 28.797249653

** Because of how high the p-value is, we can accept the null hypothesis and say Road Passing First Downs does not have an impact on a Road Team's chance of winning.**

In [6]:
# Running a chi-squared test for Road Passing First Downs and Road Win with Filters
RPFD_RW_CT_HMD = pd.crosstab(df['Road Passing First Downs'] > df['Road Passing First Downs'].median(), df['Road Win'])
print(RPFD_RW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(RPFD_RW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

RPFD_RW_CT_LMD = pd.crosstab(df['Road Passing First Downs'] < df['Road Passing First Downs'].median(), df['Road Win'])
print(RPFD_RW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(RPFD_RW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

Road Win                  False  True 
Road Passing First Downs              
False                       233    163
True                        158    132
chi-squared for higher than median: 1.12413269528
p-value for higher than median: 0.289030314887
Road Win                  False  True 
Road Passing First Downs              
False                       193    159
True                        198    136
chi-squared for lower than median: 1.21012863504
p-value for lower than median: 0.27130664755


** When filtering Road Passing First Downs by its median, we still see high p-values that further emphasize how the stat does not impact a Road Team's chance of winning.**

In [7]:
# Running a t-test for Road Passing First Downs and Road Win
print('Average amount of Road Passing First Downs in Road Win: ' + str(df.loc[df['Road Win'], 'Road Passing First Downs'].mean()))
print('Average amount of Road Passing First Downs in Road Loss: ' + str(df.loc[df['Road Loss'], 'Road Passing First Downs'].mean()))

t_stat, p = stats.ttest_ind(df.loc[df['Road Win'], 'Road Passing First Downs'], df.loc[df['Road Loss'], 'Road Passing First Downs'], equal_var=False)
print('t-statistic: ' + str(t_stat))
print('p-value: ' + str(p))

Average amount of Road Passing First Downs in Road Win: 12.1389830508
Average amount of Road Passing First Downs in Road Loss: 11.6701030928
t-statistic: 1.6064041691
p-value: 0.108669167328


** When running a t-test, we still find a high p-value for Road First Downs meaning it does not impact a Road Team's chance of winning.**

In [8]:
# Running a chi-squared test for Road Passing Attempts and Home Win
RPA_HW_CT = pd.crosstab(df['Road Passing Atts'], df['Home Win'])
print(RPA_HW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(RPA_HW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

Home Win           False  True 
Road Passing Atts              
14                     1      0
15                     1      1
17                     2      1
18                     5      1
19                     2      1
20                     1      2
21                     8      1
22                     5      4
23                     4      1
24                     8      3
25                    11      6
26                     5      6
27                    16     12
28                    14      9
29                    18     15
30                    25     11
31                    15     11
32                    15     19
33                    18     23
34                    19     12
35                    11     20
36                    16     20
37                    11     25
38                    12     17
39                    14     17
40                     8     15
41                     7     19
42                     4      9
43                     2      6
44      

** Because of how low the p-value is, we can reject the null hypothesis and say Road Passing Attempts does have an impact on a Home Team's chance of winning.**

In [9]:
# Running a chi-squared test for Road Passing Attempts and Home Win with Filters
RPA_HW_CT_HMD = pd.crosstab(df['Road Passing Atts'] > df['Road Passing Atts'].median(), df['Home Win'])
print(RPA_HW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(RPA_HW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

RPA_HW_CT_LMD = pd.crosstab(df['Road Passing Atts'] < df['Road Passing Atts'].median(), df['Home Win'])
print(RPA_HW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(RPA_HW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

Home Win           False  True 
Road Passing Atts              
False                204    159
True                  93    230
chi-squared for higher than median: 51.1787852518
p-value for higher than median: 8.43250385099e-13
Home Win           False  True 
Road Passing Atts              
False                104    250
True                 193    139
chi-squared for lower than median: 56.5320754533
p-value for lower than median: 5.52890899496e-14


** When filtering Road Passing Attempts by its median, we still see low p-values that further emphasize how the stat does impact a Home Team's chance of winning.**

In [10]:
# Running a t-test for Road Passing Attempts and Home Win
print('Average amount of Road Passing Attempts in Home Win: ' + str(df.loc[df['Home Win'], 'Road Passing Atts'].mean()))
print('Average amount of Road Passing Attempts in Home Loss: ' + str(df.loc[df['Home Loss'], 'Road Passing Atts'].mean()))

t_stat, p = stats.ttest_ind(df.loc[df['Home Win'], 'Road Passing Atts'], df.loc[df['Home Loss'], 'Road Passing Atts'], equal_var=False)
print('t-statistic: ' + str(t_stat))
print('p-value: ' + str(p))

Average amount of Road Passing Attempts in Home Win: 37.6709511568
Average amount of Road Passing Attempts in Home Loss: 32.2406779661
t-statistic: 9.31573275791
p-value: 1.71966856267e-19


** When running a t-test, we still find a low p-value for Road Passing Attempts meaning it does impact a Home Team's chance of winning.**

In [11]:
# Running a chi-squared test for Road Sacks Given Up and Home Win
RSGU_HW_CT = pd.crosstab(df['Road Sacks Given Up'], df['Home Win'])
print(RSGU_HW_CT )
chi, p, d_f, e_f = stats.chi2_contingency(RSGU_HW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

Home Win             False  True 
Road Sacks Given Up              
0                       59     30
1                       78     68
2                       80     96
3                       47     79
4                       19     47
5                        9     38
6                        4     20
7                        1      7
8                        0      2
9                        0      2
chi-squared: 57.3482266227
p-value: 4.33405906973e-09


** Because of how low the p-value is, we can reject the null hypothesis and say Road Sacks Given Up does have an impact on a Home Team's chance of winning.**

In [12]:
# Running a chi-squared test for Road Sacks Given Up and Home Win with Filters
RSGU_HW_CT_HMD = pd.crosstab(df['Road Sacks Given Up'] > df['Road Sacks Given Up'].median(), df['Home Win'])
print(RSGU_HW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(RSGU_HW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

RSGU_HW_CT_LMD = pd.crosstab(df['Road Sacks Given Up'] < df['Road Sacks Given Up'].median(), df['Home Win'])
print(RSGU_HW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(RSGU_HW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

Home Win             False  True 
Road Sacks Given Up              
False                  217    194
True                    80    195
chi-squared for higher than median: 36.7587366997
p-value for higher than median: 1.33690144645e-09
Home Win             False  True 
Road Sacks Given Up              
False                  160    291
True                   137     98
chi-squared for lower than median: 31.8516465066
p-value for lower than median: 1.66408550223e-08


** When filtering Road Sacks Given Up by its median, we still see low p-values that further emphasize how the stat does impact a Home Team's chance of winning.**

In [13]:
# Running a t-test for Road Sacks Given Up and Home Win
print('Average amount of Road Sacks Given Up in Home Win: ' + str(df.loc[df['Home Win'], 'Road Sacks Given Up'].mean()))
print('Average amount of Road Sacks Given Up in Home Loss: ' + str(df.loc[df['Home Loss'], 'Road Sacks Given Up'].mean()))

t_stat, p = stats.ttest_ind(df.loc[df['Home Win'], 'Road Sacks Given Up'], df.loc[df['Home Loss'], 'Road Sacks Given Up'], equal_var=False)
print('t-statistic: ' + str(t_stat))
print('p-value: ' + str(p))

Average amount of Road Sacks Given Up in Home Win: 2.77120822622
Average amount of Road Sacks Given Up in Home Loss: 1.79322033898
t-statistic: 7.98954623203
p-value: 5.79897176919e-15


** When running a t-test, we still find a low p-value for Road Sacks Given Up meaning it does impact a Home Team's chance of winning.**

In [14]:
# Running a chi-squared test for Home Interception Touchdowns and Home Win
HITD_HW_CT = pd.crosstab(df['Home INT TDs'], df['Home Win'])
print(HITD_HW_CT )
chi, p, d_f, e_f = stats.chi2_contingency(HITD_HW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

Home Win      False  True 
Home INT TDs              
0               290    345
1                 7     41
2                 0      3
chi-squared: 19.8662285319
p-value: 4.85403927745e-05


** Because of how low the p-value is, we can reject the null hypothesis and say Home INT TDs does have an impact on a Home Team's chance of winning.**

In [15]:
# Running a chi-squared test for Home Interception Touchdowns and Home Win with Filters
HITD_HW_CT_HMD = pd.crosstab(df['Home INT TDs'] > df['Home INT TDs'].median(), df['Home Win'])
print(HITD_HW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(HITD_HW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

HITD_HW_CT_LMD = pd.crosstab(df['Home INT TDs'] < df['Home INT TDs'].median(), df['Home Win'])
print(HITD_HW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(HITD_HW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

Home Win      False  True 
Home INT TDs              
False           290    345
True              7     44
chi-squared for higher than median: 18.3420496245
p-value for higher than median: 1.84587652133e-05
Home Win      False  True 
Home INT TDs              
False           297    389
chi-squared for lower than median: 0.0
p-value for lower than median: 1.0


** When filtering Home INT TDs by its median, we still see a low p-value above the median but a high p-value beneath the median. This may be because of how uncommon this statistic can be and since many times it could be equal to 0, when you get less than 1, Home INT TDs may not actually have an impact on a Home Team's chance of winning.**

In [16]:
# Running a t-test for Home Interception Touchdowns and Home Win
print('Average amount of Home Interception Touchdowns in Home Win: ' + str(df.loc[df['Home Win'], 'Home INT TDs'].mean()))
print('Average amount of Home Interception Touchdowns in Home Loss: ' + str(df.loc[df['Home Loss'], 'Home INT TDs'].mean()))

t_stat, p = stats.ttest_ind(df.loc[df['Home Win'], 'Home INT TDs'], df.loc[df['Home Loss'], 'Home INT TDs'], equal_var=False)
print('t-statistic: ' + str(t_stat))
print('p-value: ' + str(p))

Average amount of Home Interception Touchdowns in Home Win: 0.120822622108
Average amount of Home Interception Touchdowns in Home Loss: 0.0237288135593
t-statistic: 4.90196112682
p-value: 1.24411005466e-06


** When running a t-test, we still find a low p-value for Home INT TDs meaning it does impact a Home Team's chance of winning.**

In [17]:
# Running a chi-squared test for Home Goal To Go Successes and Home Win
HGTGS_HW_CT = pd.crosstab(df['Home Goal To Go Successes'], df['Home Win'])
print(HGTGS_HW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(HGTGS_HW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

Home Win                   False  True 
Home Goal To Go Successes              
0                            121     72
1                            113    153
2                             46    108
3                             14     39
4                              3     13
5                              0      4
chi-squared: 54.0936640619
p-value: 2.00495339211e-10


** Because of how low the p-value is, we can reject the null hypothesis and say Home Goal To Go Successes does have an impact on a Home Team's chance of winning.**

In [18]:
# Running a chi-squared test for Home Goal To Go Successes and Home Win with Filters
HGTGS_HW_CT_HMD = pd.crosstab(df['Home Goal To Go Successes'] > df['Home Goal To Go Successes'].median(), df['Home Win'])
print(HGTGS_HW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(HGTGS_HW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

HGTGS_HW_CT_LMD = pd.crosstab(df['Home Goal To Go Successes'] < df['Home Goal To Go Successes'].median(), df['Home Win'])
print(HGTGS_HW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(HGTGS_HW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

Home Win                   False  True 
Home Goal To Go Successes              
False                        234    225
True                          63    164
chi-squared for higher than median: 32.4375161059
p-value for higher than median: 1.23087130834e-08
Home Win                   False  True 
Home Goal To Go Successes              
False                        176    317
True                         121     72
chi-squared for lower than median: 40.0770374079
p-value for lower than median: 2.44142053736e-10


** When filtering Home Goal To Go Successes by its median, we still see low p-values that further emphasize how the stat does impact a Home Team's chance of winning.**

In [19]:
# Running a t-test for Home Goal To Go Successes and Home Win
print('Average amount of Home Goal To Go Successes in Home Win: ' + str(df.loc[df['Home Win'], 'Home Goal To Go Successes'].mean()))
print('Average amount of Home Goal To Go Successes in Home Loss: ' + str(df.loc[df['Home Loss'], 'Home Goal To Go Successes'].mean()))

t_stat, p = stats.ttest_ind(df.loc[df['Home Win'], 'Home Goal To Go Successes'], df.loc[df['Home Loss'], 'Home Goal To Go Successes'], equal_var=False)
print('t-statistic: ' + str(t_stat))
print('p-value: ' + str(p))

Average amount of Home Goal To Go Successes in Home Win: 1.43444730077
Average amount of Home Goal To Go Successes in Home Loss: 0.871186440678
t-statistic: 7.43327638772
p-value: 3.22954331479e-13


** When running a t-test, we still find a low p-value for Home Goal To Go Successes meaning it does impact a Home Team's chance of winning.**

In [20]:
# Running a chi-squared test for Road Fourth Down Successes and Home Win
RFDS_HW_CT = pd.crosstab(df['Road Fourth Down Successes'], df['Home Win'])
print(RFDS_HW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(RFDS_HW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

Home Win                    False  True 
Road Fourth Down Successes              
0                             215    223
1                              66    116
2                              15     37
3                               1     10
4                               0      3
chi-squared: 21.6040833409
p-value: 0.000240264746781


** Because of how low the p-value is, we can reject the null hypothesis and say Road Fourth Down Successes does have an impact on a Home Team's chance of winning.**

In [21]:
# Running a chi-squared test for Road Fourth Down Successes and Home Win with Filters
RFDS_HW_CT_HMD = pd.crosstab(df['Road Fourth Down Successes'] > df['Road Fourth Down Successes'].median(), df['Home Win'])
print(RFDS_HW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(RFDS_HW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

RFDS_HW_CT_LMD = pd.crosstab(df['Road Fourth Down Successes'] < df['Road Fourth Down Successes'].median(), df['Home Win'])
print(RFDS_HW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(RFDS_HW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

Home Win                    False  True 
Road Fourth Down Successes              
False                         215    223
True                           82    166
chi-squared for higher than median: 15.9111378979
p-value for higher than median: 6.63868951184e-05
Home Win                    False  True 
Road Fourth Down Successes              
False                         297    389
chi-squared for lower than median: 0.0
p-value for lower than median: 1.0


** When filtering Road Fourth Down Successes by its median, we still see a low p-value above the median but a high p-value beneath the median. This may be because of how infrequently teams go for Fourth Down. The more away teams go for it, the more successes they may have while if they only go for it once or not at all, Road Fourth Down Successes may not actually have an impact on a Home Team's chance of winning.**

In [22]:
# Running a t-test for Road Fourth Down Successes and Home Win
print('Average amount of Road Fourth Down Successes in Home Win: ' + str(df.loc[df['Home Win'], 'Road Fourth Down Successes'].mean()))
print('Average amount of Road Fourth Down Successes in Home Loss: ' + str(df.loc[df['Home Loss'], 'Road Fourth Down Successes'].mean()))

t_stat, p = stats.ttest_ind(df.loc[df['Home Win'], 'Road Fourth Down Successes'], df.loc[df['Home Loss'], 'Road Fourth Down Successes'], equal_var=False)
print('t-statistic: ' + str(t_stat))
print('p-value: ' + str(p))

Average amount of Road Fourth Down Successes in Home Win: 0.596401028278
Average amount of Road Fourth Down Successes in Home Loss: 0.335593220339
t-statistic: 4.82630177351
p-value: 1.71888749093e-06


** When running a t-test, we still find a low p-value for Road Fourth Down Successes meaning it does impact a Home Team's chance of winning.**

In [23]:
# Comparing t-tests for Road Rushing Yards, Road Passing Yards and Road Total Yards with Road Win
print('Average amount of Road Rushing Yards in Road Win: ' + str(df.loc[df['Road Win'], 'Road Rushing Yds'].mean()))
print('Average amount of Road Rushing Yards in Road Loss: ' + str(df.loc[df['Road Loss'], 'Road Rushing Yds'].mean()))

RRY_t_stat, RRY_p = stats.ttest_ind(df.loc[df['Road Win'],'Road Rushing Yds'], df.loc[df['Road Loss'], 'Road Rushing Yds'], equal_var=False)
print('Roading Rushing Yards t-statistic: ' + str(RRY_t_stat))
print('Road Rushing Yards p-value: ' + str(RRY_p))

print('Average amount of Road Passing Yards in Road Win: ' + str(df.loc[df['Road Win'], 'Road Passing Yds'].mean()))
print('Average amount of Road Passing Yards in Road Loss: ' + str(df.loc[df['Road Loss'], 'Road Passing Yds'].mean()))

RPY_t_stat, RPY_p = stats.ttest_ind(df.loc[df['Road Win'], 'Road Passing Yds'], df.loc[df['Road Loss'], 'Road Passing Yds'], equal_var=False)
print('Roading Passing Yards t-statistic: ' + str(RPY_t_stat))
print('Road Passing Yards p-value: ' + str(RPY_p))

print('Average amount of Road Total Yards in Road Win: ' + str(df.loc[df['Road Win'], 'Road Total Yds'].mean()))
print('Average amount of Road Total Yards in Road Loss: ' + str(df.loc[df['Road Loss'], 'Road Total Yds'].mean()))

RTY_t_stat, RTY_p = stats.ttest_ind(df.loc[df['Road Win'], 'Road Total Yds'], df.loc[df['Road Loss'], 'Road Total Yds'], equal_var=False)
print('Road Total Yards t-statistic: ' + str(RTY_t_stat))
print('Road Total Yards p-value: ' + str(RTY_p))

Average amount of Road Rushing Yards in Road Win: 126.152542373
Average amount of Road Rushing Yards in Road Loss: 92.8659793814
Roading Rushing Yards t-statistic: 9.19537876987
Road Rushing Yards p-value: 6.56489564669e-19
Average amount of Road Passing Yards in Road Win: 249.972881356
Average amount of Road Passing Yards in Road Loss: 242.858247423
Roading Passing Yards t-statistic: 1.2575033784
Road Passing Yards p-value: 0.209031049196
Average amount of Road Total Yards in Road Win: 376.125423729
Average amount of Road Total Yards in Road Loss: 335.724226804
Road Total Yards t-statistic: 6.74881320859
Road Total Yards p-value: 3.34810154221e-11


** Because of how low the p-values are for Road Rushing Yards and Road Total Yards, we can reject the null hypothesis and say that both have an impact on a Road Team's chance of winning. However, because of how high the p-value is for Road Passing Yards, we can accept the null hypothesis and say that it does not have an impact on a Road Team's chance of winning.**

In [24]:
# Comparing t-tests for Home Rushing Yards, Home Passing Yards and Home Total Yards with Home Win
print('Average amount of Home Rushing Yards in Home Win: ' + str(df.loc[df['Home Win'], 'Home Rushing Yds'].mean()))
print('Average amount of Home Rushing Yards in Home Loss: ' + str(df.loc[df['Home Loss'], 'Home Rushing Yds'].mean()))

HRY_t_stat, HRY_p = stats.ttest_ind(df.loc[df['Home Win'], 'Home Rushing Yds'], df.loc[df['Home Loss'], 'Home Rushing Yds'], equal_var=False)
print('Home Rushing Yards t-statistic: ' + str(HRY_t_stat))
print('Home Rushing Yards p-value: ' + str(HRY_p))

print('Average amount of Home Passing Yards in Home Win: ' + str(df.loc[df['Home Win'], 'Home Passing Yds'].mean()))
print('Average amount of Home Passing Yards in Home Loss: ' + str(df.loc[df['Home Loss'], 'Home Passing Yds'].mean()))

HPY_t_stat, HPY_p = stats.ttest_ind(df.loc[df['Home Win'], 'Home Passing Yds'], df.loc[df['Home Loss'], 'Home Passing Yds'], equal_var=False)
print('Home Passing Yards t-statistic: ' + str(HPY_t_stat))
print('Home Passing Yards p-value: ' + str(HPY_p))

print('Average amount of Home Total Yards in Home Win: ' + str(df.loc[df['Home Win'], 'Home Total Yds'].mean()))
print('Average amount of Home Total Yards in Home Loss: ' + str(df.loc[df['Home Loss'], 'Home Total Yds'].mean()))

HTY_t_stat, HTY_p = stats.ttest_ind(df.loc[df['Home Win'], 'Home Total Yds'], df.loc[df['Home Loss'], 'Home Total Yds'], equal_var=False)
print('Home Total Yards t-statistic: ' + str(HTY_t_stat))
print('Home Total Yards p-value: ' + str(HTY_p))

Average amount of Home Rushing Yards in Home Win: 132.066838046
Average amount of Home Rushing Yards in Home Loss: 96.0169491525
Home Rushing Yards t-statistic: 10.3404950095
Home Rushing Yards p-value: 2.21865803101e-23
Average amount of Home Passing Yards in Home Win: 258.285347044
Average amount of Home Passing Yards in Home Loss: 252.13220339
Home Passing Yards t-statistic: 1.13272589041
Home Passing Yards p-value: 0.257748779813
Average amount of Home Total Yards in Home Win: 390.35218509
Average amount of Home Total Yards in Home Loss: 348.149152542
Home Total Yards t-statistic: 7.6370938345
Home Total Yards p-value: 8.12502710132e-14


** Because of how low the p-values are for Home Rushing Yards and Home Total Yards, we can reject the null hypothesis and say that both have an impact on a Home Team's chance of winning. However, because of how high the p-value is for Home Passing Yards, we can accept the null hypothesis and say that it does not have an impact on a Home Team's chance of winning.**

In [25]:
# Running a Pearson Correlation test between Road Total Yards and Home Total Yards
r, p = stats.pearsonr(df['Road Total Yds'], df['Home Total Yds'])
print('r: ' + str(r))
print('p-value: ' + str(p))

r: 0.0332237989546
p-value: 0.384936055995


** Because of how high the p-value is, we can say the relationship between Road Total Yards and Home Total Yards is not statistically significant and also that the correlation between them is not equal to 0. Since an increase in Road Total Yards is not predictive of Home Total Yards, this negates the idea of a shootout.**

In [26]:
# Comparing t-tests for Road Points with Road Win and Home Points with Home Win
print('Average amount of Road Points in Road Win: ' + str(df.loc[df['Road Win'], 'Road Points'].mean()))
print('Average amount of Road Points in Road Loss: ' + str(df.loc[df['Road Loss'], 'Road Points'].mean()))

RP_t_stat, RP_p = stats.ttest_ind(df.loc[df['Road Win'], 'Road Points'], df.loc[df['Road Loss'], 'Road Points'], equal_var=False)
print('Road Points t-statistic: ' + str(RP_t_stat))
print('Road Points p-value: ' + str(RP_p))

print('Average amount of Home Points in Home Win: ' + str(df.loc[df['Home Win'], 'Home Points'].mean()))
print('Average amount of Home Points in Home Loss: ' + str(df.loc[df['Home Loss'], 'Home Points'].mean()))

HP_t_stat, HP_p = stats.ttest_ind(df.loc[df['Home Win'], 'Home Points'], df.loc[df['Home Loss'], 'Home Points'], equal_var=False)
print('Home Points t-statistic: ' + str(HP_t_stat))
print('Home Points p-value: ' + str(HP_p))

Average amount of Road Points in Road Win: 27.0406779661
Average amount of Road Points in Road Loss: 16.6675257732
Road Points t-statistic: 17.3265225416
Road Points p-value: 2.23599858203e-55
Average amount of Home Points in Home Win: 28.5706940874
Average amount of Home Points in Home Loss: 16.9220338983
Home Points t-statistic: 18.6748510633
Home Points p-value: 8.08943267606e-63


** Because of how low the p-value is for Road Points, we can reject the null hypothesis and say it does have an impact on a Road Team's chance of winning. Also, because of how low the p-value is for Home Points, we can reject the null hypothesis and say it does have an impact on a Home Team's chance of winning.**

In [27]:
# Running a Pearson Correlation test between Road Points and Home Points
r, p = stats.pearsonr(df['Road Points'], df['Home Points'])
print('r: ' + str(r))
print('p-value: ' + str(p))

r: -0.0231055999376
p-value: 0.545744131302


** Because of how high the p-value is, we can say the relationship between Road Points and Home Points is not statistically significant and also that the correlation between them is not equal to 0. Since an increase in Road Points is not predictive of Home Points, this negates the idea of a shootout.**

In [28]:
# Running a chi-squared test for Road Interceptions Given Up and Road Win as well as Home Win
RIGU_RW_CT = pd.crosstab(df['Road INTs Given Up'], df['Road Win'])
print(RIGU_RW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(RIGU_RW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

RIGU_HW_CT = pd.crosstab(df['Road INTs Given Up'], df['Home Win'])
print(RIGU_HW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(RIGU_HW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

Road Win            False  True 
Road INTs Given Up              
0                     122    188
1                     146     83
2                      86     21
3                      26      3
4                       8      0
5                       2      0
6                       1      0
chi-squared: 88.407798681
p-value: 6.48671361548e-17
Home Win            False  True 
Road INTs Given Up              
0                     189    121
1                      83    146
2                      22     85
3                       3     26
4                       0      8
5                       0      2
6                       0      1
chi-squared: 87.8242321727
p-value: 8.57279795263e-17


** Because of how low the p-values are, we can reject the null hypothesis and say Road INTs Given Up does have an impact on Road and Home Teams' chances of winning.**

In [29]:
# Running a chi-squared test for Road Interceptions Given Up and Road Win as well as Home Win with Filters
RIGU_RW_CT_HMD = pd.crosstab(df['Road INTs Given Up'] > df['Road INTs Given Up'].median(), df['Road Win'])
print(RIGU_RW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(RIGU_RW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

RIGU_RW_CT_LMD = pd.crosstab(df['Road INTs Given Up'] < df['Road INTs Given Up'].median(), df['Road Win'])
print(RIGU_RW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(RIGU_RW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

RIGU_HW_CT_HMD = pd.crosstab(df['Road INTs Given Up'] > df['Road INTs Given Up'].median(), df['Home Win'])
print(RIGU_HW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(RIGU_HW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

RIGU_HW_CT_LMD = pd.crosstab(df['Road INTs Given Up'] < df['Road INTs Given Up'].median(), df['Home Win'])
print(RIGU_HW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(RIGU_HW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

Road Win            False  True 
Road INTs Given Up              
False                 268    271
True                  123     24
chi-squared for higher than median: 52.9431741666
p-value for higher than median: 3.43339107785e-13
Road Win            False  True 
Road INTs Given Up              
False                 269    107
True                  122    188
chi-squared for lower than median: 70.5143907561
p-value for lower than median: 4.56921179914e-17
Home Win            False  True 
Road INTs Given Up              
False                 272    267
True                   25    122
chi-squared for higher than median: 51.3081834942
p-value for higher than median: 7.8945851331e-13
Home Win            False  True 
Road INTs Given Up              
False                 108    268
True                  189    121
chi-squared for lower than median: 70.6498419603
p-value for lower than median: 4.26601880014e-17


** When filtering Road INTs Given Up by its median, we still see low p-values that further emphasize how the stat does impact Road and Home Teams' chances of winning.**

In [30]:
# Running a t-test for Road Interceptions Given Up and Road Win as well as Home Win
print('Average amount of Road Interceptions Given Up in Road Win: ' + str(df.loc[df['Road Win'], 'Road INTs Given Up'].mean()))
print('Average amount of Road Interceptions Given Up in Road Loss: ' + str(df.loc[df['Road Loss'], 'Road INTs Given Up'].mean()))

RIGU_RW_t_stat, RIGU_RW_p = stats.ttest_ind(df.loc[df['Road Win'], 'Road INTs Given Up'], df.loc[df['Road Loss'], 'Road INTs Given Up'], equal_var=False)
print('t-statistic: ' + str(RIGU_RW_t_stat))
print('p-value: ' + str(RIGU_RW_p))

print('Average amount of Road Interceptions Given Up in Home Win: ' + str(df.loc[df['Home Win'], 'Road INTs Given Up'].mean()))
print('Average amount of Road Interceptions Given Up in Home Loss: ' + str(df.loc[df['Home Loss'], 'Road INTs Given Up'].mean()))

RIGU_HW_t_stat, RIGU_HW_p = stats.ttest_ind(df.loc[df['Home Win'], 'Road INTs Given Up'], df.loc[df['Home Loss'], 'Road INTs Given Up'], equal_var=False)
print('t-statistic: ' + str(RIGU_HW_t_stat))
print('p-value: ' + str(RIGU_HW_p))

Average amount of Road Interceptions Given Up in Road Win: 0.454237288136
Average amount of Road Interceptions Given Up in Road Loss: 1.14175257732
t-statistic: -10.3567733931
p-value: 2.12777729225e-23
Average amount of Road Interceptions Given Up in Home Win: 1.13624678663
Average amount of Road Interceptions Given Up in Home Loss: 0.457627118644
t-statistic: 10.2075811303
p-value: 7.956909854e-23


** When running a t-test, we still find low p-values for Road INTs Given Up meaning it does impact Road and Home Teams' chances of winning.**

In [31]:
# Running a chi-squared test for Home Interceptions Given Up and Home Win as well as Road Win
HIGU_HW_CT = pd.crosstab(df['Home INTs Given Up'], df['Home Win'])
print(HIGU_HW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(HIGU_HW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

HIGU_RW_CT = pd.crosstab(df['Home INTs Given Up'], df['Road Win'])
print(HIGU_RW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(HIGU_RW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

Home Win            False  True 
Home INTs Given Up              
0                      90    223
1                     112    126
2                      60     37
3                      28      3
4                       4      0
5                       3      0
chi-squared: 79.0361329916
p-value: 1.33492559869e-15
Road Win            False  True 
Home INTs Given Up              
0                     223     90
1                     128    110
2                      37     60
3                       3     28
4                       0      4
5                       0      3
chi-squared: 78.5954047829
p-value: 1.65047294309e-15


** Because of how low the p-values are, we can reject the null hypothesis and say Home INTs Given Up does have an impact on Road and Home Teams' chances of winning.**

In [32]:
# Running a chi-squared test for Home Interceptions Given Up and Home Win as well as Road Win with Filters
HIGU_HW_CT_HMD = pd.crosstab(df['Home INTs Given Up'] > df['Home INTs Given Up'].median(), df['Home Win'])
print(HIGU_HW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(HIGU_HW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

HIGU_HW_CT_LMD = pd.crosstab(df['Home INTs Given Up'] < df['Home INTs Given Up'].median(), df['Home Win'])
print(HIGU_HW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(HIGU_HW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

HIGU_RW_CT_HMD = pd.crosstab(df['Home INTs Given Up'] > df['Home INTs Given Up'].median(), df['Road Win'])
print(HIGU_RW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(HIGU_RW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

HIGU_RW_CT_LMD = pd.crosstab(df['Home INTs Given Up'] < df['Home INTs Given Up'].median(), df['Road Win'])
print(HIGU_RW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(HIGU_RW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

Home Win            False  True 
Home INTs Given Up              
False                 202    349
True                   95     40
chi-squared for higher than median: 48.8260029885
p-value for higher than median: 2.79706794002e-12
Home Win            False  True 
Home INTs Given Up              
False                 207    166
True                   90    223
chi-squared for lower than median: 48.4912323603
p-value for lower than median: 3.31768848974e-12
Road Win            False  True 
Home INTs Given Up              
False                 351    200
True                   40     95
chi-squared for higher than median: 49.9792203033
p-value for higher than median: 1.55382816728e-12
Road Win            False  True 
Home INTs Given Up              
False                 168    205
True                  223     90
chi-squared for lower than median: 46.6208681453
p-value for lower than median: 8.61380475515e-12


** When filtering Home INTs Given Up by its median, we still see low p-values that further emphasize how the stat does impact Road and Home Teams' chances of winning.**

In [33]:
# Running a t-test for Home Interceptions Given Up and Home Win as well as Road Win
print('Average amount of Home Interceptions Given Up in Home Win: ' + str(df.loc[df['Home Win'], 'Home INTs Given Up'].mean()))
print('Average amount of Home Interceptions Given Up in Home Loss: ' + str(df.loc[df['Home Loss'], 'Home INTs Given Up'].mean()))

HIGU_HW_t_stat, HIGU_HW_p = stats.ttest_ind(df.loc[df['Home Win'], 'Home INTs Given Up'], df.loc[df['Home Loss'], 'Home INTs Given Up'], equal_var=False)
print('t-statistic: ' + str(HIGU_HW_t_stat))
print('p-value: ' + str(HIGU_HW_p))

print('Average amount of Home Interceptions Given Up in Road Win: ' + str(df.loc[df['Road Win'], 'Home INTs Given Up'].mean()))
print('Average amount of Home Interceptions Given Up in Road Loss: ' + str(df.loc[df['Road Loss'], 'Home INTs Given Up'].mean()))

HIGU_RW_t_stat, HIGU_RW_p = stats.ttest_ind(df.loc[df['Road Win'], 'Home INTs Given Up'], df.loc[df['Road Loss'], 'Home INTs Given Up'], equal_var=False)
print('t-statistic: ' + str(HIGU_RW_t_stat))
print('p-value: ' + str(HIGU_RW_p))

Average amount of Home Interceptions Given Up in Home Win: 0.537275064267
Average amount of Home Interceptions Given Up in Home Loss: 1.17288135593
t-statistic: -8.87723695429
p-value: 1.39537123409e-17
Average amount of Home Interceptions Given Up in Road Win: 1.16949152542
Average amount of Home Interceptions Given Up in Road Loss: 0.538659793814
t-statistic: 8.79340658064
p-value: 2.66006294187e-17


** When running a t-test, we still find low p-values for Home INTs Given Up meaning it does impact Road and Home Teams' chances of winning.**

In [34]:
# Running a Pearson Correlation test between Road Interceptions Given Up and Home Interceptions Given Up
r, p = stats.pearsonr(df['Road INTs Given Up'], df['Home INTs Given Up'])
print('r: ' + str(r))
print('p-value: ' + str(p))

r: -0.125220129564
p-value: 0.00101376165285


** Because of how low the p-value is, we can say the relationship between Road INTs Given Up and Home INTs Given Up is statistically significant and also they have a negative correlation around -0.13. Since an increase in Road INTs Given Up is predictive of Home INTs Given Up, this strengthens the idea of a sloppy game.**

In [35]:
# Running a chi-squared test for Road Fumbles Lost and Road Win as well as Home Win
RFL_RW_CT = pd.crosstab(df['Road Fumbles Lost'], df['Road Win'])
print(RFL_RW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(RFL_RW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

RFL_HW_CT = pd.crosstab(df['Road Fumbles Lost'], df['Home Win'])
print(RFL_HW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(RFL_HW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

Road Win           False  True 
Road Fumbles Lost              
0                    202    199
1                    125     79
2                     45     15
3                     15      2
4                      4      0
chi-squared: 26.4191511677
p-value: 2.60456573094e-05
Home Win           False  True 
Road Fumbles Lost              
0                    201    200
1                     79    125
2                     15     45
3                      2     15
4                      0      4
chi-squared: 27.4721324576
p-value: 1.59545220479e-05


** Because of how low the p-values are, we can reject the null hypothesis and say Road Fumbles Lost does have an impact on Road and Home Teams' chances of winning.**

In [36]:
# Running a chi-squared test for Road Fumbles Lost and Road Win as well as Home Win with Filters
RFL_RW_CT_HMD = pd.crosstab(df['Road Fumbles Lost'] > df['Road Fumbles Lost'].median(), df['Road Win'])
print(RFL_RW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(RFL_RW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

RFL_RW_CT_LMD = pd.crosstab(df['Road Fumbles Lost'] < df['Road Fumbles Lost'].median(), df['Road Win'])
print(RFL_RW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(RFL_RW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

RFL_HW_CT_HMD = pd.crosstab(df['Road Fumbles Lost'] > df['Road Fumbles Lost'].median(), df['Home Win'])
print(RFL_HW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(RFL_HW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

RFL_HW_CT_LMD = pd.crosstab(df['Road Fumbles Lost'] < df['Road Fumbles Lost'].median(), df['Home Win'])
print(RFL_HW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(RFL_HW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

Road Win           False  True 
Road Fumbles Lost              
False                202    199
True                 189     96
chi-squared for higher than median: 16.6294084935
p-value for higher than median: 4.54409471309e-05
Road Win           False  True 
Road Fumbles Lost              
False                391    295
chi-squared for lower than median: 0.0
p-value for lower than median: 1.0
Home Win           False  True 
Road Fumbles Lost              
False                201    200
True                  96    189
chi-squared for higher than median: 17.6780045376
p-value for higher than median: 2.61634588443e-05
Home Win           False  True 
Road Fumbles Lost              
False                297    389
chi-squared for lower than median: 0.0
p-value for lower than median: 1.0


** When filtering Road Fumbles Lost by its median, we still see low p-values above the median but high p-values beneath the median. This may be because of how often teams lose a fumble. If the Road Team does not lose a fumble or only lose 1, Road Fumbles Lost may not actually have an impact on Road and Home Teams' chances of winning.**

In [37]:
# Running a t-test for Road Fumbles Lost and Road Win as well as Home Win
print('Average amount of Road Fumbles Lost in Road Win: ' + str(df.loc[df['Road Win'], 'Road Fumbles Lost'].mean()))
print('Average amount of Road Fumbles Lost in Road Loss: ' + str(df.loc[df['Road Loss'], 'Road Fumbles Lost'].mean()))

RFL_RW_t_stat, RFL_RW_p = stats.ttest_ind(df.loc[df['Road Win'], 'Road Fumbles Lost'], df.loc[df['Road Loss'], 'Road Fumbles Lost'], equal_var=False)
print('t-statistic: ' + str(RFL_RW_t_stat))
print('p-value: ' + str(RFL_RW_p))

print('Average amount of Road Fumbles Lost in Home Win: ' + str(df.loc[df['Home Win'], 'Road Fumbles Lost'].mean()))
print('Average amount of Road Fumbles Lost in Home Loss: ' + str(df.loc[df['Home Loss'], 'Road Fumbles Lost'].mean()))

RFL_HW_t_stat, RFL_HW_p = stats.ttest_ind(df.loc[df['Home Win'], 'Road Fumbles Lost'], df.loc[df['Home Loss'], 'Road Fumbles Lost'], equal_var=False)
print('t-statistic: ' + str(RFL_HW_t_stat))
print('p-value: ' + str(RFL_HW_p))

Average amount of Road Fumbles Lost in Road Win: 0.389830508475
Average amount of Road Fumbles Lost in Road Loss: 0.711340206186
t-statistic: -5.56121875227
p-value: 3.86158653215e-08
Average amount of Road Fumbles Lost in Home Win: 0.709511568123
Average amount of Road Fumbles Lost in Home Loss: 0.389830508475
t-statistic: 5.53554633057
p-value: 4.44056752728e-08


** When running a t-test, we still find low p-values for Road Fumbles Lost meaning it does impact Road and Home Teams' chances of winning.**

In [38]:
# Running a chi-squared test for Home Fumbles Lost and Home Win as well as Road Win
HFL_HW_CT = pd.crosstab(df['Home Fumbles Lost'], df['Home Win'])
print(HFL_HW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(HFL_HW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

HFL_RW_CT = pd.crosstab(df['Home Fumbles Lost'], df['Road Win'])
print(HFL_RW_CT)
chi, p, d_f, e_f = stats.chi2_contingency(HFL_RW_CT)
print('chi-squared: ' + str(chi))
print('p-value: ' + str(p))

Home Win           False  True 
Home Fumbles Lost              
0                    141    259
1                    110    101
2                     33     24
3                     13      5
chi-squared: 28.3420537918
p-value: 3.07851315177e-06
Road Win           False  True 
Home Fumbles Lost              
0                    260    140
1                    102    109
2                     24     33
3                      5     13
chi-squared: 28.3292237059
p-value: 3.09766898508e-06


** Because of how low the p-values are, we can reject the null hypothesis and say Home Fumbles Lost does have an impact on Road and Home Teams' chances of winning.**

In [39]:
# Running a chi-squared test for Home Fumbles Lost and Home Win as well as Road Win with Filters
HFL_HW_CT_HMD = pd.crosstab(df['Home Fumbles Lost'] > df['Home Fumbles Lost'].median(), df['Home Win'])
print(HFL_HW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(HFL_HW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

HFL_HW_CT_LMD = pd.crosstab(df['Home Fumbles Lost'] < df['Home Fumbles Lost'].median(), df['Home Win'])
print(HFL_HW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(HFL_HW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

HFL_RW_CT_HMD = pd.crosstab(df['Home Fumbles Lost'] > df['Home Fumbles Lost'].median(), df['Road Win'])
print(HFL_RW_CT_HMD)
chi, p, d_f, e_f = stats.chi2_contingency(HFL_RW_CT_HMD)
print('chi-squared for higher than median: ' + str(chi))
print('p-value for higher than median: ' + str(p))

HFL_RW_CT_LMD = pd.crosstab(df['Home Fumbles Lost'] < df['Home Fumbles Lost'].median(), df['Road Win'])
print(HFL_RW_CT_LMD)
chi, p, d_f, e_f = stats.chi2_contingency(HFL_RW_CT_LMD)
print('chi-squared for lower than median: ' + str(chi))
print('p-value for lower than median: ' + str(p))

Home Win           False  True 
Home Fumbles Lost              
False                141    259
True                 156    130
chi-squared for higher than median: 24.5104601275
p-value for higher than median: 7.39075140189e-07
Home Win           False  True 
Home Fumbles Lost              
False                297    389
chi-squared for lower than median: 0.0
p-value for lower than median: 1.0
Road Win           False  True 
Home Fumbles Lost              
False                260    140
True                 131    155
chi-squared for higher than median: 24.2935041526
p-value for higher than median: 8.27175457717e-07
Road Win           False  True 
Home Fumbles Lost              
False                391    295
chi-squared for lower than median: 0.0
p-value for lower than median: 1.0


** When filtering Home Fumbles Lost by its median, we still see low p-values above the median but high p-values beneath the median. This may be because of how often teams lose a fumble. If the Home Team does not lose a fumble or only lose 1, Home Fumbles Lost may not actually have an impact on Road and Home Teams' chances of winning.**

In [40]:
# Running a t-test for Home Fumbles Lost and Home Win as well as Road Win
print('Average amount of Home Fumbles Lost in Home Win: ' + str(df.loc[df['Home Win'], 'Home Fumbles Lost'].mean()))
print('Average amount of Home Fumbles Lost in Home Loss: ' + str(df.loc[df['Home Loss'], 'Home Fumbles Lost'].mean()))

HFL_HW_t_stat, HFL_HW_p = stats.ttest_ind(df.loc[df['Home Win'], 'Home Fumbles Lost'], df.loc[df['Home Loss'], 'Home Fumbles Lost'], equal_var=False)
print('t-statistic: ' + str(HFL_HW_t_stat))
print('p-value: ' + str(HFL_HW_p))

print('Average amount of Home Fumbles Lost in Road Win: ' + str(df.loc[df['Road Win'], 'Home Fumbles Lost'].mean()))
print('Average amount of Home Fumbles Lost in Road Loss: ' + str(df.loc[df['Road Loss'], 'Home Fumbles Lost'].mean()))

HFL_RW_t_stat, HFL_RW_p = stats.ttest_ind(df.loc[df['Road Win'], 'Home Fumbles Lost'], df.loc[df['Road Loss'], 'Home Fumbles Lost'], equal_var=False)
print('t-statistic: ' + str(HFL_RW_t_stat))
print('p-value: ' + str(HFL_RW_p))

Average amount of Home Fumbles Lost in Home Win: 0.421593830334
Average amount of Home Fumbles Lost in Home Loss: 0.725423728814
t-statistic: -5.14810888421
p-value: 3.66467571815e-07
Average amount of Home Fumbles Lost in Road Win: 0.725423728814
Average amount of Home Fumbles Lost in Road Loss: 0.420103092784
t-statistic: 5.17063359721
p-value: 3.26659517426e-07


** When running a t-test, we still find low p-values for Home Fumbles Lost meaning it does impact Road and Home Teams' chances of winning.**

In [41]:
# Running a Pearson Correlation test between Road Fumbles Lost and Home Fumbles Lost
r, p = stats.pearsonr(df['Road Fumbles Lost'], df['Home Fumbles Lost'])
print('r: ' + str(r))
print('p-value: ' + str(p))

r: -0.0579855201606
p-value: 0.129205917656


** Because of how high the p-value is, we can say the relationship between Road Fumbles Lost and Home Fumbles Lost is not statistically significant and also that the correlation between them is not equal to 0. Since an increase in Road Fumbles Lost is not predictive of Home Fumbles Lost, this negates the idea of a sloppy game.**