In [1]:
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd

In [2]:
nfl = pd.read_csv("data/nfl.csv")
nfl.head()

Unnamed: 0,third_per,third_per_allowed,TOP,date,first_downs,first_downs_allowed,ha,margin,opp,pass_yards,...,rush_yards,rush_yards_allowed,sacked,sacks,takeaways,team,total_points,total_yards,total_yards_allowed,turnovers
0,0.307692,0.285714,28.35,9/10/2009,18,19,away,-3,PIT,234,...,86,36,1,4,3,TEN,23,320,357,2
1,0.285714,0.307692,36.183333,9/10/2009,19,18,home,3,TEN,321,...,36,86,4,1,2,PIT,23,357,320,3
2,0.363636,0.4,29.116667,9/13/2009,16,19,away,-12,ATL,163,...,96,68,4,2,0,MIA,26,259,281,4
3,0.4,0.363636,30.883333,9/13/2009,19,16,home,12,MIA,213,...,68,96,2,4,4,ATL,26,281,259,0
4,0.25,0.333333,26.55,9/13/2009,10,16,away,5,CIN,227,...,75,86,3,3,2,DEN,19,302,307,0


In [3]:
nfl.columns

Index(['third_per', 'third_per_allowed', 'TOP', 'date', 'first_downs',
       'first_downs_allowed', 'ha', 'margin', 'opp', 'pass_yards',
       'pass_yards_allowed', 'penalty_yards', 'plays', 'points',
       'points_allowed', 'result', 'rush_yards', 'rush_yards_allowed',
       'sacked', 'sacks', 'takeaways', 'team', 'total_points', 'total_yards',
       'total_yards_allowed', 'turnovers'],
      dtype='object')

In [4]:
teams = nfl['team'].unique().tolist()
teams.remove("ARI")
teams  

['TEN',
 'PIT',
 'MIA',
 'ATL',
 'DEN',
 'CIN',
 'MIN',
 'CLE',
 'JAX',
 'IND',
 'DET',
 'NOS',
 'DAL',
 'TBB',
 'PHI',
 'CAR',
 'KCC',
 'BAL',
 'NYJ',
 'HOU',
 'WAS',
 'NYG',
 'SFO',
 'LAR',
 'SEA',
 'CHI',
 'GBP',
 'BUF',
 'NEP',
 'LAC',
 'OAK']

In [5]:
margin_model = smf.ols(formula='margin ~ third_per + third_per_allowed + TOP + first_downs + first_downs_allowed + ha + opp + pass_yards + pass_yards_allowed + penalty_yards + plays + rush_yards + rush_yards_allowed + sacked + sacks + takeaways + team + total_yards + total_yards_allowed + turnovers', data=nfl)
margin_res = margin_model.fit()
print(margin_res.summary())

                            OLS Regression Results                            
Dep. Variable:                 margin   R-squared:                       0.797
Model:                            OLS   Adj. R-squared:                  0.793
Method:                 Least Squares   F-statistic:                     237.5
Date:                Wed, 04 Jul 2018   Prob (F-statistic):               0.00
Time:                        12:38:41   Log-Likelihood:                -16027.
No. Observations:                4806   AIC:                         3.221e+04
Df Residuals:                    4727   BIC:                         3.272e+04
Df Model:                          78                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept              29.4678    

In [6]:
margin_res.save("models/margin_res.pickle")
# from statsmodels.iolib.smpickle import load_pickle
# margin_res = load_pickle("margin_res.pickle")
# print (margin_model.params)

In [7]:
print(margin_res.params)

Intercept              29.467828
ha[T.home]              1.621145
opp[T.ATL]             -1.028320
opp[T.BAL]             -0.393423
opp[T.BUF]              0.591558
opp[T.CAR]              1.189014
opp[T.CHI]             -0.146005
opp[T.CIN]             -0.764340
opp[T.CLE]              1.392185
opp[T.DAL]             -1.369142
opp[T.DEN]              0.528161
opp[T.DET]              0.039376
opp[T.GBP]             -1.717869
opp[T.HOU]              2.008829
opp[T.IND]             -0.014446
opp[T.JAX]              1.962689
opp[T.KCC]             -0.243586
opp[T.LAC]             -1.289141
opp[T.LAR]              1.455431
opp[T.MIA]             -0.429457
opp[T.MIN]              0.112341
opp[T.NEP]             -3.395059
opp[T.NOS]             -1.199851
opp[T.NYG]              1.184450
opp[T.NYJ]              1.041245
opp[T.OAK]              1.951368
opp[T.PHI]              0.489943
opp[T.PIT]             -1.808736
opp[T.SEA]             -1.131559
opp[T.SFO]              0.071624
          

In [8]:
margin_ari_score = 0
margin_ari_opp = 0
# for tm in teams:
#     r_team = "team[T." + tm + "]"
#     o_team = "opp[T." + tm + "]"
#     team_score = margin_res.params[r_team]
#     opp_score = margin_res.params[o_team]
#     ari = ari + team_score
#     opp_ari = opp_ari + opp_score   
# margin_ari_score = -1*ari/len(teams)
# margin_ari_opp = -1*opp_ari/len(teams)
# print (margin_ari_score, margin_ari_opp)

In [9]:
nfl['pred_margin'] = ""

In [10]:
margins = []
for key, row in nfl.iterrows():
    if row.team == "ARI":
        team_coeff = margin_ari_score
    else:
        res_team = "team[T." + row.team + "]"
        team_coeff = margin_res.params[res_team]
    if row.opp == "ARI":
        opp_coeff = margin_ari_opp
    else:
        res_opp = "team[T." + row.opp + "]"
        opp_coeff = margin_res.params[res_opp]
#     print(team_coeff, opp_coeff)
    if row.ha == "away":
        ha_coeff = margin_res.params["ha[T.home]"]*-1
    else:
        ha_coeff = margin_res.params["ha[T.home]"]*1
#     print(ha_coeff)
    margin_predict = margin_res.params.Intercept + margin_res.params.third_per*row['third_per'] + margin_res.params.third_per_allowed*row['third_per_allowed'] + margin_res.params.TOP*row['TOP'] + margin_res.params.first_downs*row['first_downs'] + margin_res.params.first_downs_allowed*row['first_downs_allowed'] + margin_res.params.pass_yards*row['pass_yards'] + margin_res.params.pass_yards_allowed*row['pass_yards_allowed'] + margin_res.params.penalty_yards*row['penalty_yards'] + margin_res.params.plays*row['plays'] + margin_res.params.rush_yards*row['rush_yards'] + margin_res.params.rush_yards_allowed*row['rush_yards_allowed'] + margin_res.params.sacked*row['sacked'] + margin_res.params.sacks*row['sacks'] + margin_res.params.takeaways*row['takeaways'] + margin_res.params.total_yards*row['total_yards'] + margin_res.params.total_yards_allowed*row['total_yards_allowed'] + margin_res.params.turnovers*row['turnovers'] + ha_coeff + team_coeff + opp_coeff
    print(key, margin_predict)
    margins.append(margin_predict)

0 5.57409000657
1 -8.75990024068
2 -15.872680984
3 19.0922193639
4 8.20550930671
5 -0.448412770376
6 13.7439822137
7 -15.1390540149
8 -4.22856741461
9 -1.5283322039
10 -26.9934325471
11 28.3828516423
12 -3.58876982198
13 1.62484950992
14 26.5903837129
15 -33.8984583351
16 -15.9347767209
17 19.3160844345
18 14.7594440169
19 -21.5781409893
20 -5.32720201518
21 5.96583809256
22 -1.99970259548
23 -0.934640086289
24 -15.9297802151
25 13.7325210061
26 -11.4786431301
27 11.1792585157
28 -6.57292381997
29 14.5613157046
30 -6.42176263806
31 0.689077979695
32 -1.7039162273
33 -3.50651316121
34 6.65515390304
35 -6.53674651622
36 1.91976419472
37 -0.922514497298
38 -5.61869435628
39 -3.62832492411
40 -9.14548409676
41 1.13915653432
42 0.897363795028
43 1.99157578847
44 13.2359238202
45 -11.6880142675
46 -7.68781059346
47 -1.10083963442
48 7.59060306518
49 -8.47339100927
50 -11.3300133436
51 10.712204479
52 -19.9121447215
53 8.92218998774
54 -5.0792408335
55 4.06316907799
56 3.12610480699
57 -0.585

474 9.4264897979
475 -8.59670695509
476 9.83018626474
477 -18.1773053366
478 -6.98022292042
479 0.169578714678
480 -27.1742862736
481 27.8561146361
482 -6.93005308758
483 1.11346317944
484 10.26988233
485 -12.6285738226
486 2.22391901709
487 -9.06928090754
488 -19.6249561391
489 15.9742842908
490 -9.7321187285
491 19.3685387663
492 14.0934298861
493 -23.2440718519
494 5.86828406311
495 -4.09769639128
496 -26.8185853211
497 32.8527735121
498 -16.8922384828
499 17.4274236911
500 16.4304365273
501 -8.00787578554
502 -2.58122671604
503 -5.05202193012
504 -5.89135296024
505 -0.269889098064
506 9.06697369543
507 -5.5400052064
508 7.23858965183
509 -7.75106562054
510 -34.2248505987
511 35.6454125311
512 9.56735879545
513 -8.16081798258
514 -28.083933146
515 25.6103216394
516 25.5932005221
517 -17.2071866312
518 -12.9200000898
519 26.4345652566
520 -16.8475527837
521 25.777858964
522 -11.4404127137
523 14.154015992
524 -21.1675286761
525 19.3764470759
526 -1.50028528898
527 -4.04868667456
528 

970 -12.9797354697
971 7.66496978509
972 7.22104146725
973 -11.0022985021
974 -11.0813097673
975 8.84299890961
976 -8.51913393122
977 4.63472977196
978 -1.4185626294
979 12.0320973914
980 16.1408567233
981 -16.1179449601
982 -18.9570258432
983 17.0009021818
984 -8.24075380814
985 6.87538133285
986 -13.2153712718
987 7.80819396811
988 32.0611998413
989 -25.8814222612
990 9.47784339321
991 -3.32403986237
992 1.36203823578
993 -1.34232787181
994 -7.00336236856
995 -6.84457924229
996 -18.0676254473
997 8.20280355355
998 1.75235221084
999 -5.08421708226
1000 1.64103085437
1001 -10.2108534288
1002 -2.24596926788
1003 12.2980335749
1004 -6.58084258276
1005 9.78215640817
1006 -20.6253356841
1007 24.7128081698
1008 -25.1862680767
1009 29.0962151937
1010 3.59909379063
1011 -3.57447215541
1012 13.9983447697
1013 -13.2322338879
1014 -17.3979598448
1015 10.5809706895
1016 2.12046319784
1017 -0.812359113335
1018 -9.7641153491
1019 7.34063614543
1020 -36.3763655884
1021 37.7978807668
1022 -23.4257040

1462 3.76772661278
1463 1.0386749021
1464 -17.9924119901
1465 15.9602591351
1466 3.30648441719
1467 -8.68880468536
1468 -13.9934356394
1469 18.5327918729
1470 8.98719147859
1471 -11.9267351131
1472 -6.98012464346
1473 -2.70528312726
1474 2.40468900852
1475 -0.416957979959
1476 -19.3452817971
1477 18.1250209869
1478 -15.3649657381
1479 14.9766373309
1480 -4.26696126874
1481 -0.289512950957
1482 -11.0823595025
1483 7.10535119612
1484 -23.5834710393
1485 26.1080746507
1486 14.3057007597
1487 -14.6861690375
1488 18.0374578423
1489 -20.0061318741
1490 22.9690392329
1491 -26.0507586643
1492 1.06192816407
1493 -8.15287753827
1494 9.11606882245
1495 -7.75026003827
1496 -8.40216723061
1497 7.58961538724
1498 -13.5401433672
1499 6.37321485316
1500 13.066618531
1501 -17.7963115573
1502 -3.56532782216
1503 -1.22896519332
1504 -3.26149908281
1505 -0.907962807181
1506 -7.34779781083
1507 1.19864879228
1508 -17.3546023022
1509 21.228795256
1510 15.5533905839
1511 -2.34252677686
1512 -16.7159400606
15

1964 -8.17839285316
1965 -7.81322277567
1966 15.067926208
1967 -29.6360099265
1968 -4.83810192435
1969 0.129825383284
1970 4.49272959251
1971 -2.7377578864
1972 -5.60199819172
1973 3.24541573286
1974 -9.82531510453
1975 5.2283386942
1976 -0.624827347023
1977 -2.46469524979
1978 -8.75307971615
1979 2.78931285518
1980 1.63280534682
1981 5.05016794396
1982 -0.963714956797
1983 2.87762080674
1984 -3.86241049654
1985 -1.34746926952
1986 11.2334752224
1987 -9.36057599063
1988 -7.69866036498
1989 -0.485658106594
1990 2.80061432844
1991 -10.5977064453
1992 2.16455303627
1993 -7.06078136116
1994 -4.62024011146
1995 1.59331371853
1996 -3.46126664281
1997 -9.87223080839
1998 4.83988220701
1999 -5.79865271968
2000 -9.97181395011
2001 11.0414800294
2002 -10.7252091591
2003 1.66779331489
2004 -4.51793331788
2005 3.59860859278
2006 -16.442232325
2007 16.2210987527
2008 -3.90413790548
2009 11.5330640524
2010 -10.7771887965
2011 10.7915992768
2012 -49.8612504698
2013 49.9918004711
2014 -0.84102764523
2

2462 -11.127504962
2463 -2.22488992791
2464 1.46652644932
2465 -3.99930211815
2466 -3.23402158073
2467 -6.42799325108
2468 13.9374930737
2469 -18.2246338623
2470 -15.7302895058
2471 11.9277621575
2472 9.24695893095
2473 -7.8078820646
2474 -9.61645489119
2475 16.7560336641
2476 1.95495508715
2477 -12.2816698455
2478 1.49832228542
2479 -11.3174063616
2480 -16.2093941447
2481 12.3326234944
2482 -5.03498003152
2483 3.08181833924
2484 -7.00645101308
2485 -2.15239168774
2486 5.90897757228
2487 -9.43517679788
2488 -28.8111877173
2489 34.2112822595
2490 -10.9820751611
2491 6.9316429275
2492 2.86176761547
2493 -3.00127017555
2494 -7.17670281946
2495 0.0643893934401
2496 21.1374641073
2497 -24.8471825813
2498 -19.1662738778
2499 20.9619326958
2500 -9.71926606271
2501 8.75823531364
2502 -4.03385923435
2503 -7.19285459136
2504 -6.58484846967
2505 -2.74608393171
2506 6.70756405095
2507 2.84893571365
2508 -8.29212800247
2509 -3.13235918289
2510 -8.36501400741
2511 0.175686172825
2512 -0.633595313436

2965 2.95461115154
2966 -13.5526053396
2967 11.992723512
2968 3.60902626516
2969 -21.5269092084
2970 7.28573431317
2971 1.91046837029
2972 -5.46930609686
2973 -0.771656673028
2974 9.99448132838
2975 -7.70349091315
2976 5.87425632412
2977 -12.1352687053
2978 -9.0434820221
2979 5.59529195938
2980 14.2866170489
2981 -22.5485424414
2982 -14.2877699865
2983 6.95247051968
2984 -30.3028609289
2985 28.519416991
2986 -9.06920987866
2987 11.0661249566
2988 19.474542902
2989 -11.2477825041
2990 3.73414631103
2991 4.21997416735
2992 -11.3954111121
2993 2.46826143807
2994 3.95559529874
2995 -6.54874219463
2996 -23.1729282897
2997 13.2961428721
2998 -13.0374180484
2999 11.5281879348
3000 9.58699373286
3001 -12.3109551292
3002 -13.2162652267
3003 9.68862028546
3004 1.90012064565
3005 -13.5657407452
3006 -17.5535620168
3007 3.69714281606
3008 -2.98865563983
3009 11.3093945728
3010 -6.98815986493
3011 1.90579786222
3012 -8.40116465833
3013 9.170019535
3014 -2.06774862268
3015 2.05389948274
3016 -7.3893

3721 -1.0404345292
3722 1.89805440871
3723 -8.51410194282
3724 -5.89823833816
3725 7.13052740667
3726 4.10807591243
3727 -4.8646127835
3728 -10.2704872367
3729 8.03472501186
3730 -2.59569418631
3731 3.14530303621
3732 -7.52540861873
3733 6.15038319887
3734 -37.2686949781
3735 36.8455982752
3736 -8.12697263908
3737 2.78112719411
3738 1.00727982079
3739 -4.95117457901
3740 -0.13517775857
3741 -2.95103201854
3742 3.62736802355
3743 -10.1226717339
3744 -7.57369785301
3745 12.2727720681
3746 -3.26170300754
3747 7.91035736222
3748 0.560430470016
3749 -1.25858258431
3750 4.29124653975
3751 -7.83101782219
3752 -1.92548332298
3753 7.01620347607
3754 -13.8380235015
3755 2.39763616993
3756 6.38507181454
3757 -11.2178402303
3758 -2.36435643253
3759 -1.31966431525
3760 -9.31646653533
3761 1.01768041678
3762 1.65176650551
3763 -2.02698040793
3764 -6.30796550582
3765 10.8719971479
3766 6.11856195378
3767 -10.7198333353
3768 -30.0404078586
3769 17.5661897252
3770 8.69315553396
3771 -5.63400951001
3772

4212 -4.5250834816
4213 7.5976248091
4214 -27.9490425333
4215 22.3255634955
4216 -21.307443123
4217 22.7344840339
4218 -7.77892204077
4219 13.6863012502
4220 -15.4829426516
4221 2.90900356869
4222 -10.8448677579
4223 1.56603214729
4224 3.1071580251
4225 -4.75614598433
4226 14.7407291231
4227 -10.1364609371
4228 -22.5911526793
4229 25.2672833732
4230 -19.6947335835
4231 20.6229730784
4232 -15.3425439094
4233 12.5622133471
4234 -1.65569900361
4235 -3.14699905804
4236 -12.7154803504
4237 11.7611257678
4238 9.82603261658
4239 -15.8629797251
4240 19.2970155099
4241 -22.3993126436
4242 -21.0956380349
4243 18.5785881514
4244 9.73070917294
4245 -4.68800667674
4246 16.2239626189
4247 -11.4848321197
4248 6.64392227737
4249 -6.88534978982
4250 -31.2143206138
4251 15.4554906647
4252 -15.0506858955
4253 16.5757808408
4254 -13.8961875719
4255 18.9029658484
4256 -12.3900312326
4257 10.8904864989
4258 -11.9009707137
4259 19.5165050778
4260 -11.6154593054
4261 11.2438884219
4262 0.309671616795
4263 4.3

In [11]:
# print (len(margins))

In [12]:
avg_margins = []
count = 0
for x in np.arange(len(margins)/2):
    away_marg = float(margins[int(x*2)]) + float(margins[int((x*2)+1)])
    home_marg = -1*away_marg
    avg_margins.append(away_marg)
    avg_margins.append(home_marg)
nfl['pred_margin'] = avg_margins
    

In [13]:
nfl[['margin', "pred_margin"]]

Unnamed: 0,margin,pred_margin
0,-3,-3.185810
1,3,3.185810
2,-12,3.219538
3,12,-3.219538
4,5,7.757097
5,-5,-7.757097
6,14,-1.395072
7,-14,1.395072
8,-2,-5.756900
9,2,5.756900


In [14]:
count = 0
for g in np.arange(len(avg_margins)/2):
    m = avg_margins[int(g*2)]
#     print(m)
    if m < 0:
        count = count + 1
# print(count)
print(f"Percent of Home Favorites: {count/(len(avg_margins)/2)}")


Percent of Home Favorites: 0.6404494382022472


In [15]:
residuals = []
for key, row in nfl.iterrows():
    residual = row['pred_margin'] - row['margin']
    residuals.append(residual)
diffs = []
for r in np.arange(len(residuals)/2):
    diff = residuals[int(r*2)]
    diffs.append(diff)
# print(diffs)
print(np.average(diffs))
    
    

0.315598770133


In [16]:
total_model = smf.ols(formula='total_points ~ third_per + third_per_allowed + TOP + first_downs + first_downs_allowed + ha + opp + pass_yards + pass_yards_allowed + penalty_yards + plays + rush_yards + rush_yards_allowed + sacked + sacks + takeaways + team + total_yards + total_yards_allowed + turnovers', data=nfl)
total_res = total_model.fit()
print(total_res.summary())

                            OLS Regression Results                            
Dep. Variable:           total_points   R-squared:                       0.525
Model:                            OLS   Adj. R-squared:                  0.517
Method:                 Least Squares   F-statistic:                     66.86
Date:                Wed, 04 Jul 2018   Prob (F-statistic):               0.00
Time:                        12:38:58   Log-Likelihood:                -17680.
No. Observations:                4806   AIC:                         3.552e+04
Df Residuals:                    4727   BIC:                         3.603e+04
Df Model:                          78                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept             -15.5673    

In [17]:
# print(total_res.params)

In [18]:
total_ari_score = 0
total_ari_opp = 0
# for tm in teams:
#     r_team = "team[T." + tm + "]"
#     o_team = "opp[T." + tm + "]"
#     team_score = total_res.params[r_team]
#     opp_score = total_res.params[o_team]
#     ari = ari + team_score
#     opp_ari = opp_ari + opp_score   
# total_ari_score = -1*ari/len(teams)
# total_ari_opp = -1*opp_ari/len(teams)
# print (total_ari_score, total_ari_opp)

In [19]:
nfl['pred_total'] = ""

In [20]:
totals = []
for key, row in nfl.iterrows():
    if row.team == "ARI":
        team_coeff = total_ari_score
    else:
        res_team = "team[T." + row.team + "]"
        team_coeff = total_res.params[res_team]
    if row.opp == "ARI":
        opp_coeff = total_ari_opp
    else:
        res_opp = "team[T." + row.opp + "]"
        opp_coeff = total_res.params[res_opp]
#     print(team_coeff, opp_coeff)
    if row.ha == "away":
        ha_coeff = total_res.params["ha[T.home]"]*-1
    else:
        ha_coeff = total_res.params["ha[T.home]"]*1
#     print(ha_coeff)
    total_predict = total_res.params.Intercept + total_res.params.third_per*row['third_per'] + total_res.params.third_per_allowed*row['third_per_allowed'] + total_res.params.TOP*row['TOP'] + total_res.params.first_downs*row['first_downs'] + total_res.params.first_downs_allowed*row['first_downs_allowed'] + total_res.params.pass_yards*row['pass_yards'] + total_res.params.pass_yards_allowed*row['pass_yards_allowed'] + total_res.params.penalty_yards*row['penalty_yards'] + total_res.params.plays*row['plays'] + total_res.params.rush_yards*row['rush_yards'] + total_res.params.rush_yards_allowed*row['rush_yards_allowed'] + total_res.params.sacked*row['sacked'] + total_res.params.sacks*row['sacks'] + total_res.params.takeaways*row['takeaways'] + total_res.params.total_yards*row['total_yards'] + total_res.params.total_yards_allowed*row['total_yards_allowed'] + total_res.params.turnovers*row['turnovers'] + ha_coeff + team_coeff + opp_coeff
    print(key, total_predict)
    totals.append(total_predict)

0 44.7310420425
1 43.636985013
2 34.1121418598
3 33.4815715149
4 33.3614918318
5 32.6065539059
6 36.1499306886
7 36.2348906235
8 35.2973822699
9 34.9907092735
10 52.4932280493
11 53.8332926817
12 54.2025292796
13 52.6825524922
14 34.1925518946
15 31.7574199693
16 44.1406272642
17 43.5635291454
18 40.8936569364
19 41.6866396493
20 40.2210907058
21 38.1555699297
22 29.5325847355
23 30.196645784
24 44.12865805
25 44.4254651937
26 34.5923321872
27 36.0762264946
28 51.2782350832
29 48.8221842483
30 48.7394864138
31 47.3513055316
32 56.9536571236
33 56.4719136334
34 37.268268893
35 35.1952666994
36 47.3948051051
37 48.0591919515
38 52.5464394139
39 54.6335030465
40 35.3809008046
41 34.22600857
42 30.947629929
43 31.1704641738
44 62.0970265293
45 60.4102784749
46 37.2823488244
47 35.3257439645
48 51.6227963761
49 50.1015254575
50 38.6128562353
51 38.1716498581
52 52.4278130605
53 53.8510206578
54 39.646505794
55 39.0459364461
56 52.1034141378
57 50.5661368816
58 40.5760246346
59 41.4850189859

873 46.5576451871
874 50.394305489
875 50.6793103708
876 32.9811336152
877 32.7239803002
878 48.6073167888
879 49.7766365051
880 58.628109662
881 60.5089229773
882 35.7715576636
883 34.9096393863
884 36.7773030973
885 35.5163365064
886 62.5430482112
887 61.1589030964
888 35.5859382351
889 36.4012335608
890 42.1274349439
891 40.1172163686
892 44.0579168694
893 44.3909184709
894 37.5782228003
895 36.5098060845
896 40.900325488
897 40.871635724
898 46.6547701687
899 45.5138059332
900 24.2702194778
901 24.2143462924
902 49.4908923698
903 49.319723497
904 45.6700630947
905 46.9860638295
906 38.8474660194
907 37.5367311864
908 54.2592735337
909 55.4479974898
910 29.3931418078
911 30.8624324891
912 45.5303293671
913 47.5904656048
914 31.2345309163
915 31.8956612147
916 45.6248512924
917 47.8190877509
918 50.5856781846
919 50.8000867343
920 46.0954264384
921 46.1837369836
922 40.0151953082
923 39.4775685097
924 33.4999640674
925 33.0031255982
926 58.783478878
927 58.6478529403
928 32.222974326

1373 70.7254410352
1374 50.8653151446
1375 50.7999299883
1376 43.9348290825
1377 40.7951040216
1378 30.405981619
1379 30.2714227736
1380 47.8631935499
1381 46.704815835
1382 46.8069352899
1383 47.0213561272
1384 39.1852444209
1385 40.7046159949
1386 41.4673039472
1387 42.6050372581
1388 51.24802995
1389 50.8342964922
1390 39.05016231
1391 39.3760119893
1392 24.6696287095
1393 23.1801621143
1394 46.2488048327
1395 45.1824690367
1396 45.4582398771
1397 44.7251506937
1398 21.94761145
1399 21.9190555442
1400 37.0824784537
1401 37.3740542924
1402 45.4650413936
1403 46.466000182
1404 36.5411388606
1405 36.6575072402
1406 33.4978786368
1407 33.5285539746
1408 41.8379147896
1409 40.6642877814
1410 49.3036316727
1411 48.2203880041
1412 61.4711817389
1413 60.4738683841
1414 40.7827145361
1415 40.4019257021
1416 33.5174882821
1417 32.2146667533
1418 75.0968777838
1419 76.653034946
1420 44.0251043474
1421 43.539358673
1422 35.1426560625
1423 34.4552245978
1424 42.7092696419
1425 41.9457630734
1426

1873 41.3239857434
1874 57.4468281987
1875 57.2715127786
1876 39.4194256686
1877 39.2942435085
1878 56.2466607804
1879 57.8095326249
1880 69.854575384
1881 67.2934947694
1882 32.5387105257
1883 31.7630619298
1884 37.0480682383
1885 36.7292819423
1886 54.757682314
1887 56.5961943194
1888 40.1942669612
1889 39.4884457546
1890 23.3724519652
1891 22.3593287782
1892 25.0748762448
1893 24.4455372262
1894 30.0481854648
1895 29.0878779172
1896 35.5203747236
1897 34.4358515189
1898 43.5394587144
1899 45.0275967939
1900 49.5641728953
1901 50.056109479
1902 30.3111088434
1903 31.0391430947
1904 38.9449250198
1905 38.6582732662
1906 47.7477311004
1907 46.74210951
1908 76.569673821
1909 77.874970619
1910 35.7003884662
1911 38.2191368082
1912 54.8824371821
1913 53.2098923862
1914 63.3543029595
1915 65.764949909
1916 42.2976932017
1917 43.9053211944
1918 28.0329146269
1919 26.8654738556
1920 29.3023288633
1921 30.4903897953
1922 71.5581490068
1923 71.172240367
1924 60.2406913862
1925 58.0626796036
19

2372 47.9831317793
2373 48.8904690502
2374 29.4243637351
2375 27.4690544054
2376 53.4588076113
2377 53.762835431
2378 42.5606033184
2379 41.4574559567
2380 54.0981567492
2381 51.5551167079
2382 47.4712188536
2383 45.9127531543
2384 49.3354459762
2385 49.7151163241
2386 46.2766897564
2387 46.2108299319
2388 64.1688590241
2389 62.8055729941
2390 57.3515788591
2391 57.8262741535
2392 75.0861670699
2393 71.4589433953
2394 77.030247173
2395 79.1852285724
2396 37.126233318
2397 35.9185938454
2398 48.2310683749
2399 47.6081369787
2400 48.832271257
2401 49.0558142623
2402 53.4735410416
2403 55.1221469958
2404 55.5569211328
2405 52.90007881
2406 46.1766527104
2407 43.8861238698
2408 32.7819043813
2409 32.9667313622
2410 39.0578896155
2411 39.8869864001
2412 39.2644988527
2413 38.9700776347
2414 32.7791916474
2415 32.8486204045
2416 45.2787034953
2417 44.3818189655
2418 47.9501681094
2419 47.4779762844
2420 22.5789986376
2421 23.2021471446
2422 49.9629789416
2423 47.6127993891
2424 37.5598837494

2873 40.3404820676
2874 39.2525444809
2875 39.4585025517
2876 50.4181519639
2877 50.3228391122
2878 51.3476914941
2879 53.7081710714
2880 46.4317153336
2881 46.2054416155
2882 54.9398143278
2883 55.1067428197
2884 45.2063624405
2885 47.0184886076
2886 35.8528745354
2887 35.2682582362
2888 46.076852273
2889 45.9759262705
2890 35.647108001
2891 34.5852184398
2892 38.0513624709
2893 37.3174609213
2894 42.8592038456
2895 42.6703685078
2896 46.8753267039
2897 44.9598857425
2898 65.8839750535
2899 65.9308671271
2900 43.9510778389
2901 42.8422684541
2902 59.3436886354
2903 59.5030149458
2904 37.8952516016
2905 40.2370729897
2906 81.2982886272
2907 78.6029721604
2908 71.0122716274
2909 71.8399619875
2910 50.6693586069
2911 51.4776123927
2912 46.2317205905
2913 46.42724807
2914 43.3419226195
2915 42.807523281
2916 41.4994531465
2917 40.0678037461
2918 59.2948954201
2919 57.8075393585
2920 44.8099142097
2921 46.936419389
2922 52.5226828597
2923 52.2576272422
2924 40.1786358184
2925 40.470170565


3372 57.6623763424
3373 56.4662666559
3374 58.9601418934
3375 56.9380615648
3376 45.97114849
3377 44.9141762827
3378 61.3244914766
3379 63.0291081948
3380 49.1431479915
3381 50.0271026127
3382 53.4135960285
3383 52.4915826146
3384 50.0071717963
3385 49.0869090967
3386 30.4179650705
3387 30.781919694
3388 50.1478691113
3389 48.9209381662
3390 57.1527729705
3391 57.3750924534
3392 38.0471144064
3393 38.8161935837
3394 49.9686395059
3395 51.2258123731
3396 45.1433857583
3397 46.0539458631
3398 42.6318507123
3399 42.4310288235
3400 39.8276190801
3401 40.8901895196
3402 53.4472329482
3403 54.2962868932
3404 49.8518814175
3405 49.3046023525
3406 58.1024475927
3407 56.9676557999
3408 49.8650532213
3409 48.8498129434
3410 48.5867297953
3411 50.905106579
3412 45.3674174661
3413 45.6796221529
3414 42.6379758687
3415 43.2087870828
3416 41.5868945449
3417 42.1634516304
3418 55.4380341119
3419 55.5740180307
3420 51.330294097
3421 51.3482268622
3422 25.5575553432
3423 25.7646211991
3424 77.083517849

3874 37.018344104
3875 36.2372672898
3876 36.2108255441
3877 36.7656822905
3878 49.68633764
3879 51.3335011294
3880 40.6519545097
3881 40.1553512285
3882 52.6479300271
3883 51.8590716587
3884 43.7906639415
3885 43.277084333
3886 54.2628870342
3887 52.2340317574
3888 42.135337579
3889 39.9148387297
3890 45.6078677696
3891 47.2183020774
3892 36.1157692195
3893 35.3261128114
3894 55.3463236841
3895 56.8988549142
3896 46.3877483494
3897 45.8645252455
3898 43.1816197172
3899 44.6782015487
3900 51.4206482915
3901 52.3393124393
3902 67.4397283669
3903 66.4026834621
3904 47.6842408697
3905 44.8623813806
3906 51.1289932605
3907 49.1776788079
3908 46.5476277435
3909 46.3674884571
3910 48.4310421073
3911 50.6255279169
3912 44.0310520536
3913 44.5301409348
3914 45.7218768844
3915 44.7844170866
3916 60.1414773136
3917 58.9758866055
3918 52.7827933728
3919 54.9383049589
3920 40.7366332003
3921 42.467070688
3922 46.063742567
3923 45.0112178189
3924 36.0981113653
3925 35.0815275629
3926 53.1390006148


In [21]:
avg_totals = []
count = 0
for x in np.arange(len(totals)/2):
    total = (float(totals[int(x*2)]) + float(totals[int((x*2)+1)]))/2
#     print(total)
    avg_totals.append(total)
    avg_totals.append(total)
nfl['pred_total'] = avg_totals

In [22]:
nfl[['total_points', "pred_total"]]

Unnamed: 0,total_points,pred_total
0,23,44.184014
1,23,44.184014
2,26,33.796857
3,26,33.796857
4,19,32.984023
5,19,32.984023
6,54,36.192411
7,54,36.192411
8,26,35.144046
9,26,35.144046


In [23]:
residuals = []
for key, row in nfl.iterrows():
    residual = row['pred_total'] - row['total_points']
    residuals.append(residual)
diffs = []
for r in np.arange(len(residuals)/2):
    diff = residuals[int(r*2)]
    diffs.append(diff)
# print(diffs)
print(np.average(diffs))

0.330337270561


In [24]:
nfl.head()

Unnamed: 0,third_per,third_per_allowed,TOP,date,first_downs,first_downs_allowed,ha,margin,opp,pass_yards,...,sacked,sacks,takeaways,team,total_points,total_yards,total_yards_allowed,turnovers,pred_margin,pred_total
0,0.307692,0.285714,28.35,9/10/2009,18,19,away,-3,PIT,234,...,1,4,3,TEN,23,320,357,2,-3.18581,44.184014
1,0.285714,0.307692,36.183333,9/10/2009,19,18,home,3,TEN,321,...,4,1,2,PIT,23,357,320,3,3.18581,44.184014
2,0.363636,0.4,29.116667,9/13/2009,16,19,away,-12,ATL,163,...,4,2,0,MIA,26,259,281,4,3.219538,33.796857
3,0.4,0.363636,30.883333,9/13/2009,19,16,home,12,MIA,213,...,2,4,4,ATL,26,281,259,0,-3.219538,33.796857
4,0.25,0.333333,26.55,9/13/2009,10,16,away,5,CIN,227,...,3,3,2,DEN,19,302,307,0,7.757097,32.984023


In [25]:
nfl['pred_pf'] = ""
nfl['pred_pa'] = ""
pred_pfs = []
pred_pas = []

for key, row in nfl.iterrows():
    pred_margin = row['pred_margin']
    pred_total = row['pred_total']

    a = np.array([[1,1], [1,-1]])
    b = np.array([[pred_total], [pred_margin]])

    points = np.linalg.solve(a,b)
    pf = (points[0][0])
    pa = (points[1][0])
    pred_pfs.append(pf)
    pred_pas.append(pa)
nfl['pred_pf'] = pred_pfs
nfl['pred_pa'] = pred_pas
#     print (f'{pf} - {pa}')

In [26]:
nfl.head()

Unnamed: 0,third_per,third_per_allowed,TOP,date,first_downs,first_downs_allowed,ha,margin,opp,pass_yards,...,takeaways,team,total_points,total_yards,total_yards_allowed,turnovers,pred_margin,pred_total,pred_pf,pred_pa
0,0.307692,0.285714,28.35,9/10/2009,18,19,away,-3,PIT,234,...,3,TEN,23,320,357,2,-3.18581,44.184014,20.499102,23.684912
1,0.285714,0.307692,36.183333,9/10/2009,19,18,home,3,TEN,321,...,2,PIT,23,357,320,3,3.18581,44.184014,23.684912,20.499102
2,0.363636,0.4,29.116667,9/13/2009,16,19,away,-12,ATL,163,...,0,MIA,26,259,281,4,3.219538,33.796857,18.508198,15.288659
3,0.4,0.363636,30.883333,9/13/2009,19,16,home,12,MIA,213,...,4,ATL,26,281,259,0,-3.219538,33.796857,15.288659,18.508198
4,0.25,0.333333,26.55,9/13/2009,10,16,away,5,CIN,227,...,2,DEN,19,302,307,0,7.757097,32.984023,20.37056,12.613463


In [27]:
under = 0
for key, row in nfl.iterrows():
    if row.total_points <= row.pred_total:
        under = under + 1
over = len(nfl) - under
under_percent = float(under/len(nfl))
over_percent = float(over/len(nfl))
print (under_percent, over_percent)

0.5287141073657927 0.4712858926342072


In [28]:
points_df = nfl[['team', 'points', 'pred_pf', 'opp', 'points_allowed', 'pred_pa']]
points_df

Unnamed: 0,team,points,pred_pf,opp,points_allowed,pred_pa
0,TEN,10,20.499102,PIT,13,23.684912
1,PIT,13,23.684912,TEN,10,20.499102
2,MIA,7,18.508198,ATL,19,15.288659
3,ATL,19,15.288659,MIA,7,18.508198
4,DEN,12,20.370560,CIN,7,12.613463
5,CIN,7,12.613463,DEN,12,20.370560
6,MIN,34,17.398669,CLE,20,18.793741
7,CLE,20,18.793741,MIN,34,17.398669
8,JAX,12,14.693573,IND,14,20.450473
9,IND,14,20.450473,JAX,12,14.693573


In [29]:
total_res.save("models/total_res.pickle")

In [30]:
nfl.to_csv("data/nfl_prediction.csv")