In [1]:
import pandas as pd
np=pd.np
from sdd_api.api import Api
from credentials import *
import matplotlib.pyplot as plt
%matplotlib inline
pd.options.display.max_columns=None
api = Api(username=username, password=password, client_id=client_id, client_secret=client_secret)

In [32]:
matchups=api.get_dataframe("matchups",season_start=2000)

### Lets Predict the Game Winner  
We'll use data from 2011 season and on since we have line information for these games

In [3]:
matchups=matchups[(matchups['game_type'].isin(['Regular','Playoffs']))]
matchups.head(2)

Unnamed: 0,season,home_name,away_name,full_game_type,away_first_downs,away_pass_yds,away_pts,away_rush_yds,away_to,away_yards,game_datetime,game_location,game_outcome,game_time,game_type,had_overtime,home_first_downs,home_pass_yds,home_pts,home_rush_yds,home_to,home_yards,teams,week_num,away_conference,conference_play,home_line,over_under,handicap_difference,home_conference,line_moves,matchup_id
0,2000,KC,IND,Week 1,20.0,267.0,27.0,119.0,2.0,386.0,967986000000,,L,1:00PM ET,Regular,False,17.0,206.0,14.0,74.0,1.0,280.0,"IND,KC",1.0,,,,,,,,20000903IND@KC
1,2000,MIN,CHI,Week 1,23.0,272.0,27.0,153.0,1.0,425.0,967986000000,,W,1:00PM ET,Regular,False,17.0,188.0,30.0,186.0,1.0,374.0,"CHI,MIN",1.0,,,,,,,,20000903CHI@MIN


In [4]:
matchups.describe()

Unnamed: 0,season,away_first_downs,away_pass_yds,away_pts,away_rush_yds,away_to,away_yards,game_datetime,home_first_downs,home_pass_yds,home_pts,home_rush_yds,home_to,home_yards,week_num,home_line,over_under,handicap_difference,line_moves
count,4779.0,4523.0,4523.0,4779.0,4523.0,4523.0,4523.0,4779.0,4523.0,4523.0,4779.0,4523.0,4523.0,4523.0,4779.0,1855.0,1855.0,1855.0,1599.0
mean,2008.507219,18.535043,216.562016,19.420381,109.54057,1.702852,326.102587,1242153000000.0,19.56887,223.035596,21.861268,118.055936,1.638293,341.091532,8.830927,-1.979245,38.39434,0.406469,20.479049
std,5.166514,5.039148,78.708184,10.845817,50.873036,1.380235,86.325947,163066800000.0,4.970397,77.163344,11.358121,52.451142,1.367165,83.847646,5.084415,5.265136,16.658077,0.985062,19.106006
min,2000.0,2.0,-9.0,0.0,-18.0,0.0,26.0,967986000000.0,3.0,6.0,0.0,-3.0,0.0,77.0,1.0,-26.5,0.0,0.0,0.0
25%,2004.0,15.0,160.5,13.0,72.0,1.0,267.0,1100509000000.0,16.0,169.0,14.0,81.0,1.0,283.0,4.0,-6.0,40.5,0.0,14.0
50%,2009.0,19.0,213.0,20.0,102.0,1.0,326.0,1252847000000.0,19.0,218.0,22.0,112.0,1.0,340.0,9.0,-2.5,44.0,0.0,19.0
75%,2013.0,22.0,269.0,27.0,139.0,3.0,386.0,1383495000000.0,23.0,272.0,30.0,148.0,2.0,396.0,13.0,1.5,47.5,0.0,25.0
max,2017.0,37.0,516.0,59.0,351.0,8.0,626.0,1514738000000.0,40.0,522.0,62.0,407.0,8.0,653.0,17.0,16.0,60.0,9.0,673.0


### We will use 538's Elo Algorithm
- [538's Elo Introduction](https://fivethirtyeight.com/datalab/introducing-nfl-elo-ratings/)
- [More Elo Description](https://fivethirtyeight.com/datalab/nfl-elo-ratings-are-back/)  

### The features for this model are:
- game location (home, away, neutral)
- team score
- opponent score

In [5]:
matchups['is_neutral']=matchups['game_location'].apply(lambda x: True if x=="N" else False)

In [6]:
from collections import defaultdict
def silverK(MOV, elo_diff):
    K_0=20
    multiplier=np.log(abs(MOV)+1)*(2.2/((elo_diff)*.001+2.2))
    return K_0*multiplier,K_0*multiplier

def silverS(home_score, away_score):
    S_home,S_away=0,0
    if home_score>away_score:
        S_home=1
    elif away_score>home_score:
        S_away=1
    else:
        S_home,S_away=.5,.5
    return S_home,S_away

def silver_elo_update(home_score, away_score, home_rating, away_rating, isNeutral):
    HOME_AD=65.
    if not isNeutral:
        pass#home_rating+=HOME_AD
    E_home = elo_prediction(home_rating,away_rating)
    E_away=1-E_home
    elo_diff=home_rating-away_rating
    MOV=home_score-away_score
    
    S_home,S_away = silverS(home_score,away_score)
    if home_score>=away_score:
        elo_winner=home_rating
        elo_loser=away_rating
    else:
        elo_loser=home_rating
        elo_winner=away_rating

    K_home,K_away =  silverK(MOV,elo_winner-elo_loser)
        
    return K_home*(S_home-E_home),K_away*(S_away-E_away)

def elo_prediction(home_rating,away_rating):
    E_home = 1./(1 + 10 ** ((away_rating - home_rating) / (400.)))
    return E_home

def score_prediction(home_rating,away_rating):
    return (home_rating-away_rating)/25.
class HeadToHeadModel(object):
    def __init__(self, events, update_function, prediction_function=None):
        self.update_function=update_function
        self.events=events
        self.ratings=defaultdict(lambda: 1505)
        self.prediction_function = prediction_function
        self.predictions = []
        self.curr_season=defaultdict(lambda: self.events[0][1]['season'])

    def compute_elo_ratings(self):
        for idx, event in self.events:
            new_year=event['season']
            label_i=event['home_name']
            label_j=event['away_name']
            
            if self.curr_season[label_i]!=new_year:
                self.curr_season[label_i]=new_year
                self.ratings[label_i]=self.ratings[label_i]*1/3+1505.*2/3
            elif self.curr_season[label_j]!=new_year:
                self.curr_season[label_j]=new_year
                self.ratings[label_j]=self.ratings[label_j]*.75+1505.*.25
            #todo change below to just use event
            update=self.update_function(event['home_pts'],event['away_pts'], self.ratings[label_i], self.ratings[label_j], event['is_neutral'])
            self.ratings[label_i]+=update[0]
            self.ratings[label_j]+=update[1]
            

    def power_rankings(self):
        from operator import itemgetter
        #sort dictionary by value to get ascending list of teams
        power_rankings = sorted(self.ratings.items(), key=itemgetter(1), reverse=True)
        power = []
        #Make the 0-th team 1st
        for i, x in enumerate(power_rankings):
            power.append((i + 1, x))
        return power
m=HeadToHeadModel(list(matchups.iterrows()), silver_elo_update, elo_prediction)
m.compute_elo_ratings()
m.power_rankings()

[(1, ('NE', 1590.3767258914979)),
 (2, ('ATL', 1561.3228364151155)),
 (3, ('KC', 1551.0735039890869)),
 (4, ('PIT', 1544.3732268607396)),
 (5, ('GB', 1542.5398821074568)),
 (6, ('DAL', 1541.3758414935351)),
 (7, ('SEA', 1528.2317873653121)),
 (8, ('DEN', 1523.714812469063)),
 (9, ('OAK', 1516.9882377163781)),
 (10, ('NYG', 1516.8975545911578)),
 (11, ('PHI', 1511.2520281254072)),
 (12, ('ARI', 1510.8387054962996)),
 (13, ('CIN', 1509.5769701572649)),
 (14, ('IND', 1509.5495690080013)),
 (15, ('MIA', 1509.2074284132491)),
 (16, ('CAR', 1509.1019677386032)),
 (17, ('WAS', 1508.878368529648)),
 (18, ('TB', 1506.0411867416799)),
 (19, ('NO', 1504.7564943094728)),
 (20, ('HOU', 1502.9392769429605)),
 (21, ('BAL', 1500.6694275182024)),
 (22, ('MIN', 1500.6664400019449)),
 (23, ('DET', 1497.8563631076895)),
 (24, ('TEN', 1496.731848276645)),
 (25, ('BUF', 1493.7136084033937)),
 (26, ('NYJ', 1480.5224654593383)),
 (27, ('LAC', 1478.9229605089768)),
 (28, ('JAC', 1458.4983590104241)),
 (29, ('L

### What Good are Rankings without Testing?
Elo is a robust algorithm for strength of schedule based ratings. But what good are rankings without testing them?!?. Let's test our rankings at picking the winners and against the spread.

In [7]:
#Let's redo our elo model with the addition of elo ranking output
class HeadToHeadModel(object):
    def __init__(self, events, update_function, prediction_function=None):
        self.update_function=update_function
        self.events=events
        self.ratings=defaultdict(lambda: 1505)
        self.prediction_function = prediction_function
        self.predictions = []
        self.curr_season=defaultdict(lambda: self.events[0][1]['season'])
        self.elos=[]
    def compute_elo_ratings(self):
        for idx, event in self.events:
            new_year=event['season']
            label_i=event['home_name']
            label_j=event['away_name']
            
            if self.curr_season[label_i]!=new_year:
                self.curr_season[label_i]=new_year
                self.ratings[label_i]=self.ratings[label_i]*1/3+1505.*2/3
            elif self.curr_season[label_j]!=new_year:
                self.curr_season[label_j]=new_year
                self.ratings[label_j]=self.ratings[label_j]*.75+1505.*.25
            #todo change below to just use event
            update=self.update_function(event['home_pts'],event['away_pts'], self.ratings[label_i], self.ratings[label_j], event['is_neutral'])
            self.elos.append({
                "home_elo":self.ratings[label_i],
                "away_elo":self.ratings[label_j],
                "index": idx,
                
                             })
            self.ratings[label_i]+=update[0]
            self.ratings[label_j]+=update[1]
            

            

    def power_rankings(self):
        from operator import itemgetter
        #sort dictionary by value to get ascending list of teams
        power_rankings = sorted(self.ratings.items(), key=itemgetter(1), reverse=True)
        power = []
        #Make the 0-th team 1st
        for i, x in enumerate(power_rankings):
            power.append((i + 1, x))
        return power
m=HeadToHeadModel(list(matchups.iterrows()), silver_elo_update, elo_prediction)
m.compute_elo_ratings()
m.power_rankings()

[(1, ('NE', 1590.3767258914979)),
 (2, ('ATL', 1561.3228364151155)),
 (3, ('KC', 1551.0735039890869)),
 (4, ('PIT', 1544.3732268607396)),
 (5, ('GB', 1542.5398821074568)),
 (6, ('DAL', 1541.3758414935351)),
 (7, ('SEA', 1528.2317873653121)),
 (8, ('DEN', 1523.714812469063)),
 (9, ('OAK', 1516.9882377163781)),
 (10, ('NYG', 1516.8975545911578)),
 (11, ('PHI', 1511.2520281254072)),
 (12, ('ARI', 1510.8387054962996)),
 (13, ('CIN', 1509.5769701572649)),
 (14, ('IND', 1509.5495690080013)),
 (15, ('MIA', 1509.2074284132491)),
 (16, ('CAR', 1509.1019677386032)),
 (17, ('WAS', 1508.878368529648)),
 (18, ('TB', 1506.0411867416799)),
 (19, ('NO', 1504.7564943094728)),
 (20, ('HOU', 1502.9392769429605)),
 (21, ('BAL', 1500.6694275182024)),
 (22, ('MIN', 1500.6664400019449)),
 (23, ('DET', 1497.8563631076895)),
 (24, ('TEN', 1496.731848276645)),
 (25, ('BUF', 1493.7136084033937)),
 (26, ('NYJ', 1480.5224654593383)),
 (27, ('LAC', 1478.9229605089768)),
 (28, ('JAC', 1458.4983590104241)),
 (29, ('L

In [8]:
elo=pd.DataFrame(m.elos).set_index("index").join(matchups)
elo.head(2)

Unnamed: 0_level_0,away_elo,home_elo,season,home_name,away_name,full_game_type,away_first_downs,away_pass_yds,away_pts,away_rush_yds,away_to,away_yards,game_datetime,game_location,game_outcome,game_time,game_type,had_overtime,home_first_downs,home_pass_yds,home_pts,home_rush_yds,home_to,home_yards,teams,week_num,away_conference,conference_play,home_line,over_under,handicap_difference,home_conference,line_moves,matchup_id,is_neutral
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
0,1505.0,1505.0,2000,KC,IND,Week 1,20.0,267.0,27.0,119.0,2.0,386.0,967986000000,,L,1:00PM ET,Regular,False,17.0,206.0,14.0,74.0,1.0,280.0,"IND,KC",1.0,,,,,,,,20000903IND@KC,False
1,1505.0,1505.0,2000,MIN,CHI,Week 1,23.0,272.0,27.0,153.0,1.0,425.0,967986000000,,W,1:00PM ET,Regular,False,17.0,188.0,30.0,186.0,1.0,374.0,"CHI,MIN",1.0,,,,,,,,20000903CHI@MIN,False


In [9]:
elo.describe()

Unnamed: 0,away_elo,home_elo,season,away_first_downs,away_pass_yds,away_pts,away_rush_yds,away_to,away_yards,game_datetime,home_first_downs,home_pass_yds,home_pts,home_rush_yds,home_to,home_yards,week_num,home_line,over_under,handicap_difference,line_moves
count,4779.0,4779.0,4779.0,4523.0,4523.0,4779.0,4523.0,4523.0,4523.0,4779.0,4523.0,4523.0,4779.0,4523.0,4523.0,4523.0,4779.0,1855.0,1855.0,1855.0,1599.0
mean,1513.313253,1510.496936,2008.507219,18.535043,216.562016,19.420381,109.54057,1.702852,326.102587,1242153000000.0,19.56887,223.035596,21.861268,118.055936,1.638293,341.091532,8.830927,-1.979245,38.39434,0.406469,20.479049
std,79.673425,77.425703,5.166514,5.039148,78.708184,10.845817,50.873036,1.380235,86.325947,163066800000.0,4.970397,77.163344,11.358121,52.451142,1.367165,83.847646,5.084415,5.265136,16.658077,0.985062,19.106006
min,1265.552531,1273.134241,2000.0,2.0,-9.0,0.0,-18.0,0.0,26.0,967986000000.0,3.0,6.0,0.0,-3.0,0.0,77.0,1.0,-26.5,0.0,0.0,0.0
25%,1460.011054,1460.855212,2004.0,15.0,160.5,13.0,72.0,1.0,267.0,1100509000000.0,16.0,169.0,14.0,81.0,1.0,283.0,4.0,-6.0,40.5,0.0,14.0
50%,1511.966954,1509.207428,2009.0,19.0,213.0,20.0,102.0,1.0,326.0,1252847000000.0,19.0,218.0,22.0,112.0,1.0,340.0,9.0,-2.5,44.0,0.0,19.0
75%,1566.15684,1558.274277,2013.0,22.0,269.0,27.0,139.0,3.0,386.0,1383495000000.0,23.0,272.0,30.0,148.0,2.0,396.0,13.0,1.5,47.5,0.0,25.0
max,1784.028956,1812.2688,2017.0,37.0,516.0,59.0,351.0,8.0,626.0,1514738000000.0,40.0,522.0,62.0,407.0,8.0,653.0,17.0,16.0,60.0,9.0,673.0


### Prediction and Testing
Since we are seeding our rankings starting with the 2011 season it would be unfair to our model to expect to it be right in the beginning. For our purposes, we'll look at performance during the whole season and during weeks 4-15 to allow rating to settle and not lose when a team rests their players in weeks 16 and 17

In [10]:
def predict_home_margin(row):
    #how many points the home team is expected to win bye
    return score_prediction(row['home_elo']+65, row['away_elo'])
elo['predicted_home_margin']=elo.apply(predict_home_margin,axis=1)

In [11]:
elo.head(1)

Unnamed: 0_level_0,away_elo,home_elo,season,home_name,away_name,full_game_type,away_first_downs,away_pass_yds,away_pts,away_rush_yds,away_to,away_yards,game_datetime,game_location,game_outcome,game_time,game_type,had_overtime,home_first_downs,home_pass_yds,home_pts,home_rush_yds,home_to,home_yards,teams,week_num,away_conference,conference_play,home_line,over_under,handicap_difference,home_conference,line_moves,matchup_id,is_neutral,predicted_home_margin
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
0,1505.0,1505.0,2000,KC,IND,Week 1,20.0,267.0,27.0,119.0,2.0,386.0,967986000000,,L,1:00PM ET,Regular,False,17.0,206.0,14.0,74.0,1.0,280.0,"IND,KC",1.0,,,,,,,,20000903IND@KC,False,2.6


In [12]:
#1 is home winning
elo['predicted_winner']=elo.apply(lambda row: 1 if row['predicted_home_margin']>=0 else 0, axis=1)


In [13]:
elo['vegas_predicted_winner']=elo.apply(lambda row: 0 if row['home_line']>0 else 1, axis=1)#line assigns negative value
elo['winner']=elo.apply(lambda row: 1 if row['home_pts']>row['away_pts'] else 0, axis=1)#does not account for ties

Since this is a binary decision problem, either win or lose our bet, we will use [confusion matrices](https://en.wikipedia.org/wiki/Confusion_matrix) to judge the performance of our model

In [16]:
from sklearn.metrics import confusion_matrix
def print_confusion_matrix(y_true, y_pred):
    conf_matrix=confusion_matrix(y_true, y_pred)
    success_rate=np.trace(conf_matrix)/np.sum(conf_matrix)
    print(success_rate)
    print(conf_matrix)
test_period=elo[(elo['week_num'].between(4,15,inclusive=True))&(elo['game_type']=='Regular')&(elo['season'].between(2012,2016))]

In [17]:
y_true='winner'
y_pred='predicted_winner'
print_confusion_matrix(test_period['winner'],test_period['predicted_winner'])

0.644318181818
[[168 223]
 [ 90 399]]


In [18]:
y_true='winner'
y_pred='vegas_predicted_winner'
print_confusion_matrix(test_period['winner'],test_period[y_pred])

0.659090909091
[[200 191]
 [109 380]]


### Vegas is pretty good huh
We were good at predicting winners but vegas is slightly better... The highest we've gotten with a pure elo approach using more years of training is 67%. Let's look at our against the spread performance and see if we can find an edge.

In [20]:
elo.head(1)

Unnamed: 0_level_0,away_elo,home_elo,season,home_name,away_name,full_game_type,away_first_downs,away_pass_yds,away_pts,away_rush_yds,away_to,away_yards,game_datetime,game_location,game_outcome,game_time,game_type,had_overtime,home_first_downs,home_pass_yds,home_pts,home_rush_yds,home_to,home_yards,teams,week_num,away_conference,conference_play,home_line,over_under,handicap_difference,home_conference,line_moves,matchup_id,is_neutral,predicted_home_margin,predicted_winner,vegas_predicted_winner,winner
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
0,1505.0,1505.0,2000,KC,IND,Week 1,20.0,267.0,27.0,119.0,2.0,386.0,967986000000,,L,1:00PM ET,Regular,False,17.0,206.0,14.0,74.0,1.0,280.0,"IND,KC",1.0,,,,,,,,20000903IND@KC,False,2.6,1,1,0


In [21]:
elo['home_margin']=elo.apply(lambda x: x['home_pts']-x['away_pts'], axis=1)
elo['home_bet']=elo.apply(lambda x: (x['predicted_home_margin']+x['home_line'])<0,axis=1)
elo['home_covers']=elo.apply(lambda x: (x['home_margin']+x['home_line'])>0,axis=1)
test_period=elo[(elo['week_num'].between(4,15,inclusive=True))&(elo['game_type']=='Regular')&(elo['season'].between(2012,2016))]
y_true='home_covers'
y_pred='home_bet'
print_confusion_matrix(test_period[y_true],test_period[y_pred])

0.501136363636
[[248 224]
 [215 193]]


That won't win for us. Under standard wagering strategies you need to be better than 52.4%

### Performance each season during test period

In [22]:
elo['predicted_winner_right']=elo.apply(lambda x: 1 if x['winner']==x['predicted_winner'] else 0, axis=1)
elo['vegas_favored_wins']=elo.apply(lambda x: 1 if x['winner']==x['vegas_predicted_winner'] else 0, axis=1)
elo['ats_right']=elo.apply(lambda x: 1 if x['home_covers']==x['home_bet'] else 0, axis=1)
test_period=elo[(elo['week_num'].between(4,15,inclusive=True))&(elo['game_type']=='Regular')&(elo['season'].between(2012,2016))]


In [23]:
test_period[['predicted_winner_right','vegas_favored_wins','ats_right','season']].groupby("season").agg([np.mean, np.var])

Unnamed: 0_level_0,predicted_winner_right,predicted_winner_right,vegas_favored_wins,vegas_favored_wins,ats_right,ats_right
Unnamed: 0_level_1,mean,var,mean,var,mean,var
season,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2012,0.607955,0.239708,0.647727,0.229481,0.545455,0.249351
2013,0.636364,0.232727,0.676136,0.220227,0.539773,0.249838
2014,0.681818,0.218182,0.681818,0.218182,0.482955,0.251136
2015,0.659091,0.225974,0.647727,0.229481,0.4375,0.2475
2016,0.636364,0.232727,0.642045,0.231136,0.5,0.251429


### Results
ATS is chancey but performance against vegas for winners is pretty close.

In [25]:
# for all years in our sample we do about just as good as vegas but no better
test_period[['predicted_winner_right','vegas_favored_wins','ats_right']].groupby(lambda x: 0).agg([np.mean, np.var])

Unnamed: 0_level_0,predicted_winner_right,predicted_winner_right,vegas_favored_wins,vegas_favored_wins,ats_right,ats_right
Unnamed: 0_level_1,mean,var,mean,var,mean,var
0,0.644318,0.229433,0.659091,0.224946,0.501136,0.250283


### Next Steps
So now you've seen how to build a strength of schedule (SOS) model. You might think you can make it better, and you probably can, but it would be a better use of your team to create an orthogonal model not based on SOS. Something like how a team plays:  
- vertical "Air Coryell" offense
- 3-4 defense
And if they are a great running team could complement SOS models very well.  
  
Check out our team_season_log table and our team_game_logs to get the data you need to build it.

In [34]:
team_season_log = api.get_dataframe("team_season_log")
team_season_log.head()

Unnamed: 0,coaches,league_name,losses,playoff_result,points,points_diff,points_opp,rank_def_pts,rank_def_yds,rank_off_pts,rank_off_yds,rank_points_diff,rank_takeaway_giveaway,rank_yds_diff,historical_team_name,team_name,teams_in_league,ties,wins,season,defensive_coordinator,offensive_coordinator,offensive_scheme,base_defense,stadium
0,Arians,NFL,8,,418,56,362,14,2,6,9,7,17,1,Arizona Cardinals,ARI,32,1,7,2016,James Bettcher,Harold Goodwin,Air Coryell,3-4,University of Phoenix
1,Quinn,NFL,5,Lost SB,540,134,406,27,25,1,2,2,4,4,Atlanta Falcons,ATL,32,0,11,2016,Richard Smith,Kyle Shanahan,West Coast,4-3,Georgia Dome
2,Harbaugh,NFL,8,,343,22,321,9,7,21,17,13,9,8,Baltimore Ravens,BAL,32,0,8,2016,Dean Pees,"Marty Mornhinweg, Marc Trestman",West Coast,3-4,M&T; Bank
3,"Ryan,Lynn",NFL,9,,399,21,378,16,19,10,16,14,7,20,Buffalo Bills,BUF,32,0,7,2016,Dennis Thurman,"Anthony Lynn, Greg Roman",West Coast,3-4,New Era Field
4,Rivera,NFL,10,,369,-33,402,26,21,15,19,25,21,25,Carolina Panthers,CAR,32,0,6,2016,Sean McDermott,Mike Shula,Erhardt-Perkins,4-3,Bank of America


In [37]:
team_game_logs = api.get_dataframe("team_game_logs")
team_game_logs.head()

Unnamed: 0,season,team_name,full_game_type,opp_first_downs,team_first_downs,game_location,game_outcome,game_time,opp_pass_yds,team_pass_yds,opp_pts,team_pts,opp_rush_yds,team_rush_yds,opp_to,team_to,opp_yards,team_yards,game_type,week_num,opp_name,had_overtime,game_datetime
0,2016,DET,Wild Card,24.0,13.0,@,L,8:15PM ET,210.0,182.0,26.0,6.0,177.0,49.0,0.0,0.0,387.0,231.0,Playoffs,1,SEA,False,2017-01-07 20:15:00
1,2016,GB,Wild Card,15.0,23.0,,W,4:40PM ET,295.0,331.0,13.0,38.0,70.0,75.0,2.0,0.0,365.0,406.0,Playoffs,1,NYG,False,2017-01-08 16:40:00
2,2016,HOU,Wild Card,16.0,19.0,,W,4:35PM ET,139.0,168.0,14.0,27.0,64.0,123.0,3.0,0.0,203.0,291.0,Playoffs,1,OAK,False,2017-01-07 16:35:00
3,2016,MIA,Wild Card,19.0,18.0,@,L,1:05PM ET,188.0,253.0,30.0,12.0,179.0,52.0,2.0,3.0,367.0,305.0,Playoffs,1,PIT,False,2017-01-08 13:05:00
4,2016,NYG,Wild Card,23.0,15.0,@,L,4:40PM ET,331.0,295.0,38.0,13.0,75.0,70.0,0.0,2.0,406.0,365.0,Playoffs,1,GB,False,2017-01-08 16:40:00
