In [1]:
import io

import numpy as np
import pandas as pd
import requests

In [2]:
# Download every 2019 game (seasonType "both") from the College Football
# Data API.
response = requests.get(
    "https://api.collegefootballdata.com/games",
    params={"year": 2019, "seasonType": "both"},
    timeout=30,  # never let the notebook hang forever on a stalled connection
)
# Fail loudly on an HTTP error instead of handing an error body to the parser.
response.raise_for_status()

# Wrap the payload in a file-like object: passing a literal JSON string to
# pd.read_json is deprecated in recent pandas releases.
data = pd.read_json(io.StringIO(response.text))
data.head()

Unnamed: 0,id,season,week,season_type,start_date,neutral_site,conference_game,attendance,venue_id,venue,home_team,home_conference,home_points,home_line_scores,home_post_win_prob,away_team,away_conference,away_points,away_line_scores,away_post_win_prob
0,401110723,2019,1,regular,2019-08-24T23:00:00.000Z,True,False,,4013,Camping World Stadium,Florida,SEC,24.0,"[7, 0, 10, 7]",0.905953,Miami,ACC,20.0,"[3, 10, 0, 7]",0.094047
1,401114164,2019,1,regular,2019-08-25T02:30:00.000Z,False,False,,3610,Aloha Stadium,Hawai'i,Mountain West,45.0,"[14, 14, 7, 10]",0.68863,Arizona,Pac-12,38.0,"[0, 21, 14, 3]",0.31137
2,401119254,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3700,Doyt Perry Stadium,Bowling Green,Mid-American,46.0,"[13, 17, 7, 9]",0.999979,Morgan State,,3.0,"[0, 3, 0, 0]",2.1e-05
3,401117854,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3854,Nippert Stadium,Cincinnati,American Athletic,24.0,"[7, 3, 7, 7]",0.996829,UCLA,Pac-12,14.0,"[0, 7, 7, 0]",0.003171
4,401119255,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3965,UB Stadium,Buffalo,Mid-American,38.0,"[21, 7, 10, 0]",0.999788,Robert Morris,,10.0,"[7, 3, 0, 0]",0.000212


In [3]:
# Keep only games in which both teams have a conference listed. Rows with a
# missing home or away conference are dropped (the original notes describe
# those as games involving a non-FBS team).
has_home_conference = data['home_conference'].notna()
has_away_conference = data['away_conference'].notna()
data = data.loc[has_home_conference & has_away_conference]

In [4]:
# Keep only games that have a final score for both sides and in which both
# teams have a conference listed.
#
# notna() replaces the `x == x` self-comparison trick: it states the intent
# directly and also rejects None in object-dtype columns, which `x == x` would
# let through.
game_was_played = data['home_points'].notna() & data['away_points'].notna()  # drops games with no score yet
both_have_conference = (
    data['home_conference'].notna()    # drops rows with no home conference
    & data['away_conference'].notna()  # drops rows with no away conference
)

# .copy() makes the result an independent frame, so adding columns to it later
# does not raise SettingWithCopyWarning.
data = data.loc[game_was_played & both_have_conference].copy()

In [5]:
# Points removed from the home team's margin when the game is NOT played at a
# neutral site (presumably a home-field-advantage correction -- confirm the
# value before reusing it for other seasons).
HOME_FIELD_ADJUSTMENT = 2.5

scoring_margin = data['home_points'] - data['away_points']
# `.eq(True)` mirrors the original `== True`: anything other than True
# (including a missing value) is treated as a non-neutral site.
on_neutral_site = data['neutral_site'].eq(True)

# Build the new columns with assign() and rebind `data`, rather than writing
# columns into a frame that came from a boolean filter; the latter triggers
# SettingWithCopyWarning.
data = data.assign(
    home_spread=scoring_margin.where(on_neutral_site, scoring_margin - HOME_FIELD_ADJUSTMENT)
)
# The away side sees the same adjusted margin with the opposite sign.
data = data.assign(away_spread=-data['home_spread'])
data.head()

Unnamed: 0,id,season,week,season_type,start_date,neutral_site,conference_game,attendance,venue_id,venue,...,home_points,home_line_scores,home_post_win_prob,away_team,away_conference,away_points,away_line_scores,away_post_win_prob,home_spread,away_spread
0,401110723,2019,1,regular,2019-08-24T23:00:00.000Z,True,False,,4013,Camping World Stadium,...,24.0,"[7, 0, 10, 7]",0.905953,Miami,ACC,20.0,"[3, 10, 0, 7]",0.094047,4.0,-4.0
1,401114164,2019,1,regular,2019-08-25T02:30:00.000Z,False,False,,3610,Aloha Stadium,...,45.0,"[14, 14, 7, 10]",0.68863,Arizona,Pac-12,38.0,"[0, 21, 14, 3]",0.31137,4.5,-4.5
3,401117854,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3854,Nippert Stadium,...,24.0,"[7, 3, 7, 7]",0.996829,UCLA,Pac-12,14.0,"[0, 7, 7, 0]",0.003171,7.5,-7.5
10,401111653,2019,1,regular,2019-08-30T00:00:00.000Z,False,True,,3836,Memorial Stadium,...,52.0,"[14, 14, 14, 10]",0.999976,Georgia Tech,ACC,14.0,"[0, 0, 7, 7]",2.4e-05,35.5,-35.5
11,401114236,2019,1,regular,2019-08-30T00:00:00.000Z,False,False,,4729,Benson Field at Yulman Stadium,...,42.0,"[7, 21, 14, 0]",0.999668,Florida International,Conference USA,14.0,"[0, 7, 7, 0]",0.000332,25.5,-25.5


In [6]:
# Reshape the per-game frame into one row per (team, game): every game
# contributes a row from the home side's point of view followed, in a second
# stacked frame, by a row from the away side's point of view. The original row
# index is kept, so each index label appears twice in `teams`.
unified_columns = ['team', 'points', 'opponent', 'opp_points', 'spread']

perspectives = []
for own, other in (('home', 'away'), ('away', 'home')):
    source_columns = [
        f'{own}_team',
        f'{own}_points',
        f'{other}_team',
        f'{other}_points',
        f'{own}_spread',
    ]
    column_map = dict(zip(source_columns, unified_columns))
    perspectives.append(data[source_columns].rename(columns=column_map))

teams = pd.concat(perspectives)

teams.head()

Unnamed: 0,team,points,opponent,opp_points,spread
0,Florida,24.0,Miami,20.0,4.0
1,Hawai'i,45.0,Arizona,38.0,4.5
3,Cincinnati,24.0,UCLA,14.0,7.5
10,Clemson,52.0,Georgia Tech,14.0,35.5
11,Tulane,42.0,Florida International,14.0,25.5


In [7]:
# Limit the scoring margin to the range [-28, +28] so that blowouts do not
# dominate a team's average; values inside the range are left unchanged.
teams['spread'] = teams['spread'].clip(lower=-28, upper=28)
teams.head()

Unnamed: 0,team,points,opponent,opp_points,spread
0,Florida,24.0,Miami,20.0,4.0
1,Hawai'i,45.0,Arizona,38.0,4.5
3,Cincinnati,24.0,UCLA,14.0,7.5
10,Clemson,52.0,Georgia Tech,14.0,28.0
11,Tulane,42.0,Florida International,14.0,25.5


In [8]:
# Average points scored per game by each team.
points = teams.groupby('team')['points'].mean()
points.head()

team
Air Force            32.916667
Akron                10.500000
Alabama              45.666667
Appalachian State    38.538462
Arizona              23.454545
Name: points, dtype: float64

In [9]:
# Average points allowed per game by each team.
opp_points = teams.groupby('team')['opp_points'].mean()
opp_points.head()

team
Air Force            20.916667
Akron                36.250000
Alabama              19.916667
Appalachian State    21.000000
Arizona              35.272727
Name: opp_points, dtype: float64

In [10]:
# Average (capped) spread per game for each team; the index is the team name.
spreads = teams.groupby('team')['spread'].mean()
spreads.head()

team
Air Force            11.833333
Akron               -21.125000
Alabama              20.333333
Appalachian State    15.307692
Arizona             -11.363636
Name: spread, dtype: float64

In [11]:
# Build one linear equation per team:
#     rating(team) - (1 / games) * sum(rating(opponent)) = average spread(team)
# `terms` collects the coefficient rows and `solutions` the right-hand sides,
# both ordered like spreads.keys().
#
# NOTE(review): an opponent faced more than once is counted in len(schedule)
# but still receives a single -1/len(schedule) coefficient, so that row does
# not sum to zero. Weighting by games played would make every row sum to zero
# and the matrix singular -- confirm the solver step copes before changing it.
team_names = list(spreads.keys())
terms = []
solutions = []

for team in team_names:
    # opponents this team faced, one entry per game
    schedule = list(teams.loc[teams['team'] == team, 'opponent'])

    # 1 for the team itself, -1/len(schedule) for any opponent it faced,
    # 0 for every team it never played
    row = [
        1 if other == team
        else (-1.0 / len(schedule) if other in schedule else 0)
        for other in team_names
    ]
    terms.append(row)

    # the team's average game spread goes on the other side of the equation
    solutions.append(spreads[team])

In [12]:
# Solve the linear system `terms @ x = solutions` for the per-team ratings x.
# NOTE: the ratings are stored back into `solutions`, replacing the right-hand
# side. Re-running this cell without first re-running the cell that builds
# `terms` and `solutions` would therefore solve against the previous ratings.
coefficient_matrix = np.asarray(terms)
average_spreads = np.asarray(solutions)
solutions = np.linalg.solve(coefficient_matrix, average_spreads)
solutions

array([  1.83152811, -38.99538672,  17.09317512,   2.59362714,
       -14.03554643,  -2.87607636, -14.85549722, -12.46345153,
       -13.89805687,  10.99729137,  -9.17777622, -12.8658184 ,
         4.60687885,   0.48070966, -10.20317539, -31.2534094 ,
        -8.3941979 ,  -4.3775469 , -13.24934   , -17.57644881,
         0.13316888,  16.58388205, -17.89139792, -10.44330409,
       -17.40032253, -27.9047799 ,  -8.8807763 , -22.5308818 ,
       -16.58680614,   9.94968073,  -0.23930534, -18.87627003,
        -7.81604163, -12.27979085,  11.20596687,  -8.53501417,
       -16.85838729, -16.96372389, -10.45136986,  -8.63212612,
        -9.11027461,  -1.8631479 ,   6.40004748,   3.9315234 ,
       -12.74943847,   0.85465581, -13.93984115,  -2.84208967,
        21.27509269, -14.25729279,  -0.8711436 , -15.49807108,
       -10.39369547,  -7.3870365 , -11.97106221, -12.56148814,
         5.15892546,  -6.65042944, -14.50281729,   8.06800547,
        -3.18464261, -14.97531103,   5.35530121,  -4.13

In [13]:
# Pair each team name with its solved rating (both follow the order of
# spreads.keys()) and load the pairs into a two-column frame.
team_names = spreads.keys()
ratings = [(name, value) for name, value in zip(team_names, solutions)]
srs = pd.DataFrame(ratings, columns=['team', 'rating'])
srs.head()

Unnamed: 0,team,rating
0,Air Force,1.831528
1,Akron,-38.995387
2,Alabama,17.093175
3,Appalachian State,2.593627
4,Arizona,-14.035546


In [14]:
# Re-centre the ratings on zero by subtracting the mean rating of all teams.
mean = srs['rating'].mean()
srs['rating'] = srs['rating'].sub(mean)

In [15]:
# Order teams from best to worst rating and renumber the rows from 0.
ordered = srs.sort_values('rating', ascending=False)
rankings = ordered.reset_index(drop=True)[['team', 'rating']]
# Show the top 25 teams (rows 0 through 24).
rankings.head(25)

Unnamed: 0,team,rating
0,LSU,29.366359
1,Ohio State,28.246723
2,Alabama,25.184442
3,Clemson,24.675149
4,Georgia,19.297234
5,Auburn,19.088558
6,Penn State,18.651872
7,Oregon,18.440887
8,Florida,18.040947
9,Oklahoma,17.737135


In [16]:
# Display the complete rankings table, best-rated team first.
rankings

Unnamed: 0,team,rating
0,LSU,29.366359
1,Ohio State,28.246723
2,Alabama,25.184442
3,Clemson,24.675149
4,Georgia,19.297234
...,...,...
125,Connecticut,-19.813513
126,Bowling Green,-23.162143
127,UTEP,-26.881704
128,Akron,-30.904120


In [238]:
# Persist the final rankings as CSV. The row index is written as the first
# (unnamed) column, and the ./data directory must already exist.
output_path = './data/srs_2019.csv'
rankings.to_csv(output_path)