In [1]:
from io import StringIO

import numpy as np
import pandas as pd
import requests

In [2]:
# Download the 2019 game list from the College Football Data API.
response = requests.get(
    "https://api.collegefootballdata.com/games",
    params={"year": 2019},
    timeout=30,  # fail instead of hanging the kernel if the API is unreachable
)
# Stop here on an HTTP error rather than parsing an error payload as game data.
response.raise_for_status()

# Wrap the body in a file-like object: passing a literal JSON string to
# read_json is deprecated in recent pandas releases.
data = pd.read_json(StringIO(response.text))
data.head()

Unnamed: 0,id,season,week,season_type,start_date,neutral_site,conference_game,attendance,venue_id,venue,home_team,home_conference,home_points,home_line_scores,home_post_win_prob,away_team,away_conference,away_points,away_line_scores,away_post_win_prob
0,401110723,2019,1,regular,2019-08-24T23:00:00.000Z,True,False,,4013,Camping World Stadium,Florida,SEC,24.0,"[7, 0, 10, 7]",0.905953,Miami,ACC,20.0,"[3, 10, 0, 7]",0.094047
1,401114164,2019,1,regular,2019-08-25T02:30:00.000Z,False,False,,3610,Aloha Stadium,Hawai'i,Mountain West,45.0,"[14, 14, 7, 10]",0.68863,Arizona,Pac-12,38.0,"[0, 21, 14, 3]",0.31137
2,401119254,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3700,Doyt Perry Stadium,Bowling Green,Mid-American,46.0,"[13, 17, 7, 9]",0.999979,Morgan State,,3.0,"[0, 3, 0, 0]",2.1e-05
3,401119255,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3965,UB Stadium,Buffalo,Mid-American,38.0,"[21, 7, 10, 0]",0.999788,Robert Morris,,10.0,"[7, 3, 0, 0]",0.000212
4,401117854,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3854,Nippert Stadium,Cincinnati,American Athletic,24.0,"[7, 3, 7, 7]",0.996829,UCLA,Pac-12,14.0,"[0, 7, 7, 0]",0.003171


In [3]:
# Keep only games that have a final score and a conference on both sides.
# .copy() makes the result an independent frame, so adding columns to it later
# does not raise pandas' SettingWithCopyWarning.
data = data[
    data['home_points'].notna()        # drop future games (no score yet)
    & data['away_points'].notna()
    & data['home_conference'].notna()  # drop games with a non-FBS home team
    & data['away_conference'].notna()  # drop games with a non-FBS away team
].copy()

In [4]:
# Points subtracted from the home team's margin at non-neutral sites.
HOME_FIELD_ADVANTAGE = 2.5

# Raw scoring margin from the home team's point of view.
home_margin = data['home_points'] - data['away_points']
# Neutral-site games get no home-field adjustment.
venue_adjustment = np.where(data['neutral_site'].eq(True), 0.0, HOME_FIELD_ADVANTAGE)
home_spread = home_margin - venue_adjustment

# assign() returns a new frame, so this works whether or not `data` is a
# filtered view of another frame; away_spread is the mirror image.
data = data.assign(home_spread=home_spread, away_spread=-home_spread)
data.head()

Unnamed: 0,id,season,week,season_type,start_date,neutral_site,conference_game,attendance,venue_id,venue,...,home_points,home_line_scores,home_post_win_prob,away_team,away_conference,away_points,away_line_scores,away_post_win_prob,home_spread,away_spread
0,401110723,2019,1,regular,2019-08-24T23:00:00.000Z,True,False,,4013,Camping World Stadium,...,24.0,"[7, 0, 10, 7]",0.905953,Miami,ACC,20.0,"[3, 10, 0, 7]",0.094047,4.0,-4.0
1,401114164,2019,1,regular,2019-08-25T02:30:00.000Z,False,False,,3610,Aloha Stadium,...,45.0,"[14, 14, 7, 10]",0.68863,Arizona,Pac-12,38.0,"[0, 21, 14, 3]",0.31137,4.5,-4.5
4,401117854,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3854,Nippert Stadium,...,24.0,"[7, 3, 7, 7]",0.996829,UCLA,Pac-12,14.0,"[0, 7, 7, 0]",0.003171,7.5,-7.5
9,401114236,2019,1,regular,2019-08-30T00:00:00.000Z,False,False,,4729,Benson Field at Yulman Stadium,...,42.0,"[7, 21, 14, 0]",0.999668,Florida International,Conference USA,14.0,"[0, 7, 7, 0]",0.000332,25.5,-25.5
10,401111653,2019,1,regular,2019-08-30T00:00:00.000Z,False,True,,3836,Memorial Stadium,...,52.0,"[14, 14, 14, 10]",0.999976,Georgia Tech,ACC,14.0,"[0, 0, 7, 7]",2.4e-05,35.5,-35.5


In [5]:
# Reshape to one row per team per game: every game contributes one row from the
# home side and one from the away side, each with columns team/spread/opponent.
perspectives = []
for side, other_side in (('home', 'away'), ('away', 'home')):
    column_map = {
        f'{side}_team': 'team',
        f'{side}_spread': 'spread',
        f'{other_side}_team': 'opponent',
    }
    perspectives.append(data[list(column_map)].rename(columns=column_map))

# Original row labels are kept (not reset), so each game label appears twice.
teams = pd.concat(perspectives)

teams.head()

Unnamed: 0,team,spread,opponent
0,Florida,4.0,Miami
1,Hawai'i,4.5,Arizona
4,Cincinnati,7.5,UCLA
9,Tulane,25.5,Florida International
10,Clemson,35.5,Georgia Tech


In [6]:
# Largest scoring margin (in either direction) a single game may contribute.
MARGIN_CAP = 28

# Clamp every spread into [-MARGIN_CAP, +MARGIN_CAP] so blowouts do not dominate the averages.
teams['spread'] = teams['spread'].clip(lower=-MARGIN_CAP, upper=MARGIN_CAP)
teams.head()

Unnamed: 0,team,spread,opponent
0,Florida,4.0,Miami
1,Hawai'i,4.5,Arizona
4,Cincinnati,7.5,UCLA
9,Tulane,25.5,Florida International
10,Clemson,28.0,Georgia Tech


In [7]:
# Mean of the `spread` column for each team, indexed by team name.
spreads = teams.groupby('team')['spread'].mean()
spreads.head()

team
Air Force            12.000000
Akron               -21.125000
Alabama              20.454545
Appalachian State    15.416667
Arizona             -11.363636
Name: spread, dtype: float64

In [8]:
# Build the linear system, one equation per team:
#   terms     -> coefficient rows (one column per team, in spreads' order)
#   solutions -> right-hand side (each team's average spread)
team_names = list(spreads.keys())
terms = []
solutions = []

for team in team_names:
    # every opponent this team faced, one entry per game
    opps = list(teams.loc[teams['team'] == team, 'opponent'])
    faced = set(opps)  # constant-time membership checks

    # Coefficients: 1 for the team itself, -1/(number of games) for each
    # opponent faced, 0 for everyone else.
    # NOTE(review): an opponent faced more than once still receives a single
    # -1/len(opps) coefficient, so rematches are not weighted by games played.
    # Confirm whether that is intended before changing it: weighting by games
    # would make every row sum to zero and the matrix singular.
    terms.append([
        1 if other == team else (-1.0 / len(opps) if other in faced else 0)
        for other in team_names
    ])

    # average game spread goes on the other side of the equation
    solutions.append(spreads[team])

In [9]:
# Solve coefficient_matrix @ ratings = average_spreads for the ratings vector.
coefficient_matrix = np.array(terms)
average_spreads = np.array(solutions)
# NOTE: `solutions` is rebound from the right-hand side to the solved ratings,
# so re-running this cell without re-running the previous one would feed the
# ratings back in as the right-hand side.
solutions = np.linalg.solve(coefficient_matrix, average_spreads)
solutions

array([  1.07647107, -39.71018443,  15.61425795,   2.97027268,
       -13.83185223,  -2.75915096, -15.54059814, -12.21375459,
       -14.29036467,  11.36293333,  -8.86815421, -13.66030523,
         5.09260186,   1.94683885,  -9.27395053, -32.25912006,
       -10.3102937 ,  -5.0920695 , -12.1945614 , -17.05853913,
        -1.20406913,  15.13206021, -17.6229709 , -10.30197646,
       -17.889946  , -28.06548159,  -9.60632527, -22.55132449,
       -17.61986857,   9.6168355 ,  -2.47060844, -19.32470989,
        -7.93547387, -12.84819196,   9.93614063,  -7.4405892 ,
       -15.85618203, -17.40086765, -11.32706472,  -8.51238676,
        -9.05401275,  -2.04972248,   4.20573927,   5.57038283,
       -12.44532442,   1.01953226, -15.78073154,  -4.25291685,
        19.39843467, -16.05950124,  -0.46621738, -14.9141738 ,
       -12.75767743,  -9.16404899, -12.05599525, -13.02980674,
         6.0452343 ,  -5.603778  , -15.69904057,   8.13183928,
        -3.76529059, -15.64281171,   3.58297106,  -3.55

In [10]:
# Pair each team name with its solved rating (both are in spreads' order)
# and tabulate the pairs.
ratings = [(name, value) for name, value in zip(spreads.keys(), solutions)]
srs = pd.DataFrame(data=ratings, columns=['team', 'rating'])
srs.head()

Unnamed: 0,team,rating
0,Air Force,1.076471
1,Akron,-39.710184
2,Alabama,15.614258
3,Appalachian State,2.970273
4,Arizona,-13.831852


In [11]:
# Order teams from highest to lowest rating, renumber the rows from 0,
# and show the top 25.
rankings = (
    srs[['team', 'rating']]
    .sort_values(by='rating', ascending=False)
    .reset_index(drop=True)
)
rankings.head(25)

Unnamed: 0,team,rating
0,Ohio State,20.125317
1,LSU,19.398435
2,Alabama,15.614258
3,Clemson,15.13206
4,Auburn,11.362933
5,Utah,11.121653
6,Oklahoma,11.116157
7,Oregon,10.589549
8,Georgia,9.936141
9,Florida,9.616835


Go back and tinker.

What happens to our ratings if we do any of the following?
* Adjust home field advantage up or down from 2.5
* Remove home field advantage adjustment completely
* Adjust the scoring margin cap up or down from 28
* Remove the scoring margin cap completely