In [1]:
import io

import numpy as np
import pandas as pd
import requests

In [13]:
# Download the 2019 games (both season types) from the College Football Data API.
# NOTE(review): no credentials are sent; confirm the endpoint still allows
# unauthenticated requests.
response = requests.get(
    "https://api.collegefootballdata.com/games",
    params={"year": 2019, "seasonType": "both"},
    timeout=30,  # requests waits indefinitely by default; fail fast instead of hanging the kernel
)
# Stop on an HTTP error instead of trying to parse an error body as game data.
response.raise_for_status()

# Wrap the payload in a file-like object: handing read_json a literal JSON
# string is deprecated in recent pandas releases.
data = pd.read_json(io.StringIO(response.text))
data.head()

Unnamed: 0,id,season,week,season_type,start_date,neutral_site,conference_game,attendance,venue_id,venue,home_team,home_conference,home_points,home_line_scores,home_post_win_prob,away_team,away_conference,away_points,away_line_scores,away_post_win_prob
0,401110723,2019,1,regular,2019-08-24T23:00:00.000Z,True,False,,4013,Camping World Stadium,Florida,SEC,24.0,"[7, 0, 10, 7]",0.905953,Miami,ACC,20.0,"[3, 10, 0, 7]",0.094047
1,401114164,2019,1,regular,2019-08-25T02:30:00.000Z,False,False,,3610,Aloha Stadium,Hawai'i,Mountain West,45.0,"[14, 14, 7, 10]",0.68863,Arizona,Pac-12,38.0,"[0, 21, 14, 3]",0.31137
2,401119255,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3965,UB Stadium,Buffalo,Mid-American,38.0,"[21, 7, 10, 0]",0.999788,Robert Morris,,10.0,"[7, 3, 0, 0]",0.000212
3,401119254,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3700,Doyt Perry Stadium,Bowling Green,Mid-American,46.0,"[13, 17, 7, 9]",0.999979,Morgan State,,3.0,"[0, 3, 0, 0]",2.1e-05
4,401117854,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3854,Nippert Stadium,Cincinnati,American Athletic,24.0,"[7, 3, 7, 7]",0.996829,UCLA,Pac-12,14.0,"[0, 7, 7, 0]",0.003171


In [14]:
# Keep only games that have a final score and where both teams list a conference.
# NOTE(review): a missing conference is used as the marker for a non-FBS team —
# confirm the API always fills in the conference for FBS teams.
data = data[
    data['home_points'].notna()        # drop future games (no home score yet)
    & data['away_points'].notna()      # drop future games (no away score yet)
    & data['home_conference'].notna()  # drop games with a non-FBS home team
    & data['away_conference'].notna()  # drop games with a non-FBS away team
].copy()  # independent frame, so later column assignments do not raise SettingWithCopyWarning

In [15]:
# Points subtracted from the home team's margin to offset playing at home.
# Neutral-site games are left unadjusted.
HOME_FIELD_ADVANTAGE = 2.5

raw_margin = data['home_points'] - data['away_points']
venue_adjustment = np.where(data['neutral_site'].eq(True), 0.0, HOME_FIELD_ADVANTAGE)
home_spread = raw_margin - venue_adjustment

# assign() builds a new frame rather than writing columns into a filtered slice,
# which avoids SettingWithCopyWarning and keeps the cell safe to re-run.
data = data.assign(home_spread=home_spread, away_spread=-home_spread)
data.head()

Unnamed: 0,id,season,week,season_type,start_date,neutral_site,conference_game,attendance,venue_id,venue,...,home_points,home_line_scores,home_post_win_prob,away_team,away_conference,away_points,away_line_scores,away_post_win_prob,home_spread,away_spread
0,401110723,2019,1,regular,2019-08-24T23:00:00.000Z,True,False,,4013,Camping World Stadium,...,24.0,"[7, 0, 10, 7]",0.905953,Miami,ACC,20.0,"[3, 10, 0, 7]",0.094047,4.0,-4.0
1,401114164,2019,1,regular,2019-08-25T02:30:00.000Z,False,False,,3610,Aloha Stadium,...,45.0,"[14, 14, 7, 10]",0.68863,Arizona,Pac-12,38.0,"[0, 21, 14, 3]",0.31137,4.5,-4.5
4,401117854,2019,1,regular,2019-08-29T23:00:00.000Z,False,False,,3854,Nippert Stadium,...,24.0,"[7, 3, 7, 7]",0.996829,UCLA,Pac-12,14.0,"[0, 7, 7, 0]",0.003171,7.5,-7.5
9,401111653,2019,1,regular,2019-08-30T00:00:00.000Z,False,True,,3836,Memorial Stadium,...,52.0,"[14, 14, 14, 10]",0.999976,Georgia Tech,ACC,14.0,"[0, 0, 7, 7]",2.4e-05,35.5,-35.5
11,401114236,2019,1,regular,2019-08-30T00:00:00.000Z,False,False,,4729,Benson Field at Yulman Stadium,...,42.0,"[7, 21, 14, 0]",0.999668,Florida International,Conference USA,14.0,"[0, 7, 7, 0]",0.000332,25.5,-25.5


In [16]:
# Stack the games twice, once from each side, so every row describes a single
# team, its spread in that game, and the opponent it faced.
perspective_columns = ['team', 'spread', 'opponent']

home_view = data[['home_team', 'home_spread', 'away_team']].copy()
home_view.columns = perspective_columns

away_view = data[['away_team', 'away_spread', 'home_team']].copy()
away_view.columns = perspective_columns

# Home rows first, then away rows; the original game index is kept on both halves.
teams = pd.concat([home_view, away_view])

teams.head()

Unnamed: 0,team,spread,opponent
0,Florida,4.0,Miami
1,Hawai'i,4.5,Arizona
4,Cincinnati,7.5,UCLA
9,Clemson,35.5,Georgia Tech
11,Tulane,25.5,Florida International


In [17]:
# Largest scoring margin, in either direction, that a single game may contribute.
MARGIN_CAP = 28

# clip() bounds the spread on both sides in one step; assign() returns a new
# frame so re-running the cell is harmless.
teams = teams.assign(spread=teams['spread'].clip(lower=-MARGIN_CAP, upper=MARGIN_CAP))
teams.head()

Unnamed: 0,team,spread,opponent
0,Florida,4.0,Miami
1,Hawai'i,4.5,Arizona
4,Cincinnati,7.5,UCLA
9,Clemson,28.0,Georgia Tech
11,Tulane,25.5,Florida International


In [18]:
# Average spread per team, indexed by team name.
spreads = teams['spread'].groupby(teams['team']).mean()
spreads.head()

team
Air Force            11.833333
Akron               -21.125000
Alabama              20.333333
Appalachian State    15.307692
Arizona             -11.363636
Name: spread, dtype: float64

In [19]:
# Build one linear equation per team:
#   rating(team) - sum(rating(opponent)) / games_played = average spread
# `terms` collects the coefficient rows and `solutions` the right-hand sides,
# both in the order of the team names in `spreads`.
team_names = list(spreads.keys())

terms = []
solutions = []

for current in team_names:
    # every opponent this team faced, one entry per game played
    faced = teams.loc[teams['team'] == current, 'opponent'].tolist()

    # Coefficients: 1 for the team itself, -1/games for each opponent it
    # faced, 0 for everyone else.
    # NOTE(review): an opponent faced more than once still gets a single
    # -1/len(faced) coefficient even though len(faced) counts every meeting —
    # confirm whether rematches should be weighted per game.
    coefficients = [
        1 if other == current else (-1.0 / len(faced) if other in faced else 0)
        for other in team_names
    ]
    terms.append(coefficients)

    # the team's average game spread is the right-hand side of its equation
    solutions.append(spreads[current])

In [20]:
# Solve the linear system `terms @ ratings = average spreads` for the ratings.
# The right-hand side is read from `spreads` (same team order that `terms` was
# built in) instead of from `solutions`: this cell overwrites `solutions` with
# the ratings, so re-running it would otherwise solve against the previous answer.
solutions = np.linalg.solve(np.array(terms), spreads.to_numpy())
solutions

array([  1.79672725, -39.08903295,  17.06772981,   2.63123524,
       -14.07716457,  -2.92386189, -14.8812829 , -12.42198894,
       -13.92959886,  10.96520255,  -9.21575756, -12.97182226,
         4.57983771,   0.44876888, -10.2441261 , -31.35805973,
        -8.49057353,  -4.41974184, -13.35039431, -17.61234723,
         0.06836768,  16.54668574, -17.84899588, -10.48632802,
       -17.43812369, -27.94919394,  -8.91890716, -22.57238647,
       -16.65961574,   9.92082363,  -0.28219385, -18.90779661,
        -7.84716911, -12.31634427,  11.18280802,  -8.4928969 ,
       -16.84429977, -17.00179517, -10.48487627,  -8.66932718,
        -9.16716596,  -1.91868087,   6.32490962,   3.90373189,
       -12.77354041,   0.82170458, -14.03974808,  -2.8757901 ,
        21.25461242, -14.25571255,  -0.5456657 , -15.45137415,
       -10.4325315 ,  -7.42208726, -12.01403714, -12.61358895,
         5.13528189,  -6.69151336, -14.89635134,   8.01635092,
        -3.24032423, -15.01725176,   5.31139764,  -4.12

In [21]:
# Pair each team name with its solved rating; both sequences share the same order.
ratings = [(name, value) for name, value in zip(spreads.index, solutions)]
srs = pd.DataFrame(ratings, columns=['team', 'rating'])
srs.head()

Unnamed: 0,team,rating
0,Air Force,1.796727
1,Akron,-39.089033
2,Alabama,17.06773
3,Appalachian State,2.631235
4,Arizona,-14.077165


In [22]:
# Best rating first, renumbered from 0 so the row label follows the rank order.
rankings = (
    srs[['team', 'rating']]
    .sort_values(by='rating', ascending=False)
    .reset_index(drop=True)
)
rankings.head(25)  # the top 25 teams

Unnamed: 0,team,rating
0,LSU,21.254612
1,Ohio State,20.082653
2,Alabama,17.06773
3,Clemson,16.546686
4,Georgia,11.182808
5,Auburn,10.965203
6,Penn State,10.504563
7,Oregon,10.306574
8,Florida,9.920824
9,Oklahoma,9.617867


Go back and tinker.

What happens to our ratings if we do any of the following?
* Adjust home field advantage up or down from 2.5
* Remove home field advantage adjustment completely
* Adjust the scoring margin cap up or down from 28
* Remove the scoring margin cap completely