In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests

pd.set_option('display.max_columns', None)



response = requests.get(
    "https://api.collegefootballdata.com/games",
    params={"year": 2019, "seasonType": "both"}
)

data = pd.read_json(response.text)
data.head()

data = data[
    (data['home_points'] == data['home_points'])
    & (data['away_points'] == data['away_points'])
    & (pd.notna(data['home_conference']))
    & (pd.notna(data['away_conference']))
]

data['home_spread'] = np.where(data['neutral_site'] == True, data['home_points'] - data['away_points'], (data['home_points'] - data['away_points'] - 4.425064599483204))
data['away_spread'] = -data['home_spread']
data['home_spread'].mean()


teams = pd.concat([
    data[['home_team', 'home_spread', 'away_team']].rename(columns={'home_team': 'team', 'home_spread': 'spread', 'away_team': 'opponent'}),
    data[['away_team', 'away_spread', 'home_team']].rename(columns={'away_team': 'team', 'away_spread': 'spread', 'home_team': 'opponent'})
])

teams.head()

teams['spread'] = np.where(teams['spread'] > 28, 28, teams['spread']) # cap the upper bound scoring margin at +28 points
teams['spread'] = np.where(teams['spread'] < -28, -28, teams['spread']) # cap the lower bound scoring margin at -28 points

spreads = teams.groupby('team').spread.mean()
spreads.head()

# create empty arrays
terms = []
solutions = []

for team in spreads.keys():
    row = []
    # get a list of team opponents
    opps = list(teams[teams['team'] == team]['opponent'])
    
    for opp in spreads.keys():
        if opp == team:
        	# coefficient for the team should be 1
            row.append(1)
        elif opp in opps:
        	# coefficient for opponents should be 1 over the number of opponents
            row.append(-1.0/len(opps))
        else:
        	# teams not faced get a coefficient of 0
            row.append(0)
            
    terms.append(row)
    
    # average game spread on the other side of the equation
    solutions.append(spreads[team])

solutions = np.linalg.solve(np.array(terms), np.array(solutions))
solutions

ratings = list(zip( spreads.keys(), solutions ))
srs = pd.DataFrame(ratings, columns=['team', 'rating'])
srs.head()


Unnamed: 0,team,rating
0,Air Force,2.111183
1,Akron,-38.500656
2,Alabama,16.963411
3,Appalachian State,2.438972
4,Arizona,-13.667286


In [2]:
srs = pd.read_csv('./data/SRS2019.csv')[['team','rating']]
srs.head()

response = requests.get(
    "https://api.collegefootballdata.com/talent"
)

talent = pd.read_json(response.text)[['school', 'talent']]
talent.head()

teams = srs.merge(talent, left_on='team', right_on='school')[['team', 'rating', 'talent']]
teams.head()

response = requests.get("https://api.collegefootballdata.com/teams/fbs")
data = pd.read_json(response.text)

teams = teams.merge(data, left_on='team', right_on='school')[['team', 'conference', 'rating', 'talent']]
teams.head()

teams['classification'] = np.where((teams['conference'] == 'SEC') | (teams['conference'] == 'Big Ten') | (teams['conference'] == 'ACC') | (teams['conference'] == 'Pac-12') | (teams['conference'] == 'Big 12') | (teams['team'] == 'Notre Dame'), 'P5+ND', 'G5')
teams.head()

fig = px.scatter(teams, x="talent", y="rating", trendline="ols", color='classification', hover_name='team', hover_data=['talent','rating'])
fig.update_layout(
    title="Team Talent vs SRS Rating (2019)",
    xaxis_title="Talent Rating",
    yaxis_title="SRS Rating")
fig.show()

results = px.get_trendline_results(fig)
results.query("classification == 'P5+ND'").px_fit_results.iloc[0].summary()




0,1,2,3
Dep. Variable:,y,R-squared:,0.339
Model:,OLS,Adj. R-squared:,0.337
Method:,Least Squares,F-statistic:,165.7
Date:,"Fri, 10 Jul 2020",Prob (F-statistic):,6.77e-31
Time:,13:39:28,Log-Likelihood:,-1136.8
No. Observations:,325,AIC:,2278.0
Df Residuals:,323,BIC:,2285.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-25.4688,2.495,-10.210,0.000,-30.377,-20.561
x1,0.0458,0.004,12.874,0.000,0.039,0.053

0,1,2,3
Omnibus:,16.832,Durbin-Watson:,1.884
Prob(Omnibus):,0.0,Jarque-Bera (JB):,7.567
Skew:,-0.111,Prob(JB):,0.0227
Kurtosis:,2.286,Cond. No.,3930.0
