Our first cell is going to import the games dataset. The games dataset includes every NCAAFB game since 1918.

In [None]:
import pandas as pd
import math


# Load the game history and add the score differential from the home team's
# point of view (positive = home win, negative = away win, zero = tie).
games = pd.read_csv('GameHistory.csv')
games['Score Differential'] = games['Home Score'] - games['Away Score']

# Collect every team that appears in either column. The two names were
# previously bound to the opposite columns; each now holds what it says.
hometeams = list(games['Home Team'].unique())
awayteams = list(games['Away Team'].unique())

# Away teams are listed first so that `homeaway` and `teams` keep the same
# ordering they had before the names above were corrected.
homeaway = awayteams + hometeams

# dict.fromkeys drops duplicates in a single pass while preserving the order
# in which each team was first seen.
teams = list(dict.fromkeys(homeaway))

# Every team starts with an Elo rating of 1000.
elo = {team: 1000 for team in teams}

# Placeholder columns that the rating loop fills in. They are created as
# floats because the values written later are fractional ratings; writing a
# float into an integer column is deprecated in recent pandas releases.
games['Home Elo'] = pd.Series(0.0, index=games.index)
games['Away Elo'] = pd.Series(0.0, index=games.index)
games['Elo Change'] = pd.Series(0.0, index=games.index)

The purpose of the following function is to calculate the change in elo for each team depending on the result, who is home, and who is away.

In [None]:
def eloChange(eloDiff, pDiff, k=22, homeAdv=65, movHomeAdv=55):
    """Return the zero-sum Elo adjustment for a single game.

    Everything is expressed from the home team's perspective.

    Parameters:
        eloDiff: home team's rating minus the away team's rating, before
            the game.
        pDiff: home score minus away score. Positive means the home team
            won, negative means it lost, zero is a tie.
        k: volatility factor; larger values move ratings more per game.
        homeAdv: rating points credited to the home team when computing
            the expected result.
        movHomeAdv: rating points credited to the home team inside the
            margin-of-victory modifier.

    Returns:
        (hChange, aChange): the amounts to add to the home and away
        ratings. They always sum to zero. A tie, or a differential that is
        neither positive nor negative (e.g. NaN from a missing score),
        yields (0, 0) instead of raising.
    """
    if pDiff > 0:
        homeWon = True
    elif pDiff < 0:
        homeWon = False
    else:
        # Tie, or a NaN differential that fails both comparisons above.
        return (0, 0)

    # Chance that the home team LOSES, i.e. 1 - (home expected score).
    homeLossProb = 1 / (10 ** ((eloDiff + homeAdv) / 400) + 1)

    if homeWon:
        # Winner's rating edge over the loser, and how unlikely the win was.
        winnerEdge = eloDiff + movHomeAdv
        surprise = homeLossProb
    else:
        winnerEdge = -eloDiff - movHomeAdv
        surprise = 1 - homeLossProb

    # movm = margin of victory modifier: grows with the log of the margin and
    # is damped when the winner was already the higher-rated side.
    # NOTE(review): this uses a 55-point home edge while the probability above
    # uses 65 -- confirm the mismatch is intentional.
    movm = math.log(abs(pDiff) + 1) * (2.2 / (winnerEdge * .001 + 2.2))

    change = movm * surprise * k

    if homeWon:
        return (change, -change)
    return (-change, change)

Runs through every game in the dataset, updating ratings with each result.

In [None]:
# Walk through the games in file order, recording each team's pre-game rating
# and then updating the ratings from the result. Values are collected in
# lists and written to the frame once at the end, which avoids slow per-cell
# assignment and lets pandas pick a float dtype for the columns.
homeElos = []
awayElos = []
eloChanges = []

# 'Code' of the previous row; None until the first game has been seen so that
# the very first game is rated too (it has no predecessor to compare with).
prevCode = None

for homeTeam, awayTeam, code, pDiff in zip(games['Home Team'],
                                           games['Away Team'],
                                           games['Code'],
                                           games['Score Differential']):

    # A jump of more than 1 in 'Code' between consecutive rows is treated as
    # the start of a new season (presumably 'Code' encodes the game date --
    # confirm against the dataset).
    if prevCode is not None and code - prevCode > 1:
        # New season: pull every rating one third of the way toward 900.
        for team in elo:
            elo[team] = round(elo[team] * (2/3) + (900 * (1/3)), 2)
    prevCode = code

    # Ratings going into the game, as stored on the row.
    homeElos.append(round(elo[homeTeam], 2))
    awayElos.append(round(elo[awayTeam], 2))

    # Rating gap from the home team's perspective, then the zero-sum update.
    eloDiff = elo[homeTeam] - elo[awayTeam]
    h, a = eloChange(eloDiff, pDiff)

    elo[homeTeam] += h
    elo[awayTeam] += a

    # Magnitude of the swing; the sign depends on which side won.
    eloChanges.append(abs(a))

games['Home Elo'] = homeElos
games['Away Elo'] = awayElos
games['Elo Change'] = eloChanges

games.head()

Lastly, I write everything out to a CSV file.

In [None]:
# Save the games table, now carrying the Elo columns, without the row index.
games.to_csv(path_or_buf='ncaaElo.csv', index=False)