# Elo ratings based on regular-season games

In [1]:
# importing necessary packages
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss

There are certain hyperparameters which affect how the Elo rating works. I determine the optimal values of these hyperparameters in separate experiments.
- **K** (learning rate) is optimized in **Regression Hyperparameter Optimization**
- **CONF_REVERSION_FACTOR** is optimized in **Regression Hyperparameter Optimization**
- **D1_REVERSION_FACTOR** is optimized in **Regression Hyperparameter Optimization**

In [4]:
# Relative path to the folder holding the competition CSV files
DATA_PATH = "../../march-machine-learning-mania-2024-data"

# Elo learning rate: scales the size of every rating update
K = 50.0
# Rating points credited to the home team before a game is evaluated
HOME_ADVANTAGE = 100.0
# Proportion of team's next year elo rating that is determined by their conference's average rating
CONF_REVERSION_FACTOR = 0.375
# Proportion of team's next year elo rating that is determined by the D1 average rating of 1500
D1_REVERSION_FACTOR = 0.0

In [5]:
# Load the compact results of every regular-season game and preview the first rows
rs = pd.read_csv(f"{DATA_PATH}/MRegularSeasonCompactResults.csv")
rs.head(3)

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT
0,1985,20,1228,81,1328,64,N,0
1,1985,25,1106,77,1354,70,H,0
2,1985,25,1112,63,1223,56,H,0


In [6]:
# Every team that appears as a winner or a loser in at least one game
team_ids = set(rs.WTeamID) | set(rs.LTeamID)
len(team_ids)

378

It is necessary to determine the elo rating with which each team begins the season.
- I'm going to initialise all teams with a rating of 1500. This differs from the 538 approach, where new entrants begin with a lower rating. I believe that this will have only a marginal impact on the final results, as very few teams enter each year.
- I will revert each team's rating towards their conference mean each season. I choose conference mean, rather than overall mean, to account for certain conferences being consistently stronger than others. With ~30 games in the regular season, each team should approach their true Elo rating regardless of how ratings are reverted to the mean. However, because the vast majority of non-conference games appear early in the season, sub-optimal reversion will make it more difficult for conferences to approach their true strengths. For simplicity, I will revert teams to their old conference's mean, rather than their new conference's mean.

In [7]:
# Lookup of conference affiliation by year, keyed by the string "<Season>_<TeamID>"
conf_df = pd.read_csv(DATA_PATH + "/MTeamConferences.csv")
conf_df['TeamYear'] = conf_df.Season.astype(str) + "_" + conf_df.TeamID.astype(str)
conf_dict = dict(zip(conf_df['TeamYear'], conf_df['ConfAbbrev']))
confs = set(conf_df['ConfAbbrev'])

# One (initially empty) list per conference, used to average Elo ratings
# by conference at the end of each season
conf_elo_dict = {conf: [] for conf in confs}

# Number of distinct conferences across all seasons
len(confs)

51


In [8]:
# Current rating of every team, looked up and updated in place while the
# algorithm walks through the games. Every team starts at 1500.
elo_dict = {team_id: 1500 for team_id in team_ids}

In [9]:
# Margin of victory (winner's score minus loser's score); used to scale Elo updates
rs['margin'] = rs['WScore'] - rs['LScore']

The three functions below contain the meat of the Elo calculation:

In [11]:
def elo_pred(elo1, elo2):
    """Return the probability that a team rated `elo1` beats a team rated `elo2`.

    Uses the logistic Elo curve on a 400-point scale: equal ratings give 0.5,
    and a 400-point advantage gives 10/11.
    """
    exponent = (elo2 - elo1) / 400.
    return 1. / (1. + 10. ** exponent)

def expected_margin(elo_diff):
    """Return the linear term `7.5 + 0.006 * elo_diff`.

    `elo_update` divides the margin-of-victory factor by this value, so a win
    by a team that was already rated well ahead is damped.
    """
    intercept = 7.5
    slope = 0.006
    return intercept + slope * elo_diff

def elo_update(w_elo, l_elo, margin):
    """Compute the winner's pre-game win probability and the rating change.

    Parameters
    ----------
    w_elo, l_elo : pre-game ratings of the winner and the loser (the caller
        adds any home-court adjustment before passing them in).
    margin : winner's score minus loser's score.

    Returns
    -------
    (pred, update) : `pred` is the probability the model gave the winner;
        `update` is the number of points to add to the winner and subtract
        from the loser.
    """
    win_prob = elo_pred(w_elo, l_elo)
    # Margin-of-victory multiplier: grows with the margin, shrinks when the
    # winner was already rated well ahead of the loser
    mov_multiplier = ((margin + 3.) ** 0.8) / expected_margin(w_elo - l_elo)
    return win_prob, K * mov_multiplier * (1 - win_prob)

In [12]:
# The Elo loop walks the games season by season and the per-game results are
# written back to `rs` by position, so two things must hold before it runs:
#   1. `rs` has the default 0..n-1 index, and
#   2. the rows are already in chronological (Season, DayNum) order --
#      `groupby('Season')` yields seasons in sorted order, so an unsorted frame
#      would silently misalign the saved ratings with their games.
assert np.all(rs.index.values == np.arange(rs.shape[0])), "Index is out of order."
assert pd.MultiIndex.from_arrays([rs.Season, rs.DayNum]).is_monotonic_increasing, \
    "Games are not sorted by Season and DayNum."

In [13]:
# NOTE: this cell updates `elo_dict` in place. Re-run the cell that initialises
# `elo_dict` before re-running this one, otherwise ratings carry over.
preds = []             # winner's pre-game win probability, one entry per game
w_elo = []             # winner's rating immediately after each game
l_elo = []             # loser's rating immediately after each game
season_conf_elos = []  # (season, conference, mean end-of-season Elo) records

# Between-season reversion weights. A team's next-season rating is a weighted
# blend of its own rating, its conference's mean rating and the D1 mean (1500).
# With D1_REVERSION_FACTOR = 0 this reduces to pure conference-mean reversion.
D1_MEAN_ELO = 1500.
own_weight = 1. - CONF_REVERSION_FACTOR - D1_REVERSION_FACTOR

# Group the DataFrame by 'Season' (groups are yielded in ascending season order)
grouped = rs.groupby('Season')

for season, group in grouped:

    # ---- Play through every game of the season, in row order ----
    for game in group.itertuples():
        winner = game.WTeamID
        loser = game.LTeamID

        # WLoc "H" credits the winner with home advantage, "A" credits the
        # loser; any other value gives neither team an advantage
        w_ad = HOME_ADVANTAGE if game.WLoc == "H" else 0.
        l_ad = HOME_ADVANTAGE if game.WLoc == "A" else 0.

        # Get elo updates as a result of the game
        pred, update = elo_update(elo_dict[winner] + w_ad,
                                  elo_dict[loser] + l_ad,
                                  game.margin)
        elo_dict[winner] += update
        elo_dict[loser] -= update

        # Save prediction and post-game Elos
        preds.append(pred)
        w_elo.append(elo_dict[winner])
        l_elo.append(elo_dict[loser])

    # ---- End of season: revert ratings towards the mean ----
    # Only teams with a conference entry for this season are treated as active
    active_conf = {}
    for team_id in team_ids:
        conf = conf_dict.get(f"{season}_{team_id}")
        if conf is not None:
            active_conf[team_id] = conf

    # Mean end-of-season rating of each conference with at least one active team
    conf_ratings = {}
    for team_id, conf in active_conf.items():
        conf_ratings.setdefault(conf, []).append(elo_dict[team_id])
    conf_mean = {conf: sum(ratings) / len(ratings)
                 for conf, ratings in conf_ratings.items()}

    # Blend each active team's rating with its conference mean and the D1 mean.
    # All means were computed above, before any rating is modified.
    for team_id, conf in active_conf.items():
        elo_dict[team_id] = (own_weight * elo_dict[team_id]
                             + CONF_REVERSION_FACTOR * conf_mean[conf]
                             + D1_REVERSION_FACTOR * D1_MEAN_ELO)

    # Record the conference means, sorted by abbreviation so the saved output
    # does not depend on set/hash iteration order
    for conf in sorted(conf_mean):
        season_conf_elos.append((season, conf, conf_mean[conf]))



In [14]:
# Attach the post-game ratings to their games (lists are in the same row order as `rs`)
rs['w_elo'], rs['l_elo'] = w_elo, l_elo

Let's take a look at the last few games in the games dataframe to check that the Elo ratings look reasonable.

In [15]:
# Show the ten most recent games together with their post-game ratings
rs.iloc[-10:]

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,margin,w_elo,l_elo
187279,2024,131,1433,66,1386,60,N,0,6,1721.493585,1647.131855
187280,2024,131,1436,66,1262,61,H,0,5,1751.81295,1562.624145
187281,2024,131,1443,78,1431,71,N,0,7,1624.25739,1541.074333
187282,2024,131,1458,76,1345,75,N,1,1,1890.025846,2076.743474
187283,2024,131,1463,69,1165,57,N,0,12,1717.019856,1661.679766
187284,2024,132,1120,86,1196,67,N,0,19,2045.630085,1907.549212
187285,2024,132,1182,57,1433,51,N,0,6,1759.371402,1703.556206
187286,2024,132,1228,93,1458,87,N,0,6,2012.008771,1877.682439
187287,2024,132,1412,85,1396,69,N,0,16,1705.856601,1536.702922
187288,2024,132,1463,62,1135,61,N,0,1,1721.876078,1542.016329


Looks OK. How well do they generally predict games? Since all of the Elo predictions calculated above have a true outcome of 1 (each prediction is the probability assigned to the eventual winner), it's really simple to check what the log loss would be on these ~187k games:

In [16]:
# Mean log loss: every prediction is the probability given to the actual winner
-np.log(preds).mean()

0.5197897978930905

For comparison, the mean loss in the no-reversion version of this is 0.5362111392019109. So this is better than not reverting teams' ratings to the mean and using a learning rate of 20.

Final step: for each team, pull out the final Elo rating at the end of each regular season. This is a bit annoying because the team ID could be in either the winner or loser column for their last game of the season.

In [17]:
def final_elo_per_season(df, team_id):
    """Return one team's rating after its last game of each season.

    Parameters
    ----------
    df : DataFrame with columns Season, DayNum, WTeamID, LTeamID, w_elo, l_elo
        (one row per game; w_elo / l_elo are the winner's / loser's ratings
        after that game). The frame is not modified.
    team_id : ID of the team to extract.

    Returns
    -------
    DataFrame with columns team_id, season, season_elo: one row per season in
    which the team played, indexed by the row label of the team's last game of
    that season. Empty if the team has no games in `df`.
    """
    # Filter first so only this team's games are sorted and copied
    played = (df.WTeamID == team_id) | (df.LTeamID == team_id)
    last_games = (
        df.loc[played]
        .sort_values(['Season', 'DayNum'])
        .drop_duplicates(['Season'], keep='last')
    )
    # Take the winner's rating where the team won its last game, otherwise the
    # loser's. `where` keeps the numeric dtype of the rating columns.
    season_elo = last_games.w_elo.where(last_games.WTeamID == team_id,
                                        last_games.l_elo)
    return pd.DataFrame({
        'team_id': team_id,
        'season': last_games.Season,
        'season_elo': season_elo
    })

In [18]:
# One frame of end-of-season ratings per team, stacked into a single table
df_list = [final_elo_per_season(rs, team_id) for team_id in team_ids]
season_elos = pd.concat(df_list)

In [19]:
# Spot-check a few rows; the seed is fixed so the displayed sample is reproducible
season_elos.sample(10, random_state=0)

Unnamed: 0,team_id,season,season_elo
107977,1378,2009,1502.817699
113236,1237,2010,1664.464569
56036,1405,1998,1588.438849
31471,1198,1992,1161.682092
47827,1339,1996,1632.178724
123861,1270,2012,1485.283745
23236,1330,1990,1573.403627
7493,1242,1986,1977.120587
161422,1413,2019,1365.299739
47961,1429,1996,1618.966811


Before I save the end-of-season elo ratings into a csv file, I'm going to add the team names as a column.

In [59]:
team_name_df = pd.read_csv(DATA_PATH + "/MTeams.csv")

# Columns are selected explicitly on both sides of the merge so this cell can
# be re-run safely: a second run starts again from the three rating columns
# instead of colliding with an already-merged 'TeamName' column.
season_elos = (
    season_elos[['team_id', 'season', 'season_elo']]
    .merge(team_name_df[['TeamID', 'TeamName']],
           left_on='team_id', right_on='TeamID', how='left')
    .drop(columns='TeamID')  # redundant with 'team_id'
    .loc[:, ['team_id', 'TeamName', 'season', 'season_elo']]
)
season_elos.to_csv("results/season_elos.csv", index=False)

I'll also save my end-of-season conference elo ratings by year.

In [60]:
# One row per (season, conference) with that conference's mean end-of-season Elo
conf_elo_df = pd.DataFrame.from_records(
    season_conf_elos,
    columns=['Season', 'ConfAbbrev', 'MeanElo'],
)
conf_elo_df.to_csv("results/conf_mean_elos.csv", index=False)
