In [37]:
import pandas as pd
import numpy as np

In [38]:
ROOT = '/home/robert/Projects/NCAA2021'

In [39]:
ELO_RATING = {}

## Import Data

In [40]:
RegularSeason = pd.read_csv(f'{ROOT}/Data/MRegularSeasonDetailedResults.csv', index_col=False)
Tourney = pd.read_csv(f'{ROOT}/Data/MNCAATourneyCompactResults.csv')
TeamIDs = pd.read_csv(f'{ROOT}/Data/MTeams.csv')
Rankings = pd.read_csv(f'{ROOT}/Data/MMasseyOrdinals.csv')
RegularSeason = RegularSeason[RegularSeason.Season==2022]
RegularSeason = RegularSeason[['Season', 'DayNum', 'WTeamID', 'LTeamID', 'WScore', 'LScore']]
RegularSeason['WTeamElo'] = np.nan
RegularSeason['LTeamElo'] = np.nan

## Elo Functions

In [41]:
K = 30
def win_chance(elo1, elo2):
    """Given elo1 and elo2, will return the chance of elo1 winning"""
    return  1 / ( 1 + 10**((elo2-elo1)/400) )

def update_elo(winner_elo, loser_elo):
    """Given elo1 (winner) and elo2 (loser) will return new elos"""
    elo_change = K * (1 - win_chance(winner_elo, loser_elo))
    new_elo1 = int(winner_elo + elo_change)
    new_elo2 = int(loser_elo - elo_change)
    return new_elo1, new_elo2
def margin_of_victory(elo_win, elo_loss, score_win, score_loss):
    return (np.log((score_win-score_loss)+1) * 2.2) / ((elo_win - elo_loss)*0.001 + 2.2)

In [42]:
def get_team_ids():
    team_ids = []
    WTeamIDs = RegularSeason['WTeamID'].unique().tolist()
    LTeamIDs = RegularSeason['LTeamID'].unique().tolist()
    for team in WTeamIDs:
        team_ids.append(team)
    for team in LTeamIDs:
        if team not in WTeamIDs:
            team_ids.append(team)
    return team_ids
team_ids = get_team_ids()

In [43]:
def generate_elo():
    for team in team_ids:
        ELO_RATING[team] = 1500
generate_elo()
RegularSeason

Unnamed: 0,Season,DayNum,WTeamID,LTeamID,WScore,LScore,WTeamElo,LTeamElo
96687,2022,8,1104,1256,93,64,,
96688,2022,8,1105,1398,82,73,,
96689,2022,8,1110,1265,77,73,,
96690,2022,8,1112,1319,81,52,,
96691,2022,8,1113,1339,76,60,,
...,...,...,...,...,...,...,...,...
100418,2022,98,1400,1242,79,76,,
100419,2022,98,1411,1126,66,63,,
100420,2022,98,1422,1441,68,49,,
100421,2022,98,1438,1181,69,68,,


In [44]:
for i in RegularSeason.index:
    MoV = margin_of_victory(ELO_RATING[RegularSeason.at[i, 'WTeamID']],ELO_RATING[RegularSeason.at[i, 'WTeamID']], RegularSeason.at[i, 'WScore'], RegularSeason.at[i, 'LScore'])
    WTeamElo, LTeamElo = update_elo(ELO_RATING[RegularSeason.at[i, 'WTeamID']], ELO_RATING[RegularSeason.at[i, 'LTeamID']])
    RegularSeason.at[i, 'WTeamElo'] = WTeamElo + MoV
    RegularSeason.at[i, 'LTeamElo'] = LTeamElo - MoV
    ELO_RATING[RegularSeason.at[i, 'WTeamID']] = WTeamElo + MoV
    ELO_RATING[RegularSeason.at[i, 'LTeamID']] = LTeamElo - MoV

In [45]:
def get_team_id(team_name):
    return TeamIDs.loc[TeamIDs['TeamName']==team_name]['TeamID'].values[0]

In [46]:
TeamIDs

Unnamed: 0,TeamID,TeamName,FirstD1Season,LastD1Season
0,1101,Abilene Chr,2014,2022
1,1102,Air Force,1985,2022
2,1103,Akron,1985,2022
3,1104,Alabama,1985,2022
4,1105,Alabama A&M,2000,2022
...,...,...,...,...
367,1468,Bellarmine,2021,2022
368,1469,Dixie St,2021,2022
369,1470,Tarleton St,2021,2022
370,1471,UC San Diego,2021,2022


In [135]:
team1 = "Baylor"
team2 = "UCLA"
win_chance(ELO_RATING[get_team_id(team1)], ELO_RATING[get_team_id(team2)])

0.5433056292516887

In [136]:
RegularSeason

Unnamed: 0,Season,DayNum,WTeamID,LTeamID,WScore,LScore,WTeamElo,LTeamElo
96687,2022,8,1104,1256,93,64,1518.401197,1481.598803
96688,2022,8,1105,1398,82,73,1517.302585,1482.697415
96689,2022,8,1110,1265,77,73,1516.609438,1483.390562
96690,2022,8,1112,1319,81,52,1518.401197,1481.598803
96691,2022,8,1113,1339,76,60,1517.833213,1482.166787
...,...,...,...,...,...,...,...,...
100418,2022,98,1400,1242,79,76,1664.386294,1699.613706
100419,2022,98,1411,1126,66,63,1489.386294,1317.613706
100420,2022,98,1422,1441,68,49,1516.995732,1376.004268
100421,2022,98,1438,1181,69,68,1589.693147,1680.306853
