In [None]:
'''This project aims to build the best elo rating systems for teams in the NBA. Below, I've provided outlines for the
two methods you will need to modify: win_prob() and change_elo(). I will use these two methods of yours to test the
effectiveness of your elo ranking system. I've attached the schedules and outcomes of five historical NBA seasons for
you to train your data (from SR) and will be using the most recently completely season to test. Please do not pull
the testing data as that will defeat the purpose of this project. Keep in mind, this is the bare minimum outline. 
You are encouraged to pull this code and write any tuning algorithms you deem necessary to further build your elo
system. Best of luck!'''

In [None]:
import pandas as pd
import numpy as np
import sklearn as sl
import math

In [45]:
#read in data
games = pd.read_csv('data/NBA_2020_21.csv')

In [46]:
#initial elo for each team # TODO: update this
teams = {
    "Brooklyn Nets":          0, #set initial values here (float)
    "Milwaukee Bucks":        0,
    "Golden State Warriors":  0,
    "Los Angeles Lakers":     0,
    "Indiana Pacers":         0,
    "Charlotte Hornets":      0,
    "Chicago Bulls":          0,
    "Detroit Pistons":        0,
    "Boston Celtics":         0,
    "New York Knicks":        0,
    "Washington Wizards":     0,
    "Toronto Raptors":        0,
    "Cleveland Cavaliers":    0,
    "Memphis Grizzlies":      0,
    "Houston Rockets":        0,
    "Minnesota Timberwolves": 0,
    "Philadelphia 76ers":     0,
    "New Orleans Pelicans":   0,
    "Orlando Magic":          0,
    "San Antonio Spurs":      0,
    "Oklahoma City Thunder":  0,
    "Utah Jazz":              0,
    "Sacramento Kings":       0,
    "Portland Trail Blazers": 0,
    "Denver Nuggets":         0,
    "Phoenix Suns":           0,
    "Dallas Mavericks":       0,
    "Atlanta Hawks":          0,
    "Miami Heat":             0,
    "Los Angeles Clippers":   0
}

home_courts = dict()
for team in teams:
    filtered_games = games.loc[np.array(games["team_1"] == team) + np.array(games["team_2"] == team)]
    home_courts[team] = max(set(filtered_games["stadium"]), key=list(filtered_games["stadium"]).count)

teams = {team: 1500 for team in teams}

In [24]:
#given two teams and their elos, return the probability of winning for the first team
#input: team 1 elo (float), team 2 elo (float)
#output: team 1 win probability between 0 and 1 (float)

def win_prob(team_1_elo, team_2_elo) -> float:
    elo_dif = team_1_elo - team_2_elo
    odds = 1/(10 ** (-elo_dif/400) + 1)# 𝑃𝑟(𝐴)=1/10^−𝐸𝑙𝑜𝐷𝑖ff/400+1 --> from the NFL site
    return odds

In [47]:
#given two teams and their original elos, return their new elos considering the outcome of their game
#input: team 1 elo (float), team 2 elo (float), game (DataFrame)
#ouput: new team 1 elo (float), new team 2 elo (float)

def change_elo(team_1_elo, team_2_elo, game) -> tuple[float, float]:
    # constants found here: https://fivethirtyeight.com/features/how-we-calculate-nba-elo-ratings/
    k = 20
    home = game["stadium"] == home_courts[game["team_1"]]
    home_court_advantage = 100
    margin_of_victory = game["team_1_score"] - game["team_2_score"]
    elo_diff = team_1_elo - team_2_elo
    MOV_mult = ((abs(margin_of_victory) + 3) ** 0.8) / (7.5 + 0.006 * elo_diff)

    adjusted_elo_1 = team_1_elo + home * home_court_advantage
    adjusted_elo_2 = team_2_elo + (not home) * home_court_advantage
    expected_score = win_prob(adjusted_elo_1, adjusted_elo_2)
    outcome = game["team_1_score"] > game["team_2_score"]
    forecast_delta = (outcome - expected_score)
    # print(team_1_elo, team_2_elo, game, k, forecast_delta, MOV_mult)
    move = k * forecast_delta * MOV_mult
    # print(move)

    
    return team_1_elo + move, team_2_elo - move

In [48]:
#how predictive is the elo system?
#squared loss forcasting test (Brier Score)
#the lower the score, the better
def test() -> float:
    score = 0
    for index, row in games.iterrows():
        t2 = team_1, team_2 = row['team_1'], row['team_2']
        elos = current_team_1_elo, current_team_2_elo = teams.get(team_1), teams.get(team_2)

        win = win_prob(current_team_1_elo, current_team_2_elo)

        new_team_1_elo, new_team_2_elo = change_elo(current_team_1_elo, current_team_2_elo, row)
        teams.update({team_1: new_team_1_elo})
        teams.update({team_2: new_team_2_elo})
        
        if row['team_1_score'] > row['team_2_score']:
            score = score + 2*(1 - win)**2
        else:
            score = score + 2*win**2
            
    return score

# start with prev year elos
g1 = games
games = pd.read_csv('data/NBA_2019_20.csv')
test()
games = g1
teams = {team: teams[team] * 0.75 + 1505 * 0.2 for team in teams}

# final eval
test()

537.1287709205216