# Elo Code

In [8]:
from asyncio.windows_events import NULL
import numpy as np
from collections import Counter
from datetime import datetime as dt
import pickle
import bz2
import random
import math
from math import log

## Data Import

In [9]:
# Location of the bz2-compressed pickle that holds the match records.
# NOTE(review): this is an absolute, machine-specific path; point it at your own copy of the file.
DATA_PATH = 'C:\\Users\\mmaze\\Desktop\\Capstone\\Elo\\train_and_test_data.pbz2'

# bz2.open returns a BZ2File that is closed (together with the underlying file)
# when the with-block exits, so no decompressor handle is left open.
# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
with bz2.open(DATA_PATH, 'rb') as file:
    vball = pickle.load(file)

#Sort by date (Final Array)
# In-place sort of the loaded array on its 'datetime' field, so later cells see matches in chronological order.
vball.sort(order='datetime')

In [10]:
# Peek at a few records:

# the first record (index 0), followed by an empty line
print(vball[0], end="\n\n")

# Field-then-row and row-then-field indexing return the same value
print(vball['tournament_name'][1])
print(vball[1]['tournament_name'])

# Several fields can be selected at once with a list of names
wanted_fields = ['tournament_name','type']
print(vball[wanted_fields][1])

(80, "25th Annual AAU Girls' Junior National Beach Volleyball Championships", 'National', '15U', '21', 'Girls', 'Juniors', 65, 1, 1, 'Home', 1, 'Pool', False, 400, 494, '400.494', 1816, 2144, '1816.2144', True, False, 21, 7, False, 21, 14, 0.75, 21, 33.862354, -118.40166, 'Pacific Standard Time', '2018-07-10T09:00:00.000000')

25th Annual AAU Girls' Junior National Beach Volleyball Championships
25th Annual AAU Girls' Junior National Beach Volleyball Championships
("25th Annual AAU Girls' Junior National Beach Volleyball Championships", 'National')


## Functions

In [11]:
# Elo probability, recreated from https://www.geeksforgeeks.org/elo-rating-algorithm/
def Probability(rating1, rating2):
    """Return the Elo expected score of the side rated `rating1` against `rating2`.

    The value is 1 / (1 + 10 ** ((rating2 - rating1) / 400)); equal ratings give 0.5.
    """
    rating_gap = rating2 - rating1
    odds_against = 10 ** (rating_gap / 400)
    return 1.0 / (1 + odds_against)

def EloRating(Ra, Rb, K, won):
    """Return side A's rating after one game against side B, rounded to 4 decimals.

    Ra, Rb: current ratings of side A and side B.
    K:      step size; the rating moves by K * (actual score - expected score).
    won:    truthy if side A won, falsy if it lost.
    """
    # Expected score of side A before the game
    expected = Probability(Ra, Rb)
    # Actual score: 1 for a win, 0 for a loss
    actual = 1 if won else 0
    updated = Ra + K * (actual - expected)
    return round(updated, 4)

In [12]:
# Sanity checks on the helpers:
# expected score for two sides 300 points apart, seen from each side
print(Probability(1400, 1700))
print(Probability(1700, 1400))
# the same matchup updated after a win, then after a loss
for did_win in (True, False):
    print(EloRating(Ra=1100, Rb=1000, K=800, won=did_win))

0.15097955721132328
0.8490204427886767
1387.948
587.948


In [13]:
def simulator(match_data=vball, K=800, inital_elo=1200, eval_start=206296):
    """Replay matches in order, maintaining Elo ratings and scoring predictions.

    For every record the four participant ids (player, teammate and the two
    opponents) are registered at `inital_elo` if they have not been seen yet.
    Each side's strength is the mean of its two members' current ratings.
    `EloRating` is then applied to the player's side and the result is stored
    under `player_id` only.

    Records whose zero-based position is >= `eval_start` are also used to
    measure accuracy: the player's side is predicted to win when its win
    probability is above 0.5, and the prediction is compared with
    `match["win"]`.

    Args:
        match_data: iterable of records that support lookup by the field names
            "player_id", "teammate_id", "opponent1_id", "opponent2_id" and "win".
        K: Elo step size passed to `EloRating`.
        inital_elo: rating given to an id the first time it appears.
        eval_start: zero-based position of the first record that is scored.
            The default keeps the previously hard-coded cutoff; per the original
            author's note it captures the 3/5/22 to 6/20/22 games.

    Returns:
        (correct_counter, match_counter, elo_db): number of correct predictions,
        number of scored records, and the dict mapping id -> rating.
    """
    # elo_db: player id -> current elo
    elo_db = dict()
    correct_counter = 0
    match_counter = 0
    roles = ("player_id", "teammate_id", "opponent1_id", "opponent2_id")

    for loop, match in enumerate(match_data):
        # add players to database if needed
        for person in roles:
            elo_db.setdefault(match[person], inital_elo)

        # team strength = mean of the two members' ratings
        avg_oppon = .5 * (elo_db[match["opponent1_id"]] + elo_db[match["opponent2_id"]])
        player_elo = .5 * (elo_db[match["player_id"]] + elo_db[match["teammate_id"]])

        # only records from position eval_start onwards count towards accuracy
        if loop >= eval_start:

            # skip over situations where both sides are unranked or have the same elo
            # NOTE(review): `continue` also skips the rating update below for these
            # records, whereas equal-rated records before eval_start do update
            # ratings. Behaviour kept as-is; confirm whether this is intended.
            if player_elo == avg_oppon:
                continue

            # compare prediction to the actual result
            pred_win = Probability(player_elo, avg_oppon) > .5
            if pred_win == match["win"]:
                correct_counter += 1
            match_counter += 1

        # perform calculations/updates
        # NOTE(review): only player_id is updated, and its new rating is derived
        # from the team average rather than from the player's own rating.
        # Behaviour kept as-is; confirm this matches the intended model.
        elo_db[match["player_id"]] = EloRating(player_elo, avg_oppon, K, match["win"])

    return (correct_counter, match_counter, elo_db)

## Apply Functions to Data

In [14]:
# Run the baseline simulation and report the share of scored matches predicted correctly
output = simulator()
n_correct, n_scored = output[0], output[1]
print("accuracy is", n_correct/n_scored)

accuracy is 0.893518011606263


In [15]:
players_and_elo = output[2]
# show the first ten (player id, elo) pairs in insertion order
dict(list(players_and_elo.items())[:10])

{400: 1938.6504,
 494: 2180.7687,
 1816: 1366.5328,
 2144: 503.1329,
 689: 1409.8511,
 1861: 2607.6398,
 2211: 1762.2169,
 2212: 1318.1059,
 1846: 2676.2848,
 1847: 947.698}

## Elo Variant (Optional Read)
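This variant adds a decay-and-restore step.  
Whenever a new calendar day starts, every rating above the initial Elo (1200) is multiplied by `decay_amt` and every rating below it is multiplied by `restore_amt`, nudging players back toward the initial Elo.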

In [16]:
def simulatorVariant(match_data=vball, K=800, inital_elo=1200, decay_amt=1, restore_amt=1, eval_start=206296):
    """Elo replay with an optional once-per-day decay/restore of all ratings.

    Works like the plain simulator: the four ids of each record are registered
    at `inital_elo` if unseen, each side's strength is the mean of its two
    members' ratings, and the `EloRating` result for the player's side is stored
    under `player_id` only. In addition, whenever the date of a record differs
    from the date of the previous one, every rating above `inital_elo` is
    multiplied by `decay_amt` and every rating below it by `restore_amt`.
    Because the adjustment is multiplicative, a rating close to `inital_elo`
    can cross over it.

    Args:
        match_data: iterable of records that support lookup by the field names
            "player_id", "teammate_id", "opponent1_id", "opponent2_id" and "win",
            and whose last field starts with the date (see note in the loop).
        K: Elo step size passed to `EloRating`.
        inital_elo: rating given to an id the first time it appears.
        decay_amt: daily multiplier for ratings above `inital_elo`.
        restore_amt: daily multiplier for ratings below `inital_elo`.
        eval_start: zero-based position of the first record that is scored.
            The default keeps the previously hard-coded cutoff; per the original
            author's note it captures the 3/5/22 to 6/20/22 games.

    Returns:
        (correct_counter, match_counter, elo_db): number of correct predictions,
        number of scored records, and the dict mapping id -> rating.
    """
    # elo_db: player id -> current elo
    elo_db = dict()
    correct_counter = 0
    match_counter = 0
    roles = ("player_id", "teammate_id", "opponent1_id", "opponent2_id")

    # Decide from the values, not the types: previously integer factors other
    # than 1 were silently ignored. Factors equal to 1 leave ratings unchanged,
    # so skipping the pass in that case gives the same result.
    adjust_daily = decay_amt != 1 or restore_amt != 1
    # No date seen yet. The first record triggers a pass over a still-empty
    # elo_db, which is a no-op, and an empty match_data no longer raises.
    last_dt = None

    for loop, match in enumerate(match_data):
        if adjust_daily:
            # find date. Select [0:10] bc we only want the date not time
            # (assumes the last field is a datetime string such as
            # '2018-07-10T09:00:00.000000' -- TODO confirm against the schema)
            current_dt = str(match[-1])[0:10]
            # if it is a new day then pull every rating towards inital_elo
            if last_dt != current_dt:
                last_dt = current_dt
                # only values of existing keys change, so iterating is safe
                for key, elo in elo_db.items():
                    if elo > inital_elo:
                        elo_db[key] = elo * decay_amt
                    elif elo < inital_elo:
                        elo_db[key] = elo * restore_amt

        # add players to database if needed
        for person in roles:
            elo_db.setdefault(match[person], inital_elo)

        # team strength = mean of the two members' ratings
        avg_oppon = .5 * (elo_db[match["opponent1_id"]] + elo_db[match["opponent2_id"]])
        player_elo = .5 * (elo_db[match["player_id"]] + elo_db[match["teammate_id"]])

        # only records from position eval_start onwards count towards accuracy
        if loop >= eval_start:

            # skip over situations where both sides are unranked or have the same elo
            # NOTE(review): `continue` also skips the rating update below for these
            # records. Behaviour kept as-is; confirm whether this is intended.
            if player_elo == avg_oppon:
                continue

            # compare prediction to the actual result
            pred_win = Probability(player_elo, avg_oppon) > .5
            if pred_win == match["win"]:
                correct_counter += 1
            match_counter += 1

        # perform calculations/updates
        # NOTE(review): only player_id is updated, from the team average rating.
        elo_db[match["player_id"]] = EloRating(player_elo, avg_oppon, K, match["win"])

    return (correct_counter, match_counter, elo_db)

In [17]:
# performs slightly better, but is more complex
# Variant with daily decay (x0.98 above the initial elo) and restore (x1.07 below it)
output = simulatorVariant(decay_amt=.98, restore_amt=1.07)
n_correct, n_scored = output[0], output[1]
print("accuracy is", n_correct/n_scored)

accuracy is 0.8946950618635716


In [18]:
# if decay_amt and restore_amt both equal 1, they have no effect and run like the original function
# Neutral factors (both 1): the daily adjustment is switched off
output = simulatorVariant(decay_amt=1, restore_amt=1)
n_correct, n_scored = output[0], output[1]
print("accuracy is", n_correct/n_scored)

accuracy is 0.893518011606263


## Elo with Log Loss

In [19]:
def LogLoss(outcome, prob, eps=1e-15):
    """Return the binary log loss of one prediction.

    Args:
        outcome: truthy if the event happened (e.g. the player won), falsy otherwise.
        prob: predicted probability that the event happens.
        eps: lower bound applied to the probability assigned to the observed
            outcome, so a fully wrong prediction yields a large finite loss
            instead of a math domain error.

    Returns:
        -log(p), where p is `prob` when `outcome` is truthy and `1 - prob`
        otherwise, floored at `eps`.
    """
    # Probability the prediction gave to what actually happened. Only this term
    # is evaluated: the previous form always computed both log(prob) and
    # log(1 - prob), so prob == 1.0 raised even when the prediction was right.
    likelihood = prob if outcome else 1 - prob
    likelihood = max(likelihood, eps)
    return -log(likelihood)

In [36]:
def simulatorLogLoss(match_data=vball, K=800, inital_elo=1200, decay_amt=1, restore_amt=1, eval_start=206296):
    """Elo replay with optional daily decay/restore, reporting accuracy and mean log loss.

    The four ids of each record are registered at `inital_elo` if unseen, each
    side's strength is the mean of its two members' ratings, and the `EloRating`
    result for the player's side is stored under `player_id` only. Whenever the
    date of a record differs from the date of the previous one, every rating
    above `inital_elo` is multiplied by `decay_amt` and every rating below it by
    `restore_amt`. Records at position >= `eval_start` are scored for both
    accuracy and log loss.

    Args:
        match_data: iterable of records that support lookup by the field names
            "player_id", "teammate_id", "opponent1_id", "opponent2_id" and "win",
            and whose last field starts with the date (see note in the loop).
        K: Elo step size passed to `EloRating`.
        inital_elo: rating given to an id the first time it appears.
        decay_amt: daily multiplier for ratings above `inital_elo`.
        restore_amt: daily multiplier for ratings below `inital_elo`.
        eval_start: zero-based position of the first record that is scored.
            The default keeps the previously hard-coded cutoff; per the original
            author's note it captures the 3/5/22 to 6/20/22 games.

    Returns:
        (correct_counter, match_counter, elo_db, log_loss): number of correct
        predictions, number of scored records, the dict mapping id -> rating,
        and the mean log loss over the scored records (NaN if none were scored).
    """
    # elo_db: player id -> current elo
    elo_db = dict()
    correct_counter = 0
    match_counter = 0
    total_log_loss = 0
    roles = ("player_id", "teammate_id", "opponent1_id", "opponent2_id")

    # Decide from the values, not the types: previously integer factors other
    # than 1 were silently ignored. Factors equal to 1 leave ratings unchanged,
    # so skipping the pass in that case gives the same result.
    adjust_daily = decay_amt != 1 or restore_amt != 1
    # No date seen yet. The first record triggers a pass over a still-empty
    # elo_db, which is a no-op, and an empty match_data no longer raises.
    last_dt = None

    for loop, match in enumerate(match_data):
        if adjust_daily:
            # find date. Select [0:10] bc we only want the date not time
            # (assumes the last field is a datetime string such as
            # '2018-07-10T09:00:00.000000' -- TODO confirm against the schema)
            current_dt = str(match[-1])[0:10]
            # if it is a new day then pull every rating towards inital_elo
            if last_dt != current_dt:
                last_dt = current_dt
                # only values of existing keys change, so iterating is safe
                for key, elo in elo_db.items():
                    if elo > inital_elo:
                        elo_db[key] = elo * decay_amt
                    elif elo < inital_elo:
                        elo_db[key] = elo * restore_amt

        # add players to database if needed
        for person in roles:
            elo_db.setdefault(match[person], inital_elo)

        # team strength = mean of the two members' ratings
        avg_oppon = .5 * (elo_db[match["opponent1_id"]] + elo_db[match["opponent2_id"]])
        player_elo = .5 * (elo_db[match["player_id"]] + elo_db[match["teammate_id"]])

        # only records from position eval_start onwards are scored
        if loop >= eval_start:

            # skip over situations where both sides are unranked or have the same elo
            # NOTE(review): `continue` also skips the rating update below for these
            # records. Behaviour kept as-is; confirm whether this is intended.
            if player_elo == avg_oppon:
                continue

            # compare prediction to the actual result
            prob = Probability(player_elo, avg_oppon)
            total_log_loss += LogLoss(match["win"], prob)
            pred_win = prob > .5
            if pred_win == match["win"]:
                correct_counter += 1
            match_counter += 1

        # perform calculations/updates
        # NOTE(review): only player_id is updated, from the team average rating.
        elo_db[match["player_id"]] = EloRating(player_elo, avg_oppon, K, match["win"])

    # Mean log loss over the scored records. With nothing scored (e.g. fewer
    # than eval_start records) report NaN instead of raising ZeroDivisionError.
    if match_counter:
        log_loss = total_log_loss / match_counter
    else:
        log_loss = float("nan")
    return (correct_counter, match_counter, elo_db, log_loss)

In [37]:
# performs slightly better, but is more complex
# Log loss with daily decay (x0.98 above the initial elo) and restore (x1.07 below it)
output = simulatorLogLoss(decay_amt=.98, restore_amt=1.07)
mean_log_loss = output[3]
print("log loss is", mean_log_loss)

log loss is 0.2991528829230144


In [38]:
# if decay_amt and restore_amt both equal 1, they have no effect and run like the original function
# Neutral factors (both 1): log loss of the plain Elo model, for comparison
output = simulatorLogLoss(decay_amt=1, restore_amt=1)
mean_log_loss = output[3]
print("log loss is", mean_log_loss)

log loss is 0.30705570748361294
