In [1]:
import numpy as np
import pandas as pd
from glicko2 import Player
import math

In [2]:
class Sailor:
    def __init__(self, name, team, pos):
        self.name = name
        self.team = team
        self.pos = pos
        self.player = Player()
        
    def update_player(self, rating_list, RD_list, outcome_list):
        self.player.update_player(rating_list, RD_list, outcome_list)
        
    def __repr__(self):
        return f"{self.name}: {self.player.rating}, {self.player.rd}, {self.player.vol}"

In [3]:
#testing:
s = Sailor("Test person", "team", "Skipper")
# s = Player()
s.player.setRd(200)
s.update_player([1400, 1550, 1700],[30, 100, 300], [1, 0, 0])
s

Test person: 1464.050665034383, 151.51653572471452, 0.05999894750834964

In [4]:
df_races = pd.read_csv("races.csv",converters={"Teams": lambda x: [y.strip().split("'")[1] for y in x.strip("[]").split(", ")]})

df_races['Ratio'] = 1 - (df_races['Score'] / df_races['Teams'].apply(len))
df_races["Ratio"] = df_races["Ratio"].astype(float)
df_races['numTeams'] = df_races['Teams'].apply(len)
df_races['raceNum'] = df_races['raceID'].apply(lambda id: int(id.split("/")[2][:-1]))

# df_races['Date'] = df_races['Date'].apply(lambda date: (int(date.split("-")[0]), int(date.split("-")[1]), int(date.split("-")[2])))
# df_races = df_races.sort_values(['Date', 'raceNum']).reset_index(drop=True)

df_races = df_races.loc[df_races['raceID'].str.contains('f24')] # filter for f24

people = {}
for p in df_races['Sailor'].unique():
    team = df_races.loc[df_races['Sailor'] == p]['Team'].iat[0] if len(df_races.loc[df_races['Sailor'] == p]['Team']) > 0 else "Unknown"
    pos = df_races.loc[df_races['Sailor'] == p]['Position'].iat[0] if len(df_races.loc[df_races['Sailor'] == p]['Position']) > 0 else "Unknown"
    people[p] = Sailor(p, team, pos)

In [5]:
for i,date in enumerate(df_races['Date'].unique()):
    print(f"Analyzing {i}/{len(df_races['Date'].unique())} {date}")
    for i,race in enumerate(df_races.loc[df_races['Date'] == date,'raceID'].unique()):
        if i % 100 == 0:
            print(f"Currently analyzing race {i}/{len(df_races.loc[df_races['Date'] == date,'raceID'].unique())}")
            scores = df_races.loc[df_races['raceID'] == race]
        
        sailors = scores['Sailor']
        if len(sailors) == 0:
            continue
        
        for sailor, actual_score in zip([people[s] for s in sailors], df_races['Score']):
            sailorScore = scores.loc[sailors == sailor.name]['Score'].iat[0]
            
            partner = scores.loc[scores['Sailor'] == sailor.name, 'Partner'].iat[0]
            
            outcomes = [1 if actual_score - scores.loc[scores['Sailor'] == p, 'Score'].iat[0] < 0 else 0 for p in sailors if p != partner]
            
            sailor.update_player([people[p].player.rating for p in sailors if p != partner],[people[p].player.rd for p in sailors if p != partner],outcomes)
    for p in people.keys():
        if p not in df_races.loc[df_races['Date'] == date, 'Sailor'].unique():
            people[p].player.did_not_compete()

Analyzing 0/19 2024-11-16
Currently analyzing race 0/36
Analyzing 1/19 2024-11-09
Currently analyzing race 0/22
Analyzing 2/19 2024-11-02
Currently analyzing race 0/52
Analyzing 3/19 2024-10-25
Currently analyzing race 0/30
Analyzing 4/19 2024-10-26
Currently analyzing race 0/302
Currently analyzing race 100/302
Currently analyzing race 200/302
Currently analyzing race 300/302
Analyzing 5/19 2024-10-27
Currently analyzing race 0/10
Analyzing 6/19 2024-10-19
Currently analyzing race 0/256
Currently analyzing race 100/256
Currently analyzing race 200/256
Analyzing 7/19 2024-10-12
Currently analyzing race 0/330
Currently analyzing race 100/330
Currently analyzing race 200/330
Currently analyzing race 300/330
Analyzing 8/19 2024-10-13
Currently analyzing race 0/14
Analyzing 9/19 2024-10-05
Currently analyzing race 0/374
Currently analyzing race 100/374
Currently analyzing race 200/374
Currently analyzing race 300/374
Analyzing 10/19 2024-10-06
Currently analyzing race 0/8
Analyzing 11/19 2

In [6]:
# ratings = [people[p].rating for p in people.keys()]
# lowest = min(ratings)
# highest = max(ratings)

# scale = 10000 / (highest - lowest)
# offset = 0 - (lowest * scale)

# for p in people.values():
# p.rating = p.rating * scale + offset


df_elo = pd.DataFrame(columns=['Sailor','Team','Position', 'Elo', 'RD', 'vol'])
for p in people.values():
    # print(p.name, p.rating)
    df_elo.loc[len(df_elo)] = [p.name, p.team, p.pos, p.player.rating, p.player.rd, p.player.vol]
df_elo = df_elo.sort_values('Elo', ascending=False)
df_elo.reset_index(drop=True, inplace=True)

df_elo.to_csv("elo10.csv", index=False)

In [7]:
df_elo

Unnamed: 0,Sailor,Team,Position,Elo,RD,vol
0,Guthrie Braun,Coast Guard,Skipper,21141.167102,35.763755,0.060471
1,Emily Mueller,Coast Guard,Crew,21141.167102,32.532107,0.060471
2,Vivian Smith,Connecticut College,Crew,21045.495542,32.436351,0.062716
3,Skylor Sweet,Connecticut College,Skipper,21045.495542,32.436351,0.062716
4,Ellie Pinto,MIT,Crew,21029.879232,32.350490,0.062422
...,...,...,...,...,...,...
2591,Pierson Falk,Rhode Island,Skipper,1500.000000,352.474461,0.060000
2592,Katherine Bachmann,Northeastern,Crew,1500.000000,352.011824,0.060000
2593,Carter Anderson,Northeastern,Skipper,1500.000000,351.857477,0.060000
2594,Ava Tobias,Bowdoin,Crew,1500.000000,352.474461,0.060000
