In [1]:
import numpy as np
import math
import pandas as pd

In [2]:
class Sailor:
    def __init__(self, name, team, position, rating=1500, rd=350, vol=0.06, tau=0.5):
        # Default values for a new player
        self.name = name
        self.team = team
        self.position = position
        self.rating = rating
        self.rd = rd
        self.vol = vol
        self.tau = tau  # System constant for volatility adjustment

        # Pre-compute scaling factor
        self.q = np.log(10) / 400

    def _g(self, rd):
        """G function as per Glicko-2 system."""
        return 1 / np.sqrt(1 + 3 * (self.q ** 2) * (rd ** 2) / (math.pi ** 2))

    def _E(self, rating, opponent_rating, rd):
        """E function as per Glicko-2 system."""
        return 1 / (1 + math.exp(-self._g(rd) * (rating - opponent_rating) * self.q))

    def _update_volatility(self, delta, variance):
        """Update player's volatility."""
        a = np.log(self.vol ** 2)
        A = a
        epsilon = 0.000001

        if delta ** 2 > self.rd ** 2 + variance:
            B = np.log(delta ** 2 - self.rd ** 2 - variance)
        else:
            k = 1
            while self._f(a - k * np.sqrt(self.tau ** 2), delta, variance) < 0:
                k += 1
            B = a - k * np.sqrt(self.tau ** 2)

        fA = self._f(A, delta, variance)
        fB = self._f(B, delta, variance)

        while abs(B - A) > epsilon:
            C = A + (A - B) * fA / (fB - fA)
            fC = self._f(C, delta, variance)
            if fC * fB < 0:
                A = B
                fA = fB
            else:
                fA /= 2
            B = C
            fB = fC

        return math.exp(A / 2)

    def _f(self, x, delta, variance):
        """Intermediate function used for volatility calculation."""
        exp_x = math.exp(x)
        term1 = exp_x * (delta ** 2 - self.rd ** 2 - variance - exp_x)
        term2 = 2 * (variance + self.rd ** 2 + exp_x) ** 2
        return (term1 / term2) - ((x - math.log(self.vol ** 2)) / (self.tau ** 2))

    def update(self, opponents):
        """
        Update rating, RD, and volatility based on match results.

        Args:
            opponents: List of tuples containing (opponent_rating, opponent_rd, score).
                       `score` is 1 for a win, 0 for a loss, and 0.5 for a draw.
        """
        # Convert to Glicko-2 scale
        rating = (self.rating - 1500) / 173.7178
        rd = self.rd / 173.7178

        # Step 2: Compute variance
        variance = 0
        for opponent_rating, opponent_rd, score in opponents:
            opponent_rating = (opponent_rating - 1500) / 173.7178
            opponent_rd = opponent_rd / 173.7178
            g = self._g(opponent_rd)
            E = self._E(rating, opponent_rating, opponent_rd)
            variance += (g ** 2) * E * (1 - E)
        variance = 1 / variance

        # Step 3: Compute delta
        delta = 0
        for opponent_rating, opponent_rd, score in opponents:
            opponent_rating = (opponent_rating - 1500) / 173.7178
            opponent_rd = opponent_rd / 173.7178
            g = self._g(opponent_rd)
            E = self._E(rating, opponent_rating, opponent_rd)
            delta += g * (score - E)
        delta *= variance

        # Step 4: Update volatility
        new_vol = self._update_volatility(delta, variance)

        # Step 5: Update rating deviation
        rd_star = np.sqrt(rd ** 2 + new_vol ** 2)
        new_rd = 1 / np.sqrt((1 / rd_star ** 2) + (1 / variance))

        # Step 6: Update rating
        new_rating = rating + new_rd ** 2 * sum(
            self._g((opponent_rd / 173.7178)) * (score - self._E(rating, (opponent_rating / 173.7178), opponent_rd))
            for opponent_rating, opponent_rd, score in opponents
        )

        # Convert back to original scale
        self.rating = 173.7178 * new_rating + 1500
        self.rd = 173.7178 * new_rd
        self.vol = new_vol

In [14]:
# Example Usage
player = Sailor("player")
oppoent1 = Sailor("opponent1",450, 200)
oppoent2 = Sailor("opponent2",350, 30)
oppoent3 = Sailor("opponent3",425, 100)
opponents = [
    (oppoent1.rating, oppoent1.rd, 1),  # Opponent's rating, RD, and score (1 = win)
    (oppoent2.rating, oppoent2.rd, 0),   # Opponent's rating, RD, and score (0 = loss)
    (oppoent3.rating, oppoent3.rd,0.5) # Opponent's rating, RD, and score (0.5 = draw)
]

print("Before update:")
print(f"Rating: {player.rating}, RD: {player.rd}, Volatility: {player.vol}")

player.update(opponents)

print("\nAfter update:")
print(f"Rating: {player.rating:.2f}, RD: {player.rd:.2f}, Volatility: {player.vol:.6f}")

Before update:
Rating: 400, RD: 350, Volatility: 0.06

After update:
Rating: 406.07, RD: 174.06, Volatility: 0.060000


In [17]:
df_races = pd.read_csv("races.csv",converters={"Teams": lambda x: [y.strip().split("'")[1] for y in x.strip("[]").split(", ")]})

df_races['Ratio'] = 1 - (df_races['Score'] / df_races['Teams'].apply(len))
df_races["Ratio"] = df_races["Ratio"].astype(float)
df_races['numTeams'] = df_races['Teams'].apply(len)
df_races['raceNum'] = df_races['raceID'].apply(lambda id: int(id.split("/")[2][:-1]))

df_races['Date'] = df_races['Date'].apply(lambda date: (int(date.split("-")[0]), int(date.split("-")[1]), int(date.split("-")[2])))
df_races = df_races.sort_values(['Date', 'raceNum']).reset_index(drop=True)
df_races

Unnamed: 0,Score,Div,Sailor,Position,Partner,Venue,Regatta,Teams,raceID,Date,Team,Ratio,numTeams,raceNum
0,1,A,Charlie Herrick,Skipper,Kerstin Hyer,Wisconsin,f24/fall-fury,"[Northern Michigan, Wisconsin, Wisconsin, Nort...",f24/fall-fury/1A,"(2024, 9, 7)",Wisconsin,0.923077,13,1
1,1,A,Kerstin Hyer,Crew,Charlie Herrick,Wisconsin,f24/fall-fury,"[Northern Michigan, Wisconsin, Wisconsin, Nort...",f24/fall-fury/1A,"(2024, 9, 7)",Wisconsin,0.923077,13,1
2,2,B,Nigel Yu,Skipper,Ryan Tatelbaum,Wisconsin,f24/fall-fury,"[Northern Michigan, Wisconsin, Wisconsin, Nort...",f24/fall-fury/1B,"(2024, 9, 7)",Wisconsin,0.846154,13,1
3,2,B,Ryan Tatelbaum,Crew,Nigel Yu,Wisconsin,f24/fall-fury,"[Northern Michigan, Wisconsin, Wisconsin, Nort...",f24/fall-fury/1B,"(2024, 9, 7)",Wisconsin,0.846154,13,1
4,8,A,Mary Castellini,Skipper,Paige Kurtenbach,Wisconsin,f24/fall-fury,"[Northern Michigan, Wisconsin, Wisconsin, Nort...",f24/fall-fury/1A,"(2024, 9, 7)",Wisconsin,0.384615,13,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57200,19,A,Nitsa Thotz,Crew,Conrad Kistler,UC Santa Barbara,f24/fall-pacific-coast,"[Victoria, Stanford, UC Santa Barbara, San Die...",f24/fall-pacific-coast/8A,"(2024, 11, 16)",UC Los Angeles,0.000000,19,8
57201,14,B,Rigel Mummers,Skipper,Unknown,UC Santa Barbara,f24/fall-pacific-coast,"[Victoria, Stanford, UC Santa Barbara, San Die...",f24/fall-pacific-coast/8B,"(2024, 11, 16)",UC Los Angeles,0.263158,19,8
57202,14,B,Kenan Thomas,Crew,Unknown,UC Santa Barbara,f24/fall-pacific-coast,"[Victoria, Stanford, UC Santa Barbara, San Die...",f24/fall-pacific-coast/8B,"(2024, 11, 16)",UC Los Angeles,0.263158,19,8
57203,17,C,Orion Spatafora,Skipper,Jonah Sullivan,UC Santa Barbara,f24/fall-pacific-coast,"[Victoria, Stanford, UC Santa Barbara, San Die...",f24/fall-pacific-coast/8C,"(2024, 11, 16)",UC Los Angeles,0.105263,19,8


In [18]:
people = []
for p in df_races['Sailor'].unique():
  team = df_races.loc[df_races['Sailor'] == p]['Team'].iat[0] if len(df_races.loc[df_races['Sailor'] == p]['Team']) > 0 else "Unknown"
  pos = df_races.loc[df_races['Sailor'] == p]['Position'].iat[0] if len(df_races.loc[df_races['Sailor'] == p]['Position']) > 0 else "Unknown"
  people.append(Sailor(p, team, pos))

In [19]:
len(df_races['Regatta'].unique())

139

In [None]:
for i,race in enumerate(df_races['raceID'].unique()):
  scores = df_races.loc[df_races['raceID'] == race]
  sailors = scores['Sailor']
  rating_changes = {}
  for sailor, actual_score in zip(sailors, df_races['Score']):
    sailorScore = scores.loc[sailors == sailor]['Score'].iat[0]
    sailorElo = [p for p in people if p.name == sailor][0]
    
    # scores.loc[sailors == sailor, 'Elo'] = sailorElo
    partner = scores.loc[sailors == sailor]['Partner'].iat[0]
    
    # Calculate AvgOpp (average rating of all other sailors)
    opponents = [o.rating for o in people if o.name != sailor and o.name != partner]
    avg_opp_rating = sum(opponents) / len(opponents)
    
    # Calculate expected performance using Elo formula
    expected_score = 1 / (1 + 10 ** ((avg_opp_rating - sailorElo.rating) / 100))
    
    # Calculate rating change based on actual vs expected (32 is scaling facor)
    delta_rating = 32 * (actual_score - expected_score)
    rating_changes[sailor] = delta_rating
    
    for other in [p for p in sailors if p != sailor and p != partner]:
      otherElo = [p for p in people if p.name == other][0]
      otherScore = scores.loc[sailors == other]['Score'].iat[0]
      
      delta = sailorScore - otherScore
      delta = 1 if delta < 0 else 0
      
      sailorElo.update([(otherElo.rating, otherElo.rd, delta)])

  for sailor, delta in rating_changes.items():
    [p for p in people if p.name == sailor][0].rating += delta
  
    # scores.loc[scores['Sailor'] == sailor,'Elo'] = sailorElo
    # print(sailorElo)
    
  # Dont update main elo until all new elos are calculated
  # for sailor in scores['Sailor']:
    # df_elo.loc[df_elo[0] == sailor,1] = scores.loc[scores['Sailor'] == sailor, 'Elo']

In [16]:
#Adjust range
ratings = [p.rating for p in people]
lowest = min(ratings)
highest = max(ratings)

scale = 10000 / (highest - lowest)
offset = 0 - (lowest * scale)

for p in people:
  p.rating = p.rating * scale + offset
  

In [17]:
df_elo = pd.DataFrame(columns=['Sailor','Team','Position', 'Elo', 'RD', 'vol'])
for p in people:
  # print(p.name, p.rating)
  df_elo.loc[len(df_elo)] = [p.name, p.team, p.position, p.rating, p.rd, p.vol]
df_elo = df_elo.sort_values('Elo', ascending=False)
df_elo.reset_index(drop=True, inplace=True)
df_elo

Unnamed: 0,Sailor,Team,Position,Elo,RD,vol
0,Vanessa Lahrkamp,Stanford,Skipper,10000.000000,67.514236,0.06
1,Kennedy Leehealey,Harvard,Crew,8403.737861,63.555377,0.06
2,Justin Callahan,Harvard,Skipper,8396.881974,63.545810,0.06
3,Sarah Moeder,Yale,Crew,8293.636980,64.388330,0.06
4,Alice Schmid,Stanford,Skipper,8110.692400,62.887740,0.06
...,...,...,...,...,...,...
4059,Nicholas Conti,UC Davis,Skipper,312.346986,60.219966,0.06
4060,Vaughn Russell,Channel Islands,Crew,300.359683,60.007063,0.06
4061,Pike Williams,Cal Poly,Skipper,240.498386,59.970362,0.06
4062,Nitsa Thotz,UC Los Angeles,Crew,177.383331,61.061988,0.06


In [18]:
df_elo.to_csv("elo4.csv", index=False)

In [15]:
# Sample dataframe
data = {
    'date_tuple': [(2024, 5, 17), (2024, 5, 1), (2024, 5, 30)],
    'value': [10, 20, 30]
}
df = pd.DataFrame(data)

# Sort by the date_tuple column
df_sorted = df.sort_values(by='date_tuple')

print(df_sorted)

      date_tuple  value
1   (2024, 5, 1)     20
0  (2024, 5, 17)     10
2  (2024, 5, 30)     30


To do:
- scrape more seasons
- account for date and apply elo in order
- regatta difficulty value