In [1]:
from openskill.models import PlackettLuce
import pandas as pd
import numpy as np
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

# Service-account credentials for the Firebase project, read from a local JSON key file.
cred = credentials.Certificate("thecrowsnestapp-creds.json")

# initialize_app() raises ValueError if the default app already exists, which
# happens whenever this cell is re-run in a live kernel.  Probe with get_app()
# first (it raises ValueError when no default app exists yet) so the cell is
# safe to execute repeatedly.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred)

# Firestore client used by the upload cells, and the rating model shared by
# every Sailor and by the rating loop.
db = firestore.client()
model = PlackettLuce()

In [2]:
class Sailor:
    """One sailor in one role, with a rating object and a per-race history.

    Attributes:
        name: Sailor name; also passed to the rating object as its name.
        teams: Collection of team names associated with the sailor.
        pos: Role label (the notebook uses 'Skipper' and 'Crew').
        rank: Rank value; defaults to 0 until a later step assigns one.
        seasons: List of season identifiers the sailor appears in.
        races: Race identifiers appended by the rating loop.
        changes: Per-race detail dicts appended by the rating loop.
        r: Rating object created by the module-level ``model``.
    """

    def __init__(self, name, teams, pos, seasons=None, rank=0, rating=1500):
        """Create a sailor with starting mean ``rating`` and sigma ``rating // 3``.

        ``seasons`` defaults to None rather than ``[]``: a list literal in the
        signature is created once and would be shared by every instance that
        omits the argument.
        """
        self.name = name
        self.teams = teams
        self.pos = pos
        self.rank = rank
        # A fresh list per instance when the caller supplies none.
        self.seasons = [] if seasons is None else seasons
        self.races = []
        self.changes = []
        self.r = model.rating(rating, rating // 3, name)

    def rerate(self, rating):
        """Replace the rating with a new one copying ``rating.mu`` and ``rating.sigma``."""
        self.r = model.rating(rating.mu, rating.sigma, self.name)

    def __repr__(self):
        """Return name, teams, role and the string form of the rating object."""
        return f"{self.name}: {self.teams}, {self.pos} {str(self.r)}"

In [93]:
# Smoke test: four default-rated skippers on team 'nu' are rated on a single
# race, then printed to inspect the updated ratings.
p1, p2, p3, p4 = players = [Sailor(f"p{k}", ['nu'], 'Skipper') for k in range(1, 5)]

# Each player is wrapped in a one-element list before being passed to the model,
# together with one value per player and a uniform weight of 3.0.
ratings = model.rate(
    [[player.r] for player in players],
    [3,2,1,4],
    weights=[[3.0]] * 4,
)

# Write the updated rating objects back onto the players.
for player, rated_team in zip(players, ratings):
    player.r = rated_team[0]
print(p1,p2,p3,p4)

p1: Plackett-Luce Player Data: 

id: ad16bdc0c0104be0921137ba9ff870f2
name: p1
mu: 1489.5836948596302
sigma: 489.5844366494107
 p2: Plackett-Luce Player Data: 

id: 52c34cc14e5649d98d05ec0fb4a48f5a
name: p2
mu: 1708.3261028073944
sigma: 473.70353025398146
 p3: Plackett-Luce Player Data: 

id: 031367610f89420bbcc83782462d1683
name: p3
mu: 1874.98698505331
sigma: 488.14186177390417
 p4: Plackett-Luce Player Data: 

id: 376a0d80cc2047c78423cab70c0a5555
name: p4
mu: 1364.5880331751937
sigma: 489.5844366494107



In [4]:
# Load the race results. The "Teams" column holds the text of a list, so each
# cell is split on ", " and the text between the first pair of single quotes
# is taken as the team name.
df_races = pd.read_csv(
    "racestest.csv",
    converters={
        "Teams": lambda cell: [item.strip().split("'")[1] for item in cell.strip("[]").split(", ")]
    },
)

# Derived columns: Ratio = 1 - Score / (number of teams), the team count itself,
# and the race number parsed from the third "/"-separated part of raceID
# (with its last character dropped).
team_counts = df_races['Teams'].apply(len)
df_races['Ratio'] = (1 - (df_races['Score'] / team_counts)).astype(float)
df_races['numTeams'] = team_counts
df_races['raceNum'] = df_races['raceID'].apply(lambda race_id: int(race_id.split("/")[2][:-1]))

# Turn "Y-M-D" date strings into integer tuples so sorting is chronological.
df_races['Date'] = df_races['Date'].apply(
    lambda date: tuple(int(date.split("-")[k]) for k in range(3))
)
df_races_full = df_races.sort_values(['Date', 'raceNum']).reset_index(drop=True)

# df_races = df_races.loc[df_races['raceID'].str.contains('f24')] # filter for f24
is_skipper = df_races_full['Position'].str.contains('Skipper')
is_crew = df_races_full['Position'].str.contains('Crew')
df_races_skipper = df_races_full.loc[is_skipper]  # rows whose Position mentions Skipper
df_races_crew = df_races_full.loc[is_crew]  # rows whose Position mentions Crew
people = {}


def _seasons_per_sailor(frame):
    """Map each sailor to the unique raceID prefixes (text before the first "/") in ``frame``."""
    season = frame['raceID'].str.split('/').str[0]
    return frame.assign(Season=season).groupby('Sailor')['Season'].unique()


# Unique teams per sailor, separately for each role.
skipper_groups = df_races_skipper.groupby('Sailor')['Team'].unique()
crew_groups = df_races_crew.groupby('Sailor')['Team'].unique()

# Unique seasons per sailor, separately for each role.
skipper_seasons = _seasons_per_sailor(df_races_skipper)
crew_seasons = _seasons_per_sailor(df_races_crew)

# Register every sailor of one role in the module-level `people` dictionary.
def add_sailor(group, seasons_group, role):
    """Create a Sailor for each entry of ``group`` and store it in ``people``.

    Args:
        group: Mapping-like object whose ``items()`` yields (sailor, teams).
        seasons_group: Object with ``get(sailor, default)`` returning that
            sailor's seasons.
        role: Role label; used as the Sailor's position and in the
            ``"<sailor>/<role>"`` dictionary key.
    """
    for sailor, teams in group.items():
        # Fall back to a placeholder team when none are listed.
        if len(teams) == 0:
            teams = ["Unknown"]

        # Seasons looked up for this sailor, copied into a plain list.
        sailor_seasons = list(seasons_group.get(sailor, []))

        key = f"{sailor}/{role}"
        people[key] = Sailor(sailor, teams, role, sailor_seasons)

# Populate `people` with one Sailor per (sailor, role): skippers first, then crew.
add_sailor(group=skipper_groups, seasons_group=skipper_seasons, role='Skipper')
add_sailor(group=crew_groups, seasons_group=crew_seasons, role='Crew')

In [None]:
stype = 'Ratio'
race_counter = 0

# Rate every race, first for skippers and then for crew.
# The loop names are deliberately distinct from `df_races` and the builtin
# `type`, so this cell rebinds neither of them for the cells that follow.
for role_suffix, role_races in zip(['/Skipper', '/Crew'], [df_races_skipper, df_races_crew]):
    grouped = role_races.groupby(['Date', 'Regatta', 'raceID'])
    # Denominator shown in the progress message; constant for the whole role,
    # so it is computed once instead of on every print.
    progress_total = len(role_races['raceID'].unique()) * 2

    for (date, regatta, race), scores in grouped:
        if race_counter % 1000 == 0:
            print(f"Currently analyzing race {race_counter}/{progress_total} Regatta:{regatta}, Date:{date}")
        race_counter += 1

        sailors = scores['Sailor']
        if sailors.empty:
            continue

        # Mean rating over everyone in `people` (both roles), as of this race.
        globalAvg = sum(p.r.mu for p in people.values()) / len(people)

        # Participants of this race and their ratings before the update.
        racers = [people[name + role_suffix] for name in sailors]
        startingElos = [racer.r.mu for racer in racers]
        ratings = [[racer.r] for racer in racers]

        # Mean rating of the participants, relative to the global mean; used
        # as the weight for every participant in this race.
        regattaAvg = sum(startingElos) / len(startingElos)
        multiplier = regattaAvg / globalAvg

        # Skip races with fewer than 2 participants
        if len(ratings) < 2:
            continue

        # Rate using the model; the Score column is passed as the second argument.
        ratings = model.rate(ratings, list(scores['Score']), weights=[[multiplier]] * len(ratings))

        # Update racers' ratings
        for racer, new_rating in zip(racers, ratings):
            racer.r = new_rating[0]

        # Record the race on each participant.  Values are taken from the
        # participant's own row by walking the columns in step with `racers`,
        # rather than re-filtering `scores` by name for every field: that
        # avoids four mask scans per sailor and uses the correct row even if
        # a name appears more than once in a race.
        for racer, starting_mu, score, partner, ratio, venue in zip(
                racers, startingElos,
                scores['Score'], scores['Partner'], scores['Ratio'], scores['Venue']):
            racer.races.append(race)
            racer.changes.append({
                'score': score,
                'change': racer.r.mu - starting_mu,
                'regAvg': regattaAvg,
                'newRating': racer.r.mu,
                'date': date,
                'partner': partner,
                'ratio': ratio,
                'venue': venue,
                'raceID': race
            })

Currently analyzing race 0/32614 Regatta:s20/peter-wenner-rainbow-invite, Date:(2020, 1, 18)
Currently analyzing race 100/32614 Regatta:s20/usf-women, Date:(2020, 2, 15)
Currently analyzing race 200/32614 Regatta:s20/mustang-open, Date:(2020, 2, 29)
Currently analyzing race 300/32614 Regatta:s20/sp-2, Date:(2020, 2, 29)
Currently analyzing race 400/32614 Regatta:s20/navy-spring-womens, Date:(2020, 3, 7)
Currently analyzing race 500/32614 Regatta:s21/saisa-north-points-uncw, Date:(2021, 3, 6)
Currently analyzing race 600/32614 Regatta:s21/seahawk-wave-showdown, Date:(2021, 3, 26)
Currently analyzing race 700/32614 Regatta:s21/camel-fleet-race-invite, Date:(2021, 4, 9)
Currently analyzing race 800/32614 Regatta:s21/maisa-south-women-championships, Date:(2021, 4, 17)
Currently analyzing race 900/32614 Regatta:s21/pccsc-coed, Date:(2021, 4, 24)
Currently analyzing race 1000/32614 Regatta:s21/sailing-women-nationals, Date:(2021, 5, 19)
Currently analyzing race 1100/32614 Regatta:f21/fall-fu

In [None]:
# Build the rating table and the wide per-race table in one pass over `people`.
# Rows are collected in lists and turned into DataFrames once, instead of
# growing `df_elo` one `.loc` assignment at a time.
elo_rows = []
new_rows = []
for p in people.values():
    elo_rows.append({'Sailor': p.name, 'Teams': p.teams, 'Pos': p.pos, 'Elo': p.r.mu})
    # One column per race the sailor took part in, holding
    # (score, change, regatta average, new rating).
    row = {"Sailor": p.name}
    row.update(zip(p.races, [(int(c['score']), float(c['change']), float(c['regAvg']), float(c['newRating'])) for c in p.changes]))
    new_rows.append(row)

df_elo = pd.DataFrame(elo_rows, columns=['Sailor', 'Teams', 'Pos', 'Elo'])

# Race columns come from `df_races_full` so that every race is covered no
# matter what `df_races` is bound to when this cell runs.
all_keys = ['Sailor'] + list(df_races_full['raceID'].unique())
new_df = pd.DataFrame(new_rows, columns=all_keys)

# Both frames were built in the same order, so they are joined side by side on 'Sailor'.
df_elo = pd.concat([df_elo.set_index('Sailor'), new_df.set_index('Sailor')], axis=1).reset_index()

# Filter sailors who have 'f24' in their seasons list
eligible_sailors = [p.name for p in people.values() if 'f24' in p.seasons]
eligible_names = set(eligible_sailors)  # constant-time membership tests below

# Keep only eligible sailors, highest rating first.
df_elo_filtered = (
    df_elo[df_elo['Sailor'].isin(eligible_sailors)]
    .sort_values('Elo', ascending=False)
    .reset_index(drop=True)
)

# Rank sailors within each position (Skipper/Crew) based on Elo
df_elo_filtered['Rank'] = df_elo_filtered.groupby('Pos')['Elo'].rank(method='dense', ascending=False).astype(int)

# Update rank attribute for each sailor (only for those in the filtered list).
# A (name, position) -> rank dictionary replaces one masked DataFrame lookup per sailor.
rank_by_sailor = dict(zip(zip(df_elo_filtered['Sailor'], df_elo_filtered['Pos']), df_elo_filtered['Rank']))
for p in people.values():
    if p.name in eligible_names:
        p.rank = rank_by_sailor[(p.name, p.pos)]


In [None]:
# Upload one document per sailor to the 'sailorsElo' collection via a write batch.
col = db.collection('sailorsElo')
batch = db.batch()

for i, sailor in enumerate(people.values()):
    # One entry per race, pairing each race id with its recorded details.
    race_entries = []
    for raceid, c in zip(sailor.races, sailor.changes):
        race_entries.append({
            "raceID": raceid,
            "score": float(c['score']),
            "change": float(c['change']),
            'regAvg': float(c['regAvg']),
            'newRating': float(c['newRating']),
            'date': c['date'],
            'partner': c['partner'],
            'ratio': float(c['ratio']),
            'venue': c['venue'],
        })

    payload = {
        "Name": sailor.name,
        "Position": sailor.pos,
        "Teams": list(sailor.teams),
        "Rating": int(sailor.r.mu),
        "GlobalRank": int(sailor.rank),
        "races": race_entries,
    }
    # col.document() with no argument requests a new document reference each time.
    batch.set(col.document(), payload)

    # Commit whenever the index is a multiple of 20 (this includes index 0).
    if i % 20 == 0:
        batch.commit()
# Commit whatever is still queued; as the last expression, its result is the cell output.
batch.commit()

[update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 },
 update_time {
   seconds: 1733634325
   nanos: 651949000
 }]

In [10]:
import requests
from bs4 import BeautifulSoup

# Build and upload one 'eloTeams' document per team, plus a summary list in 'vars/eloTeams'.
batch = db.batch()
col = db.collection('eloTeams')
# Teams are read from `df_races_full` so the list does not depend on what
# `df_races` is bound to when this cell runs.
team_names = list(df_races_full['Team'].unique())
lenteams = len(team_names)
teams = []
bySailors = df_races_full.groupby('Sailor')  # not used in this cell

# Seasons (raceID prefix before the first "/") per sailor, computed once
# instead of re-scanning the whole frame for every team member.
seasons_by_sailor = (
    df_races_full.assign(Season=df_races_full['raceID'].str.split('/').str[0])
    .groupby('Sailor')['Season']
    .unique()
)

for i, team in enumerate(team_names):
    print(f"{i}/{lenteams} {team}")
    # Mean rating over df_elo rows whose Teams entry contains this team.
    avg = df_elo.loc[df_elo['Teams'].apply(lambda x: team in x), 'Elo'].mean()
    # Placeholder kept when the region cannot be scraped, so a plain string
    # (never a DataFrame) is what gets uploaded.
    region = "Unknown"

    teamLink = df_races_full.loc[df_races_full['Team'] == team, 'Teamlink'].iloc[0]
    # Single quotes inside the replacement field keep this valid before Python 3.12.
    url = f"https://scores.collegesailing.org/schools/{teamLink.split('/')[2]}"

    try:
        # The timeout stops one unresponsive page from stalling the whole run.
        page = requests.get(url, timeout=30)
        teamPage = BeautifulSoup(page.content, 'html.parser')
        # First child of the first child of the first 'page-info-value' span.
        # NOTE(review): presumably the school's conference/region - confirm against the page.
        region = str(teamPage.find('span', class_="page-info-value").contents[0].contents[0])
    except (requests.RequestException, AttributeError, IndexError):
        # Best effort: report the page that could not be fetched or parsed and move on.
        print(url)

    members = [{"name": p.name,
                'pos': p.pos,
                'rating': int(p.r.mu),
                'seasons': list(seasons_by_sailor.get(p.name, [])),
                'globalrank': int(p.rank)} for p in people.values() if team in p.teams]

    team_summary = {"name": team, "avg": avg, "region": region, "link": url}
    teams.append(team_summary)
    batch.set(col.document(), {**team_summary, 'members': members})
    if i % 20 == 0:  # commit whenever the index is a multiple of 20 (including 0)
        batch.commit()
batch.commit()
doc = db.collection('vars').document('eloTeams').set({"teams": teams})

0/184 Hawaii
1/184 UC Santa Barbara
2/184 UC Los Angeles
3/184 Washington
4/184 Cal Maritime
5/184 Oregon
6/184 UC Davis
7/184 Berkeley
8/184 Cal Poly
9/184 UC San Diego
10/184 Western Washington
11/184 CSU Long Beach
12/184 Channel Islands
13/184 Southern Cal
14/184 UC Santa Cruz
15/184 San Diego State
16/184 UC Irvine
17/184 Arizona State
18/184 Embry-Riddle
19/184 Rollins
20/184 Florida Tech
21/184 Texas A&M Galveston
22/184 Texas A&M
23/184 Texas
24/184 North Texas
25/184 Central Oklahoma
26/184 Lewis & Clark
27/184 Texas A&M C. Christ
28/184 Florida
29/184 Palm Beach Atlantic
30/184 New College
31/184 Georgia Tech
32/184 Georgia
33/184 North Carolina State
34/184 South Florida
35/184 Jacksonville
36/184 U. Miami
37/184 Salve Regina
38/184 Eckerd
39/184 Northwestern
40/184 Charleston
41/184 Clemson
42/184 U South Carolina
43/184 UNC Wilmington
44/184 North Carolina
45/184 Duke
46/184 Auburn
47/184 Georgetown
48/184 Navy
49/184 Old Dominion
50/184 Michigan
51/184 Maine Maritime
52/1

In [11]:
def _top_sailors(position, limit=100):
    """Return summary dicts for sailors of ``position`` ranked 1..``limit``, best rank first.

    Sailors start with ``rank == 0`` and only receive a positive rank when the
    ranking step assigns one, so the lower bound keeps unranked sailors out of
    the list.
    """
    ranked = [p for p in people.values() if p.pos == position and 1 <= p.rank <= limit]
    ranked.sort(key=lambda p: p.rank)
    return [
        {'name': p.name, 'rank': int(p.rank), 'pos': p.pos, 'team': list(p.teams), 'rating': p.r.mu, 'seasons': list(p.seasons)}
        for p in ranked
    ]


topSkippers = _top_sailors('Skipper')
topCrews = _top_sailors('Crew')
# Store both lists in a single 'vars/topSailors' document.
doc = db.collection('vars').document('topSailors').set({"skippers": topSkippers, "crews": topCrews})

In [None]:
# Upload one 'eloRegattas' document per regatta, listing the sailors whose name
# appears in that regatta's rows.
for regatta in df_races_full['Regatta'].unique():
    races = df_races_full[df_races_full['Regatta'] == regatta]
    sailors = races['Sailor'].unique()
    sailor_names = set(sailors)  # constant-time name lookups

    # Sailor objects cannot be written to Firestore directly, so each one is
    # reduced to a plain dict.  Matching is by name only, so every role entry
    # of a matching name is included.
    # NOTE(review): the field set below is a minimal guess - confirm the
    # schema the consumer of 'eloRegattas' expects.
    # TODO: a per-race breakdown (race id, division, partner, rating change)
    # was sketched for this cell but is not implemented.
    racePpl = [
        {'name': p.name, 'pos': p.pos, 'teams': list(p.teams), 'rating': int(p.r.mu)}
        for p in people.values()
        if p.name in sailor_names
    ]

    db.collection('eloRegattas').document().set({'regattaName': regatta, 'sailors': racePpl})

In [7]:
# Write the `df_elo` table to "elo19.csv" without the index column.
# Requires `df_elo` to have been built by an earlier cell.
df_elo.to_csv("elo19.csv",index=False)