In [112]:
from openskill.models import PlackettLuce
import pandas as pd
import numpy as np
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

# Service-account credentials are read from a JSON file next to the notebook.
cred = credentials.Certificate("thecrowsnestapp-creds.json")

# initialize_app() raises ValueError when the default app already exists, which
# happens whenever this cell is re-run in a live kernel. get_app() raises
# ValueError only when no default app exists yet, so initialise in that case.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred)

# Shared handles used by every later cell: the Firestore client and the
# Plackett-Luce rating model.
db = firestore.client()
model = PlackettLuce()

In [99]:
class Sailor:
    """One sailor in one position, with a rating object and a per-race history.

    The rating object is created by the module-level ``model`` (its
    ``rating(mu, sigma, name)`` method), so ``model`` must exist before a
    Sailor is constructed.
    """

    def __init__(self, name, teams, pos, seasons=None, rank=0, rating=1500):
        """Create a sailor with an initial rating.

        Args:
            name: Sailor name; also passed to the model as the rating's name.
            teams: Collection of team names the sailor is associated with.
            pos: Position label (the notebook uses 'Skipper' and 'Crew').
            seasons: List of season identifiers. Defaults to a new empty list
                per instance.
            rank: Rank to start with; 0 is the value a sailor keeps until a
                rank is assigned elsewhere.
            rating: Initial mu. The initial sigma is ``rating // 3``.
        """
        self.name = name
        self.teams = teams
        self.pos = pos
        self.rank = rank
        # A literal [] default would be created once and shared by every
        # instance built without seasons, so appending to one sailor's list
        # would change all of them. A caller-supplied list is stored as-is.
        self.seasons = [] if seasons is None else seasons
        self.races = []    # race identifiers, appended as races are rated
        self.changes = []  # per-race result tuples, parallel to self.races
        self.r = model.rating(rating, rating // 3, name)

    def rerate(self, rating):
        """Replace the rating with a new model rating copying ``rating``'s mu and sigma."""
        self.r = model.rating(rating.mu, rating.sigma, self.name)

    def __repr__(self):
        return f"{self.name}: {self.teams}, {self.pos} {str(self.r)}"

In [93]:
# Smoke test: four fresh skippers rated once with the positional values
# [3, 2, 1, 4] and an equal weight of 3.0 each. In the printed output the
# sailor given 1 (p3) ends with the highest mu and the one given 4 (p4)
# with the lowest.
p1, p2, p3, p4 = (Sailor(label, ['nu'], 'Skipper') for label in ("p1", "p2", "p3", "p4"))
players = [p1, p2, p3, p4]

# model.rate takes one single-member team per player and returns the updated
# teams in the same order.
ratings = model.rate([[player.r] for player in players], [3, 2, 1, 4], weights=[[3.0]] * 4)
for player, updated_team in zip(players, ratings):
    player.r = updated_team[0]

print(p1, p2, p3, p4)

p1: Plackett-Luce Player Data: 

id: ad16bdc0c0104be0921137ba9ff870f2
name: p1
mu: 1489.5836948596302
sigma: 489.5844366494107
 p2: Plackett-Luce Player Data: 

id: 52c34cc14e5649d98d05ec0fb4a48f5a
name: p2
mu: 1708.3261028073944
sigma: 473.70353025398146
 p3: Plackett-Luce Player Data: 

id: 031367610f89420bbcc83782462d1683
name: p3
mu: 1874.98698505331
sigma: 488.14186177390417
 p4: Plackett-Luce Player Data: 

id: 376a0d80cc2047c78423cab70c0a5555
name: p4
mu: 1364.5880331751937
sigma: 489.5844366494107



In [None]:
# Load race results, derive per-row columns, and pre-group by sailor.


def _parse_teams(cell):
    """Parse the text of a 'Teams' cell into a list of names.

    The cell is stripped of surrounding brackets, split on ", ", and the text
    between the first pair of single quotes of each entry is kept.
    """
    entries = cell.strip("[]").split(", ")
    return [entry.strip().split("'")[1] for entry in entries]


def _race_number(race_id):
    """Return the integer in the third '/'-separated part of a raceID, minus its last character."""
    return int(race_id.split("/")[2][:-1])


def _date_tuple(text):
    """Turn a '-'-separated date string into a tuple of its first three parts as ints."""
    parts = text.split("-")
    return (int(parts[0]), int(parts[1]), int(parts[2]))


def _seasons_by_sailor(frame):
    """Map each sailor to the unique first '/'-separated parts of their raceIDs."""
    with_season = frame.assign(Season=frame['raceID'].str.split('/').str[0])
    return with_season.groupby('Sailor')['Season'].unique()


df_races = pd.read_csv("races.csv", converters={"Teams": _parse_teams})

# Ratio is 1 - Score / (number of entries in Teams).
team_counts = df_races['Teams'].apply(len)
df_races['Ratio'] = (1 - (df_races['Score'] / team_counts)).astype(float)
df_races['numTeams'] = team_counts
df_races['raceNum'] = df_races['raceID'].apply(_race_number)

# Dates become int tuples so they sort numerically.
df_races['Date'] = df_races['Date'].apply(_date_tuple)
df_races_full = df_races.sort_values(['Date', 'raceNum']).reset_index(drop=True)

# df_races = df_races.loc[df_races['raceID'].str.contains('f24')] # filter for f24
position = df_races_full['Position']
df_races_skipper = df_races_full.loc[position.str.contains('Skipper')]  # rows sailed as skipper
df_races_crew = df_races_full.loc[position.str.contains('Crew')]  # rows sailed as crew
people = {}

# Unique teams per sailor, separately for each position
skipper_groups = df_races_skipper.groupby('Sailor')['Team'].unique()
crew_groups = df_races_crew.groupby('Sailor')['Team'].unique()

# Unique seasons per sailor, separately for each position
skipper_seasons = _seasons_by_sailor(df_races_skipper)
crew_seasons = _seasons_by_sailor(df_races_crew)

# Register one Sailor per entry of a sailor -> teams mapping
def add_sailor(group, seasons_group, role):
    """Create a Sailor for every entry of ``group`` and store it in ``people``.

    Args:
        group: Mapping (anything with ``.items()``) from sailor name to the
            collection of teams for that sailor.
        seasons_group: Mapping (anything with ``.get``) from sailor name to
            that sailor's seasons; a missing name yields an empty list.
        role: Position label; used both as the Sailor's position and as the
            suffix of the ``people`` key (``"<name>/<role>"``).
    """
    for name, sailed_for in group.items():
        # An empty team collection is replaced by a single "Unknown" entry.
        if len(sailed_for) == 0:
            sailed_for = ["Unknown"]

        known_seasons = list(seasons_group.get(name, []))
        people[f"{name}/{role}"] = Sailor(name, sailed_for, role, known_seasons)

# Populate `people`: skippers first, then crews
for role_groups, role_seasons, role_name in (
    (skipper_groups, skipper_seasons, 'Skipper'),
    (crew_groups, crew_seasons, 'Crew'),
):
    add_sailor(role_groups, role_seasons, role_name)

In [None]:
stype = 'Ratio'
i = 0

# Rating pass: every race is rated once for skippers and once for crews, and
# each participant's Sailor object receives the new rating plus a history
# entry. The loop variables deliberately avoid the names `type` and
# `df_races`, so the builtin and the frame loaded from races.csv are not
# overwritten for later cells.
for role_suffix, role_races in zip(['/Skipper', '/Crew'], [df_races_skipper, df_races_crew]):
    # Denominator of the progress message; constant for the whole role.
    progress_total = len(role_races['raceID'].unique()) * 2

    # groupby sorts its keys. raceID is a string, so sorting on it alone puts
    # ".../10x" before ".../2x"; the numeric raceNum key placed before it makes
    # the races of a regatta come out in numeric order.
    grouped = role_races.groupby(['Date', 'Regatta', 'raceNum', 'raceID'])

    for (date, regatta, _race_num, race), scores in grouped:
        if i % 100 == 0:
            print(f"Currently analyzing race {i}/{progress_total} Regatta:{regatta}, Date:{date}")
        i += 1

        # Participants in row order; every later list is parallel to this one.
        racers = [people[name + role_suffix] for name in scores['Sailor']]

        # Skip races with fewer than 2 participants (checked before the
        # averages below, which are only needed when the race is rated).
        if len(racers) < 2:
            # print(regatta)
            continue

        # Global average over every Sailor object, skippers and crews alike
        globalAvg = sum(p.r.mu for p in people.values()) / len(people)

        # Average rating of this race's participants before the update
        startingElos = [racer.r.mu for racer in racers]
        regattaAvg = sum(startingElos) / len(racers)

        # Every participant gets the same weight: field strength relative to
        # the global average.
        multiplier = regattaAvg / globalAvg

        # Rate using the model: one single-member team per participant
        ratings = model.rate(
            [[racer.r] for racer in racers],
            list(scores['Score']),
            weights=[[multiplier]] * len(racers),
        )

        # Update racers' ratings
        for racer, new_rating in zip(racers, ratings):
            racer.r = new_rating[0]

        # Record the race on each participant. Values are taken from the
        # participant's own row rather than re-filtering `scores` per sailor.
        for racer, start_mu, score, partner, ratio, venue in zip(
            racers,
            startingElos,
            scores['Score'],
            scores['Partner'],
            scores['Ratio'],
            scores['Venue'],
        ):
            racer.races.append(race)
            racer.changes.append((
                score,
                racer.r.mu - start_mu,  # rating change caused by this race
                regattaAvg,
                racer.r.mu,
                date,
                partner,
                ratio,
                venue,
            ))


Currently analyzing race 0/25416 Regatta:s22/andrew-johnson, Date:(2022, 1, 15)
Currently analyzing race 100/25416 Regatta:s22/bryson-women-fleet-race, Date:(2022, 2, 20)
Currently analyzing race 200/25416 Regatta:s22/mustang-women, Date:(2022, 3, 5)
Currently analyzing race 300/25416 Regatta:s22/saisa-south-points-2022, Date:(2022, 3, 13)
Currently analyzing race 400/25416 Regatta:s22/connecticut-valley-dinghy, Date:(2022, 3, 26)
Currently analyzing race 500/25416 Regatta:s22/central-series-march-27, Date:(2022, 3, 27)
Currently analyzing race 600/25416 Regatta:s22/north-designate-st-francis, Date:(2022, 4, 2)
Currently analyzing race 700/25416 Regatta:s22/emily-wick, Date:(2022, 4, 9)
Currently analyzing race 800/25416 Regatta:s22/pccsc-open-dinghy-conference, Date:(2022, 4, 9)
Currently analyzing race 900/25416 Regatta:s22/grant, Date:(2022, 4, 16)
Currently analyzing race 1000/25416 Regatta:s22/navy-spring, Date:(2022, 4, 16)
Currently analyzing race 1100/25416 Regatta:s22/spring-o

In [179]:
# One row per Sailor object: identity and current rating, followed by one
# column per raceID holding (score, change, regatta average, new rating).
# Rows are collected first and the frame is built once, instead of growing a
# DataFrame row by row and joining two frames on a non-unique 'Sailor' index.
base_columns = ['Sailor', 'Teams', 'Pos', 'Elo']

# Race columns come from whatever frame `df_races` is bound to when this cell
# runs; races missing from it are dropped by the `columns=` argument below.
race_columns = list(df_races['raceID'].unique())

elo_rows = []
for p in people.values():
    row = {"Sailor": p.name, "Teams": p.teams, "Pos": p.pos, "Elo": p.r.mu}
    row.update(zip(p.races, [(int(c[0]), float(c[1]), float(c[2]), float(c[3])) for c in p.changes]))
    elo_rows.append(row)

df_elo = pd.DataFrame(elo_rows, columns=base_columns + race_columns)

# Filter sailors who have 'f24' in their seasons list. Eligibility is by name,
# so a name that qualifies in one position qualifies in both.
eligible_sailors = [p.name for p in people.values() if 'f24' in p.seasons]
eligible_names = set(eligible_sailors)  # constant-time membership tests below

# Keep eligible sailors only, best rating first
df_elo_filtered = (
    df_elo[df_elo['Sailor'].isin(eligible_names)]
    .sort_values('Elo', ascending=False)
    .reset_index(drop=True)
)

# Rank sailors within each position (Skipper/Crew) based on Elo; dense ranks
# start at 1 and ties share a rank.
df_elo_filtered['Rank'] = df_elo_filtered.groupby('Pos')['Elo'].rank(method='dense', ascending=False).astype(int)

# Update the rank attribute of every eligible sailor. One (name, position) ->
# rank table replaces a full boolean-mask scan of the frame per sailor; the
# first row wins for a repeated key, as with the original `.values[0]`.
rank_by_key = {}
for sailor_name, sailor_pos, sailor_rank in zip(
    df_elo_filtered['Sailor'], df_elo_filtered['Pos'], df_elo_filtered['Rank']
):
    rank_by_key.setdefault((sailor_name, sailor_pos), sailor_rank)

for p in people.values():
    if p.name in eligible_names:
        p.rank = rank_by_key[(p.name, p.pos)]

In [None]:
# Write one auto-ID document per Sailor object to the 'sailorsElo' collection.
col = db.collection('sailorsElo')
batch = db.batch()

for i, p in enumerate(people.values()):
    # One entry per rated race, pairing each raceID with its history tuple.
    race_log = []
    for raceid, c in zip(p.races, p.changes):
        race_log.append({
            "raceID": raceid,
            "score": float(c[0]),
            "change": float(c[1]),
            'regAvg': float(c[2]),
            'newRating': float(c[3]),
            'date': c[4],
            'partner': c[5],
            'ratio': float(c[6]),
            'venue': c[7],
        })

    payload = {
        "Name": p.name,
        "Position": p.pos,
        "Teams": list(p.teams),
        "Rating": int(p.r.mu),
        "GlobalRank": int(p.rank),
        "races": race_log,
    }
    batch.set(col.document(), payload)

    # Flush whenever the index is a multiple of 20: the first commit carries a
    # single document, later ones carry twenty.
    if i % 20 == 0:
        batch.commit()

# Flush whatever was queued after the last in-loop commit.
batch.commit()

KeyboardInterrupt: 

In [148]:
import requests
from bs4 import BeautifulSoup

# For every team: average rating of its sailors, region scraped from the
# team's public page, and member list; written to the 'eloTeams' collection
# plus a summary document under 'vars/eloTeams'.
batch = db.batch()
col = db.collection('eloTeams')
team_names = list(df_races['Team'].unique())
lenteams = len(team_names)
teams = []
# NOTE(review): bySailors is not used in this cell, and every other visible
# cell reads the column as 'Sailor' - confirm a 'Sailors' column exists.
bySailors = df_races_full.groupby('Sailors')

# Seasons per sailor name across all positions, computed once here instead of
# re-scanning df_races_full for every member of every team.
seasons_by_name = (
    df_races_full.assign(Season=df_races_full['raceID'].str.split('/').str[0])
    .groupby('Sailor')['Season']
    .unique()
    .apply(list)
    .to_dict()
)

for i, team in enumerate(team_names):
    print(f"{i}/{lenteams} {team}")
    avg = df_elo.loc[df_elo['Teams'].apply(lambda x: team in x), 'Elo'].mean()

    teamLink = df_races.loc[df_races['Team'] == team, 'Teamlink'].iloc[0]
    # Single quotes inside the replacement field: reusing the f-string's own
    # quote character there is a syntax error before Python 3.12.
    url = f"https://scores.collegesailing.org/schools/{teamLink.split('/')[2]}"
    # print(url)

    # Best effort: the region stays None when the page cannot be fetched or
    # lacks the expected element. The fallback used to be the whole df_races
    # frame, which is not a value the document write can store.
    region = None
    try:
        page = requests.get(url, timeout=30)
        teamPage = BeautifulSoup(page.content, 'html.parser')
        region = teamPage.find('span', class_="page-info-value").contents[0].contents[0]
        # print(team, avg, region)
    except (requests.RequestException, AttributeError, IndexError):
        # AttributeError: find() returned None or a node without .contents;
        # IndexError: the element was empty. Log the URL for manual follow-up.
        print(url)

    members = [{"name": p.name,
                'pos': p.pos,
                'rating': int(p.r.mu),
                'seasons': seasons_by_name.get(p.name, []),
                'globalrank': int(p.rank)} for p in people.values() if team in p.teams]

    teams.append({"name": team, "avg": avg, "region": region, "link": url})
    batch.set(col.document(), {"name": team, "avg": avg, "region": region, "link": url, 'members': members})
    if i % 20 == 0:  # flush whenever the index is a multiple of 20
        batch.commit()
batch.commit()
doc = db.collection('vars').document('eloTeams').set({"teams": teams})

0/177 Stanford
1/177 UC Santa Barbara
2/177 San Diego State
3/177 Southern Cal
4/177 Hawaii
5/177 UC Santa Cruz
6/177 Western Washington
7/177 Washington
8/177 Cal Poly
9/177 Cal Maritime
10/177 Berkeley
11/177 Arizona State
12/177 UC San Diego
13/177 UC Davis
14/177 UC Irvine
15/177 Channel Islands
16/177 UC Los Angeles
17/177 Santa Clara
18/177 Monterey Bay
19/177 North Carolina State
20/177 Clemson
21/177 Citadel
22/177 Duke
23/177 North Carolina
24/177 U South Carolina
25/177 Oregon
26/177 Victoria
27/177 Northwestern
28/177 Michigan
29/177 Notre Dame
30/177 Michigan Tech
31/177 Marquette
32/177 Indiana
33/177 Wisconsin
34/177 Saint Thomas
35/177 Purdue
36/177 Hope
37/177 Washington U
38/177 Ohio State
39/177 Grand Valley State
40/177 Oregon State
41/177 Charleston
42/177 South Florida
43/177 Jacksonville
44/177 Rollins
45/177 Florida State
46/177 Harvard
47/177 Brown
48/177 Yale
49/177 Roger Williams
50/177 Dartmouth
51/177 Boston College
52/177 MIT
53/177 Coast Guard
54/177 Tufts

In [180]:
def _top_sailors(pos, limit=100):
    """Return summary dicts for the sailors of ``pos`` ranked 1..limit, best first.

    Sailors whose rank is still 0 are skipped: 0 is the Sailor default and is
    only replaced for sailors that were assigned a rank, so a plain
    ``rank <= limit`` test would list every unranked sailor ahead of rank 1.
    """
    ranked = [p for p in people.values() if p.pos == pos and 0 < p.rank <= limit]
    ranked.sort(key=lambda p: p.rank)
    return [
        {
            'name': p.name,
            'rank': int(p.rank),
            'pos': p.pos,
            'team': list(p.teams),
            'rating': p.r.mu,
            'seasons': list(p.seasons),
        }
        for p in ranked
    ]


topSkippers = _top_sailors('Skipper')
topCrews = _top_sailors('Crew')
doc = db.collection('vars').document('topSailors').set({"skippers": topSkippers, "crews": topCrews})

In [None]:
stype = 'Ratio'
i = 0
# Nested-loop rating pass: date -> regatta -> race, once for skippers and once
# for crews.
# NOTE(review): this cell performs the same rating pass as the groupby-based
# cell earlier in the notebook; running both in one kernel applies every race
# to `people` twice - presumably only one of them is meant to run.
# The loop variables avoid the names `type` and `df_races`, so the builtin and
# the frame loaded from races.csv are not overwritten.
for role_suffix, role_races in zip(['/Skipper', '/Crew'], [df_races_skipper, df_races_crew]):
    # Denominator of the progress message; constant for the whole role.
    progress_total = len(role_races['raceID'].unique()) * 2

    for date in list(role_races['Date'].unique()):
        # Only this date's rows. Looking races up by regatta alone revisits
        # every race of a regatta on each date the regatta appears.
        day_races = role_races.loc[role_races['Date'] == date]

        for regatta in list(day_races['Regatta'].unique()):
            for race in day_races.loc[day_races['Regatta'] == regatta, 'raceID'].unique():
                if i % 100 == 0:
                    print(f"Currently analyzing race {i}/{progress_total} Regatta:{regatta}, Date:{date}")
                i += 1

                scores = day_races.loc[day_races['raceID'] == race]

                sailors = scores['Sailor']
                if len(sailors) == 0:
                    continue

                # Global average over every Sailor object, and the average of
                # this race's participants before the update.
                globalAvg = sum([p.r.mu for p in people.values()]) / len(people.keys())
                regattaAvg = sum([people[p + role_suffix].r.mu for p in sailors]) / len(sailors)
                multiplier = regattaAvg / globalAvg

                racers = [people[p + role_suffix] for p in sailors]
                startingElos = [racer.r.mu for racer in racers]
                ratings = [[racer.r] for racer in racers]
                if len(ratings) < 2:
                    print(regatta)
                    continue
                ratings = model.rate(ratings, list(scores['Score']), weights=[[multiplier]] * len(ratings))
                for racer, new_rating in zip(racers, ratings):
                    racer.r = new_rating[0]

                changes = [racers[k].r.mu - startingElos[k] for k in range(len(racers))]

                # `idx`, not `i`: reusing `i` here overwrote the race counter
                # that drives the progress message.
                for idx, sailor in enumerate(sailors):
                    sailor_rows = scores.loc[scores['Sailor'] == sailor]
                    sailor_obj = people[sailor + role_suffix]
                    sailor_obj.races.append(race)
                    # Same 8-field layout as the groupby-based cell; the upload
                    # cell reads the venue as field 7.
                    sailor_obj.changes.append((
                        sailor_rows['Score'].iat[0],
                        changes[idx],
                        regattaAvg,
                        sailor_obj.r.mu,
                        date,
                        sailor_rows['Partner'].iat[0],
                        sailor_rows['Ratio'].iat[0],
                        sailor_rows['Venue'].iat[0],
                    ))