In [2]:
from openskill.models import PlackettLuce
import pandas as pd
import numpy as np
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

cred = credentials.Certificate("thecrowsnestapp-creds.json")
firebase_admin.initialize_app(cred)

db = firestore.client()
model = PlackettLuce()

In [None]:
class Sailor:
    def __init__(self, name, year, link, teams, pos, seasons=[], rank=0, rating=1500, races=[]):
        self.name = name
        self.year = year
        self.link = link
        self.teams = teams
        self.pos = pos
        self.rank = rank
        self.seasons = seasons
        self.races = []
        self.r = model.rating(rating,rating // 3,name)
        self.avgRatio = 0
        
    def rerate(self, rating):
        self.r = model.rating(rating.mu, rating.sigma, self.name)
        
    def __repr__(self):
        return f"{self.name}: {self.teams}, {self.pos} {str(self.r)}"

In [None]:
p1 = Sailor("p1", ['nu'], 'Skipper')
p2 = Sailor("p2", ['nu'], 'Skipper')
p3 = Sailor("p3", ['nu'], 'Skipper')
p4 = Sailor("p4", ['nu'], 'Skipper')
players = [p1,p2,p3,p4]
ratings = [[p.r] for p in players]
ratings = model.rate(ratings, [3,2,1,4], weights=[[3.0]] * 4)
for p,n in zip(players, ratings):
    p.r = n[0]
print(p1,p2,p3,p4)

In [87]:
try:
    df_s = pd.read_json("sailorssdfasd.json")
except:
    df_s = pd.DataFrame(columns=['Sailor'])
    
people = {}

for sailor in list(df_s['Sailor'].unique()):
    # print(sailor)
    positions = df_s.loc[df_s['Sailor'] == sailor, 'Pos']
    for pos in positions:
        teams = df_s.loc[(df_s['Sailor'] == sailor)& (df_s['Pos'] == pos), 'Teams'].iat[0]
        seasons = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'Seasons'].iat[0]
        year = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'GradYear'].iat[0]
        link = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'Link'].iat[0]
        rating = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'Elo'].iat[0]
        rank = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'Rank'].iat[0]
        races = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'Races'].iat[0]
        people[sailor + pos] = Sailor(sailor, year, link, teams, pos, seasons,rank,rating, races)

In [None]:
#create people
# converters={"Teams": lambda x: [y.strip().split("'")[1] for y in x.strip("[]").split(", ")]}
df_races = pd.read_json("races_new.json")

# df_races['Ratio'] = 1 - (df_races['Score'] / df_races['Teams'].apply(len))
# df_races['Ratio'] = df_races['Ratio'].astype(float)
df_races['numTeams'] = df_races['Teams'].apply(len)
df_races['raceNum'] = df_races['raceID'].apply(lambda id: int(id.split("/")[2][:-1]))

df_races_full = df_races.sort_values(['Date', 'raceNum']).reset_index(drop=True)

df_races_skipper = df_races_full.loc[df_races_full['Position'].str.contains('Skipper')] # filter for skippers
df_races_crew = df_races_full.loc[df_races_full['Position'].str.contains('Crew')] # filter for skippers
# people = {}

# Pre-group the data for skippers and crews
skipper_groups = df_races_skipper.groupby('Sailor')['Team'].unique()
crew_groups = df_races_crew.groupby('Sailor')['Team'].unique()

# Precompute seasons for skippers and crew
skipper_seasons = (
    df_races_skipper.assign(Season=df_races_skipper['raceID'].str.split('/').str[0])
    .groupby('Sailor')['Season']
    .unique()
)

crew_seasons = (
    df_races_crew.assign(Season=df_races_crew['raceID'].str.split('/').str[0])
    .groupby('Sailor')['Season']
    .unique()
)

skipper_years = (
    df_races_skipper.assign(Season=df_races_skipper['raceID'].str.split('/').str[0])
    .groupby('Sailor')['GradYear']
    .unique()
)

crew_years = (
    df_races_crew.assign(Season=df_races_crew['raceID'].str.split('/').str[0])
    .groupby('Sailor')['GradYear']
    .unique()
)

skipper_links = (
    df_races_skipper.assign(Season=df_races_skipper['raceID'].str.split('/').str[0])
    .groupby('Sailor')['Link']
    .unique()
)

crew_links = (
    df_races_crew.assign(Season=df_races_crew['raceID'].str.split('/').str[0])
    .groupby('Sailor')['Link']
    .unique()
)

# Function to add a sailor to the dictionary
def add_sailor(group, seasons_group, years_group, links_group, role):
    for sailor, teams in group.items():
        if f"{sailor}/{role}" not in people.keys():
            # If no teams are associated, set "Unknown"
            teams = teams if len(teams) > 0 else ["Unknown"]

            # Retrieve the precomputed seasons
            seasons = seasons_group.get(sailor, [])
            year = years_group.get(sailor, [])
            link = links_group.get(sailor, [])
            
            # Add the sailor to the people dictionary
            people[f"{sailor}/{role}"] = Sailor(sailor, year, link, teams, role, list(seasons))

# Add skippers and crew
add_sailor(skipper_groups, skipper_seasons, skipper_years, skipper_links, 'Skipper')
add_sailor(crew_groups, crew_seasons, crew_years, crew_links, 'Crew')

In [None]:
i = 0
residuals = []

for type, df_races in zip(['/Skipper', '/Crew'], [df_races_skipper, df_races_crew]):
    grouped = df_races.groupby(['Date', 'Regatta', 'raceID'])

    for (date, regatta, race), scores in grouped:
        if i % 1000 == 0:
            print(f"Currently analyzing race {i}/{len(df_races['raceID'].unique()) * 2} Regatta:{regatta}, Date:{date}")
        i += 1

        sailors = scores['Sailor']
        if sailors.empty:
            continue

        # Recalculate global average
        globalAvg = sum([p.r.mu for p in people.values()]) / len(people)

        # Compute regatta average
        regattaAvg = sum([people[p + type].r.mu for p in sailors]) / len(sailors)
        multiplier = regattaAvg / globalAvg

        # Initialize racers and ratings
        racers = [people[p + type] for p in sailors]
        startingElos = [r.r.mu for r in racers]
        ratings = [[r.r] for r in racers]

        # Skip races with fewer than 2 participants
        if len(ratings) < 2:
            print(regatta, "did not have enough sailors??")
            continue

        # Rate using the model
        ratings = model.rate(ratings, list(scores['Score']), weights=[[multiplier]] * len(ratings))

        predictions = model.predict_rank(ratings)
        # print(predictions)
        
        for pred, score in zip(predictions, scores['Score']):
            residuals.append(score - pred[0])

        # Update racers' ratings
        for racer, new_rating in zip(racers, ratings):
            racer.r = new_rating[0]

        # Calculate changes
        changes = [racers[i].r.mu - startingElos[i] for i in range(len(racers))]

        # Update sailors' race data
        for idx, sailor in enumerate(sailors):
            sailor_obj = people[sailor + type]
            sailor_obj.races.append({
                'score': int(scores.loc[scores['Sailor'] == sailor, 'Score'].iat[0]),
                'predicted': predictions[idx][0],
                'change':changes[idx],
                'regAvg':regattaAvg,
                'newRating':sailor_obj.r.mu,
                'date':date,
                'partner':scores.loc[scores['Sailor'] == sailor, 'Partner'].iat[0],
                'ratio': 1 - (int(scores.loc[scores['Sailor'] == sailor, 'Score'].iat[0]) / len(ratings)), 
                'venue':scores.loc[(scores['Sailor'] == sailor) & (scores['raceID'] == race), 'Venue'].iat[0],
                'raceID': race
            })

Currently analyzing race 0/65138 Regatta:s16/peter-wenner-rainbow-invite, Date:2016-01-16 00:00:00
Currently analyzing race 1000/65138 Regatta:s16/mosbacher-owen-knapp-trophies, Date:2016-04-16 00:00:00
s16/jeremy-pinkerton did not have enough sailors??
s16/jeremy-pinkerton did not have enough sailors??
s16/jeremy-pinkerton did not have enough sailors??
Currently analyzing race 2000/65138 Regatta:f16/jack-boehringer-52, Date:2016-09-10 00:00:00
Currently analyzing race 3000/65138 Regatta:f16/philly-fleet-race, Date:2016-10-01 00:00:00
Currently analyzing race 4000/65138 Regatta:s17/barnyard-bizzare, Date:2017-03-04 00:00:00
Currently analyzing race 5000/65138 Regatta:s17/thompson, Date:2017-04-15 00:00:00
Currently analyzing race 6000/65138 Regatta:f17/central-series, Date:2017-09-16 00:00:00
Currently analyzing race 7000/65138 Regatta:f17/mcgill-cup, Date:2017-10-14 00:00:00
Currently analyzing race 8000/65138 Regatta:s18/eckerd-interconference, Date:2018-03-03 00:00:00
Currently anal

In [104]:
# Filter sailors who have 'f24' in their seasons list
eligible_sailors = [p for p in people.values() if 'f24' in p.seasons]

for pos in ["Skipper", "Crew"]:
    for i,s in enumerate(sorted([p for p in eligible_sailors if p.pos == pos], key=lambda p: p.r.mu, reverse=True)):
        s.rank = i + 1

allRows = []
for p in list(people.values()):
    avgRatio = float(np.array([r['ratio'] for r in p.races]).mean())
    p.avgRatio = avgRatio
    if(p.name == 'Carter Anderson'):
        print(avgRatio)
    allRows.append([p.name, p.year, p.link, p.rank, p.teams, p.pos, p.r.mu, avgRatio, p.r.sigma, p.seasons, p.races])
    
df_sailors = pd.DataFrame(allRows, columns=['Sailor','GradYear', 'Link', 'Rank', 'Teams', 'Pos', 'Elo','avgRatio','Sigma', 'Seasons', 'Races'])

# df_sailors.to_json('sailors.json', index=False)

0.532917319470161
0.4801555586002517


  avgRatio = float(np.array([r['ratio'] for r in p.races]).mean())
  ret = ret.dtype.type(ret / rcount)


In [106]:
people['Carter Anderson/Skipper'].name

'Carter Anderson'

In [None]:
# Updates only changed sailors
col = db.collection('sailorsElo')

for doc in col.limit(10).stream():
    doc_id = doc.id
    data = doc.to_dict()
    print(doc_id)
    changes = {}
    person = people[data['Name'] + "/" + data['Position']]
    if len(person.races) < 1:
        print("no races found for sailor, skipping...")
        continue
    
    print(data['Name'])
    try:
        data['Link']
    except:
        changes['Link'] = list(person.link)[0]
        # print("No link!")
    try:
        data['Year']
    except:
        changes['Year'] = int(list(person.year)[0][:2])
        # print("No year!")
    
    # print(person.races[0])
    # check races
    changes['races'] = firestore.ArrayUnion(person.races)
    # for raceID in [r['raceID'] for r in person.races]:
    #     if raceID not in [r['raceID'] for r in data['races']]:
    #         changes['races'].append(raceID)
    #         print("needs update!", data['races'])
    #         print(person.races)
    print(changes)
    col.document(doc_id).update(changes)

00DrKGRiyUzy6fPhwAUW


KeyError: 'Name'

In [94]:
# Writes over entire database
col = db.collection('sailorsElo')

for i,p in enumerate(people.values()):
    col.document().set({"Name":p.name,
        "Position": p.pos,
        "Teams": list(p.teams),
        "Rating": int(p.r.mu),
        "GlobalRank": int(p.rank),
        "Link": list(p.link)[0],
        "Year": int(list(person.year)[0][:2]),
        "races": [{'sailor': p.name,
                'pos': p.pos,
                "raceID": race['raceID'], 
                "score": float(race['score']), 
                "predicted": int(race['predicted']), 
                "change": float(race['change']), 
                'regAvg': float(race['regAvg']), 
                'newRating': float(race['newRating']),
                'date': race['date'],
                'partner':race['partner'],
                'ratio': float(race['ratio']),
                'venue': race['venue']
                } for race in p.races]})

In [115]:
import requests
from bs4 import BeautifulSoup
from collections import defaultdict

# Initialize a defaultdict to count races per season

def getCounts(races):
    # season_counts = {}
    season_counts = defaultdict(int)
    
    for race in races:
        season = race["raceID"].split("/")[0]
        season_counts[season] += 1

    return dict(season_counts)


# batch = db.batch()
col = db.collection('eloTeams')
lenteams = len(list(df_races['Team'].unique()))
teams = []
bySailors = df_races_full.groupby('Sailor')

for i,team in enumerate(list(df_races['Team'].unique())):
    print(f"{i}/{lenteams} {team}")
    avg = df_sailors.loc[df_sailors['Teams'].apply(lambda x: team in x), 'Elo'].mean()
    avgRatio = df_sailors.loc[df_sailors['Teams'].apply(lambda x: team in x), 'avgRatio'].mean()
    numCurMembers = len(df_sailors.loc[(df_sailors['Teams'].apply(lambda x: team in x)) & (df_sailors['Seasons'].apply(lambda x: 'f24' in x))])
    region = df_races
    
    teamLink = df_races.loc[df_races['Team'] == team, 'Teamlink'].iloc[0]
    url = f"https://scores.collegesailing.org/schools/{teamLink.split("/")[2]}"
    # print(url)
    page = requests.get(url)
    teamPage = BeautifulSoup(page.content, 'html.parser')
    
    try:
        region = teamPage.find('span', class_="page-info-value").contents[0].contents[0]
        # print(team, avg, region)
    except:
        print(url)
        continue

    members = [{"name": p.name, 
                'pos':p.pos,
                'teams': list(p.teams),
                'rating': int(p.r.mu),
                'avgRatio': float(p.avgRatio),
                'raceCount': getCounts(p.races),
                'seasons':list(df_races_full.loc[df_races_full['Sailor'] == p.name]['raceID'].str.split('/').str[0].unique()), 
                'globalrank': int(p.rank)} for p in people.values() if team in p.teams]
    
    teams.append({"name":team, "avg": avg, 'avgRatio': avgRatio, "region": region, "link": url, 'memberCount': numCurMembers})
    col.document().set({"name":team, "avg": avg,"region": region, "link": url, 'members': members})
#     if i % 20 == 0: # commit every 20 documents
#             batch.commit()
# batch.commit()
doc = db.collection('vars').document('eloTeams').set({"teams": teams})

0/216 Hawaii
1/216 Brown
2/216 Southern Cal
3/216 Salve Regina
4/216 UC Santa Barbara
5/216 Cal Poly
6/216 UC San Diego
7/216 Washington
8/216 Channel Islands
9/216 British Columbia
10/216 UC Los Angeles
11/216 Westmont College
12/216 Arizona State
13/216 Texas A&M Galveston
14/216 Texas A&M
15/216 Tulane
16/216 Rice
17/216 Texas
18/216 Oklahoma State
19/216 Texas A&M C. Christ
20/216 Central Oklahoma
21/216 Notre Dame
22/216 Jacksonville
23/216 Florida
24/216 Tennessee
25/216 Rollins
26/216 North Carolina State
27/216 Georgia Tech
28/216 Auburn
29/216 Charleston
30/216 South Florida
31/216 Old Dominion
32/216 Eckerd
33/216 Florida State
34/216 U. Miami
35/216 UW Milwaukee
36/216 Stony Brook
37/216 Duke
38/216 Clemson
39/216 U South Carolina
40/216 UNC Wilmington
41/216 Georgia
42/216 Berkeley
43/216 CSU Long Beach
44/216 Monterey Bay
45/216 UC Irvine
46/216 UC Davis
47/216 Rhode Island
48/216 Georgetown
49/216 Dartmouth
50/216 MIT
51/216 George Washington
52/216 Navy
53/216 Fordham
54

In [None]:
topSkippers = []
for p in sorted([p for p in people.values() if p.rank <= 100 and p.pos == 'Skipper'],key=lambda p: p.rank):
    topSkippers.append({'name': p.name, 'rank': int(p.rank), 'pos': p.pos, 'team': list(p.teams), 'rating': p.r.mu, 'seasons': list(p.seasons)})
doc = db.collection('vars').document('topSkippers').set({"sailors": topSkippers})
topCrews = []
for p in sorted([p for p in people.values() if p.rank <= 100 and p.pos == 'Crew'],key=lambda p: p.rank):
    topCrews.append({'name': p.name, 'rank': int(p.rank), 'pos': p.pos, 'team': list(p.teams), 'rating': p.r.mu, 'seasons': list(p.seasons)})
doc = db.collection('vars').document('topCrews').set({"sailors": topCrews})

In [51]:
print(list(df_races_full.loc[df_races_full['Sailor']=='Elliott Chalcraft', 'raceID']))

['f23/toni-deutsch-58/1B', 'f23/toni-deutsch-58/2B', 'f23/toni-deutsch-58/3B', 'f23/toni-deutsch-58/4B', 'f23/regis/1A', 'f23/regis/2A', 'f23/regis/3A', 'f23/regis/4A', 'f23/regis/5A', 'f23/regis/6A', 'f23/regis/7A', 'f23/regis/8A', 'f23/regis/9A', 'f23/hood/1A', 'f23/hood/2A', 'f23/hood/3A', 'f23/hood/4A', 'f23/hood/5A', 'f23/hood/6A', 'f23/hood/7A', 'f23/hood/8A', 'f23/hood/9A', 'f23/hood/10A', 'f23/hood/11A', 'f23/hood/12A', 'f23/hood/13A', 'f23/hood/14A', 'f23/george-warren-smith/1B', 'f23/george-warren-smith/2B', 'f23/george-warren-smith/3B', 'f23/george-warren-smith/4B', 'f23/george-warren-smith/5B', 'f23/george-warren-smith/6B', 'f23/george-warren-smith/7B', 'f23/george-warren-smith/8B', 'f23/george-warren-smith/9B', 'f23/stu-nelson/1C', 'f23/stu-nelson/2C', 'f23/stu-nelson/3C', 'f23/stu-nelson/4C', 'f23/stu-nelson/5C', 'f23/stu-nelson/6C', 'f23/stu-nelson/7C', 'f23/stu-nelson/8C', 'f23/stu-nelson/9C', 'f23/stu-nelson/10C', 'f23/stu-nelson/11C', 'f23/stu-nelson/12C', 'f23/savin-

In [50]:
len(list(df_races_full['Regatta'].unique()))

1723

In [None]:
for regatta in list(df_races_full['Regatta'].unique()):
    races = df_races_full[df_races_full['Regatta'] == regatta]
    raceIDs = list(races['raceID'].unique())
    sailors = races['Sailor'].unique()
    
    racePpl = [{"Name":p.name, 
         "Position": p.pos,
         "Teams": list(p.teams),
         "Rating": int(p.r.mu),
         "GlobalRank": int(p.rank),
         "races": [{'sailor': p.name,
                    'pos': p.pos,
                    "raceID": race['raceID'], 
                    "score": float(race['score']), 
                    "predicted": int(race['predicted']), 
                    "change": float(race['change']), 
                    'regAvg': float(race['regAvg']), 
                    'newRating': float(race['newRating']),
                    'date': race['date'],
                    'partner':race['partner'],
                    'ratio': float(race['ratio']),
                    'venue': race['venue']
                    } for race in p.races if race['raceID'].split("/")[0] + "/" +race['raceID'].split("/")[1] == regatta]
        } for p in people.values() if p.name in sailors]
    
    # race = {'raceID':'', 'raceNum':0, 'div': '', 'sailors':[]}
    # person = {'name':'', 'rating':0, 'change':0, 'team': '', 'pos': '', 'div':'', 'partner': ''}
    
    # for race in races['raceID'].unique():
    #     sailors = races[races['raceID'] == race, 'Sailor'].unique()
    # for p in [p for p in people.values() if p.name in sailors]:
        # racePpl.append({'name':p.name, 'rating':p.rating, 'changes':p.changes, 'team': p.team, 'pos': '', 'div':'', 'partner': ''})
    # print(regatta)
    db.collection('eloRegattas').document().set({'regattaName': regatta,'raceIDs':raceIDs, 'sailors': racePpl}, timeout=15)

In [65]:
def delete_collection(coll_ref, batch_size):
    if batch_size == 0:
        return

    docs = coll_ref.list_documents(page_size=batch_size)
    deleted = 0

    for doc in docs:
        if deleted % 50 == 0:
            print(f"{deleted} Deleting doc {doc.id} => {doc.get().to_dict()}")
        doc.delete()
        deleted = deleted + 1

    if deleted >= batch_size:
        return delete_collection(coll_ref, batch_size)
col = db.collection('eloRegattas')
delete_collection(col, 400)

Deleting doc 1buMNiOqADJvSqPtOfO1 => {'regattaName': 'f18/cascadia-cup-jv', 'raceIDs': ['f18/cascadia-cup-jv/1A', 'f18/cascadia-cup-jv/1B', 'f18/cascadia-cup-jv/2A', 'f18/cascadia-cup-jv/2B', 'f18/cascadia-cup-jv/3A', 'f18/cascadia-cup-jv/3B', 'f18/cascadia-cup-jv/4A', 'f18/cascadia-cup-jv/4B', 'f18/cascadia-cup-jv/5A', 'f18/cascadia-cup-jv/5B', 'f18/cascadia-cup-jv/6A', 'f18/cascadia-cup-jv/6B', 'f18/cascadia-cup-jv/7A'], 'sailors': [{'GlobalRank': 0, 'Position': 'Skipper', 'races': [{'date': DatetimeWithNanoseconds(2018, 10, 20, 0, 0, tzinfo=datetime.timezone.utc), 'ratio': 0.8571428571428572, 'newRating': 6772.362962530372, 'change': 119.54355873458735, 'raceID': 'f18/cascadia-cup-jv/1A', 'partner': 'Blake Winner', 'predicted': 2, 'score': 1.0, 'venue': 'Washington', 'regAvg': 3729.1065778884754}, {'date': DatetimeWithNanoseconds(2018, 10, 20, 0, 0, tzinfo=datetime.timezone.utc), 'ratio': 0.8571428571428572, 'newRating': 6891.142420692399, 'change': 118.77945816202737, 'raceID': 'f1

In [7]:
df_elo.to_csv("elo19.csv",index=False)