In [1]:
import pandas as pd
import numpy as np
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

cred = credentials.Certificate("thecrowsnestapp-creds.json")
firebase_admin.initialize_app(cred)

db = firestore.client()

In [2]:
from openskill.models import PlackettLuce, BradleyTerryFull
model = PlackettLuce(beta=25.0/120.0)

In [3]:
# %load_ext scalene

In [3]:
targetElo = 1000
baseElo = 500
targetSeasons = ['f24', 's25']
targetTRSeasons = ['f24', 's25']
# baseSigma = baseElo // 3
# offset = baseElo * 2

In [4]:
class Sailor:
    def __init__(self, name,key, year, links, teams, seasons=[], skipperRank=0, crewRank=0, skipperRating=baseElo, crewRating=baseElo, races=[], gender=""):
        self.name = name
        self.key = key
        self.gender = gender
        self.year = year
        self.links = links
        self.teams = teams
        
        self.skipperRank = skipperRank
        self.crewRank = crewRank
        self.womenSkipperRank = 0
        self.womenCrewRank = 0
        
        self.skipperRankTR = 0
        self.crewRankTR = 0
        self.womenSkipperRankTR = 0
        self.womenCrewRankTR = 0
        
        self.seasons = seasons
        self.races = []
        self.rivals = {}
        # self.sr = model.rating(skipperRating, skipperRating / 3, name)
        # self.cr = model.rating(crewRating, crewRating / 3, name)
        
        # fleet racing
        self.wsr = model.rating(name=name)
        self.wcr = model.rating(name=name)
        self.sr = model.rating(name=name)
        self.cr = model.rating(name=name)
        # Team racing 
        self.wtsr = model.rating(name=name)
        self.wtcr = model.rating(name=name)
        self.tsr = model.rating(name=name)
        self.tcr = model.rating(name=name)
        self.avgSkipperRatio = 0
        self.avgCrewRatio = 0
        
    def hasTargetSeasons(self,targetSeasons, pos):
        return not set(self.seasons[pos]).isdisjoint(targetSeasons)
        
    def __repr__(self):
        return f"{self.name}: {self.teams}, {str(self.sr.ordinal())} {str(self.cr.ordinal())} {self.seasons} {len(self.races)}"

In [5]:
def adjust_race_id(row):
    if row['Scoring'] == 'Combined':
        return row['raceID'][:-1]  # Remove the last character (A/B) for combined scoring
    return row['raceID']

In [92]:
df_races = pd.read_json("racesfr.json")
# converters={"Teams": lambda x: [y.strip().split("'")[1] for y in x.strip("[]").split(", ")]}
df_races['raceNum'] = df_races['raceID'].apply(lambda id: int(id.split("/")[2][:-1]))  # Numeric part
df_races['raceDiv'] = df_races['raceID'].apply(lambda id: id.split("/")[2][-1])  # Division part (e.g., 'A', 'B')
df_races['adjusted_raceID'] = df_races.apply(adjust_race_id, axis=1) # to make combined division combined
df_races['Link'] = df_races['Link'].fillna('Unknown') # fill empty links
# df_races['key'] = np.where(df_races['Link'] == 'Unknown', df_races['Sailor'], df_races['Link'])
df_races['key'] = df_races.apply(
    lambda row: row['Sailor'] + "-" + row['Team'] if row['Link'] == 'Unknown' else row['Link'],
    axis=1
)
df_races['partnerKey'] = df_races.apply(
    lambda row: row['Partner'] + "-" + row['Team'] if row['PartnerLink'] == 'Unknown' else row['PartnerLink'],
    axis=1
)
# df_races = df_races[df_races['Regatta'] == 'f24/seisa-women-fall'] # example combined regatta

# to exclude f24
# df_races = df_races.loc[df_races['raceID'].apply(lambda id: id.split("/")[0] != 'f24')]
# df_races = df_races.loc[df_races['raceID'].apply(lambda id: id.split("/")[1] != 'east-open-national-semi-final')]
# df_races = df_races.loc[df_races['raceID'].apply(lambda id: id.split("/")[1] != 'open-dinghy-national')]

# df_races_full = df_races.sort_values(['Date', 'raceNum', 'raceDiv']).reset_index(drop=True)

df_races_tr = pd.read_json("TR-20250318.json")
df_races_tr['adjusted_raceID'] = df_races_tr['raceID']
df_races_tr['Scoring'] = 'team'
# df_sailorTRinfo = pd.read_json('trSailorInfoAll.json')
df_races = df_races.rename({'Date': 'date'}, axis='columns')
df_races = df_races.rename({'Regatta': 'regatta'}, axis='columns')
df_races_full = pd.concat([df_races, df_races_tr])
df_races_full = df_races_full.sort_values(['date', 'raceNum', 'raceDiv']).reset_index(drop=True)

# df_races_skipper = df_races_full.loc[df_races_full['Position'].str.contains('Skipper')].sort_values(['Date', 'raceNum']).reset_index(drop=True)
# df_races_crew = df_races_full.loc[df_races_full['Position'].str.contains('Crew')].sort_values(['Date', 'raceNum']).reset_index(drop=True) 

df_sailor_info = pd.read_json("sailor_data2.json")

df_sailor_info.head()

Unnamed: 0,key,link,name,first_name,last_name,gender,year,teamLink,team,id,external_id
0,aadvika-ahuja,aadvika-ahuja,aadvika ahuja,aadvika,ahuja,F,2025,uc-davis,Schools,3136210,
1,aaron-blust,aaron-blust,Aaron Blust,Aaron,Blust,M,2024,olin,Olin,3127882,
2,aaron-burnett,aaron-burnett,Aaron Burnett,Aaron,Burnett,M,2019,florida,Florida,3114274,27657.0
3,aaron-comen,aaron-comen,Aaron Comen,Aaron,Comen,M,2018,texas,Texas,3109429,25596.0
4,aaron-fairchild,aaron-fairchild,Aaron Fairchild,Aaron,Fairchild,M,2021,pittsburgh,U Pittsburgh,3120599,


In [7]:
teamRegions = {'Hawaii': 'PCCSC', 'Brown': 'NEISA', 'Southern Cal': 'PCCSC', 'Salve Regina': 'NEISA', 'UC Santa Barbara': 'PCCSC', 'Cal Poly': 'PCCSC', 'Washington': 'NWICSA', 'Channel Islands': 'PCCSC', 'UC San Diego': 'PCCSC', 'British Columbia': 'NWICSA', 'UC Los Angeles': 'PCCSC', 'Westmont College': 'PCCSC', 'Arizona State': 'PCCSC', 'Texas A&M Galveston': 'SEISA', 'Texas A&M': 'SEISA', 'Tulane': 'SEISA', 'Rice': 'SEISA', 'Texas': 'SEISA', 'Oklahoma State': 'SEISA', 'Texas A&M C. Christ': 'SEISA', 'Central Oklahoma': 'SEISA', 'Notre Dame': 'MCSA', 'Jacksonville': 'SAISA', 'Florida': 'SAISA', 'Tennessee': 'SAISA', 'Rollins': 'SAISA', 'North Carolina State': 'SAISA', 'Georgia Tech': 'SAISA', 'Auburn': 'SAISA', 'Charleston': 'SAISA', 'South Florida': 'SAISA', 'Old Dominion': 'MAISA', 'Eckerd': 'SAISA', 'Florida State': 'SAISA', 'U. Miami': 'SAISA', 'UW Milwaukee': 'MCSA', 'Stony Brook': 'MAISA', 'Duke': 'SAISA', 'Clemson': 'SAISA', 'U South Carolina': 'SAISA', 'UNC Wilmington': 'SAISA', 'Georgia': 'SAISA', 'Berkeley': 'PCCSC', 'CSU Long Beach': 'PCCSC', 'Monterey Bay': 'PCCSC', 'UC Irvine': 'PCCSC', 'UC Davis': 'PCCSC', 'Rhode Island': 'NEISA', 'Georgetown': 'MAISA', 'Dartmouth': 'NEISA', 'MIT': 'NEISA', 'George Washington': 'MAISA', 'Navy': 'MAISA', 'Fordham': 'MAISA', 'Northeastern': 'NEISA', 'Christopher Newport': 'MAISA', 'Victoria': 'NWICSA', 'Boston University': 'NEISA', 'Miami University': 'MCSA', 'Hampton': 'MAISA', 'Virginia': 'MAISA', 'Stevens': 'MAISA', 'Columbia': 'MAISA', 'NY Maritime': 'MAISA', 'Kings Point': 'MAISA', "St. Mary's": 'MAISA', 'Maryland': 'MAISA', 'Virginia Tech': 'MAISA', 'Drexel': 'MAISA', 'Maryland/Baltimore': 'MAISA', 'Buffalo': 'MAISA', 'UC Santa Cruz': 'PCCSC', 'Santa Clara': 'PCCSC', 'Wisconsin': 'MCSA', 'Michigan': 'MCSA', 'Washington College': 'MAISA', 'Minnesota': 'MCSA', 'Yale': 'NEISA', 'Hobart & William': 'MAISA', 'Vermont': 'NEISA', 'Connecticut College': 'NEISA', 'Harvard': 'NEISA', 'Roger Williams': 'NEISA', 'Syracuse': 'MAISA', 'Tufts': 'NEISA', 'Middlebury': 'NEISA', 'New College': 'SAISA', 'William and Mary': 'MAISA', 'Gannon': 'MAISA', 'Boston College': 'NEISA', 'Stanford': 'PCCSC', 'Bowdoin': 'NEISA', 'Lewis & Clark': 'NWICSA', 'Monmouth': 'MAISA', 'American': 'MAISA', 'Michigan State': 'MCSA', 'Hope': 'MCSA', 'Western Michigan': 'MCSA', 'Toledo': 'MCSA', 'Ohio State': 'MCSA', 'Mass Maritime': 'NEISA', 'Coast Guard': 'NEISA', 'Bates': 'NEISA', 'Fairfield': 'NEISA', 'Sacred Heart': 'NEISA', 'Wentworth Institute': 'NEISA', 'Providence': 'NEISA', 'Iowa State': 'MCSA', 'Iowa': 'MCSA', 'Indiana': 'MCSA', 'Davidson': 'SAISA', 'Oregon State': 'NWICSA', 'Western Washington': 'NWICSA', 'U. Rochester': 'MAISA', 'Army': 'MAISA', 'New Hampshire': 'NEISA', 'U. Connecticut': 'NEISA', 'UMass Dartmouth': 'NEISA', 'Wesleyan': 'NEISA', 'U. Mass/ Amherst': 'NEISA', 'U New England': 'NEISA', 'Denison': 'MCSA', 'Northern Michigan': 'MCSA', 'Ohio': 'MCSA', 'Pennsylvania': 'MAISA', 'Villanova': 'MAISA', 'Maine Maritime': 'NEISA', 'Michigan Tech': 'MCSA', 'Illinois': 'MCSA', 'Chicago': 'MCSA', 'Northwestern': 'MCSA', 'Grand Valley State': 'MCSA', 'Washington U': 'MCSA', 'Marquette': 'MCSA', 'Lake Forest': 'MCSA', 'Cornell': 'MAISA', 'Oregon': 'NWICSA', 'Portland State': 'NWICSA', 'Princeton': 'MAISA', "Queen's": 'MAISA', 'Penn State': 'MAISA', 'Ocean County': 'MAISA', 'Delaware': 'MAISA', 'Rutgers': 'MAISA', 'Worcester Polytech': 'NEISA', 'Emmanuel College': 'NEISA', "St. John's": 'MAISA', 'U Pittsburgh': 'MAISA', 'Webb Institute': 'MAISA', 'McGill': 'NEISA', 'Citadel': 'SAISA', 'Colgate': 'MAISA', 'Catholic U America': 'MAISA', 'Loyola College': 'MAISA', 'Ottawa': 'MAISA', 'Royal Military': 'MAISA', 'Dalhousie': 'NEISA', 'U Toronto': 'MAISA', 'New Orleans': 'SEISA', 'Kansas': 'SEISA', 'Bentley': 'NEISA', 'Brandeis': 'NEISA', 'Cal Maritime': 'PCCSC', 'San Diego State': 'PCCSC', 'Loyola': 'SEISA', 'North Texas': 'SEISA', 'Vanderbilt': 'SAISA', 'Purdue': 'MCSA', 'North Carolina': 'SAISA', 'Hillsdale': 'MCSA', 'Amherst': 'NEISA', 'Williams': 'NEISA', 'Hamilton': 'MAISA', 'Rochester': 'MAISA', 'Wellesley': 'NEISA', 'Hosei Univerisity': 'GUEST', 'Colorado': 'SEISA', 'John Carroll': 'MCSA', 'U.  Mass/ Boston': 'NEISA', 'Mercyhurst': 'MAISA', 'Penn State Behrend': 'MAISA', 'Indiana U Pennsylvan': 'MAISA', 'U Nebraska': 'MCSA', 'U Maine': 'NEISA', 'Texas Christian': 'SEISA', 'Embry-Riddle': 'SAISA', 'Palm Beach Atlantic': 'SAISA', 'U of Central Florida': 'SAISA', 'Baldwin-Wallace': 'MCSA', "Saint Mary's College": 'MCSA', 'Olin': 'NEISA', 'Baylor': 'SEISA', 'Texas Tech': 'SEISA', 'Wake Forest': 'SAISA', 'Georgia Southern': 'SAISA', 'East Carolina': 'SAISA', 'Florida Tech': 'SAISA', 'Saint Thomas': 'MCSA', 'Cincinnati': 'MCSA', 'Florida Gulf Coast': 'SAISA', 'Saginaw Valley': 'MCSA', 'Coastal Georgia': 'SAISA', 'Cleveland State': 'MCSA', 'Sewanee': 'SAISA', 'Case Western': 'MCSA', 'Oklahoma': 'SEISA', 'Gonzaga': 'PCCSC'}


In [8]:
merges = [{'first': 'carter-anderson', 'second': 'carter-anderson-2027'}, {'first': 'elliott-bates', 'second':'elliott-bates-2021'}, {'first': 'ian-hopkins-guerra','second': 'ian-hopkins-guerra-2026'}, {'first': 'connor-nelson', 'second':'connor-nelson-2024'}]

In [9]:
# Function to add a sailor to the dictionary
def add_sailor(group,names_group, links_group, seasons_group,teams_group, years_group, people):
    """Summary

    Args:
        group (pandas group): The grouped list of sailor keys and teams
        names_group (pandas group): The list of names grouped by sailor key
        links_group (pandas group): The list of links grouped by sailor key
        seasons_group (pandas group): The list of seasons grouped by sailor key
        teams_group (pandas group): The list of teams grouped by sailor key
        years_group (pandas group): The list of years grouped by sailor key
        people (dict): The people dictionary to be added to
    """

    for key, teams in group.items():
        if key not in people.keys():
            # If no teams are associated, set "Unknown"
            teams = teams if len(teams) > 0 else ["Unknown"]

            # Retrieve the precomputed values
            name = names_group.get(key,[])[0]
            link = links_group.get(key,[])
            seasons = seasons_group.get(key,[])
            teams = teams_group.get(key,[])
            year = years_group.get(key, [])[0]
            gender = ""
            
            if key in list(df_sailor_info['link']):
                data = df_sailor_info.loc[df_sailor_info['link'] == key]
                gender = data['gender'].iat[0]
                year = data['year'].iat[0]
            
            # Add the sailor to the people dictionary
            people[key] = Sailor(name, key, year, list([link]), teams, {'skipper': seasons.get('Skipper', []), 'crew': seasons.get('Crew', [])}, gender=gender)
            
def setupPeople():
    """Generates a dictionary with all of the sailors based on the df_races_full dataframe

    Returns:
        dict: The filled out dictionary of people
    """
    
    people = {}
    
    try:
        df_s = pd.read_json("sailorsasf.json")
    except:
        df_s = pd.DataFrame(columns=['Sailor'])

    # create sailors from file (NOT WORKING)
    for sailor in list(df_s['Sailor'].unique()):
        # print(sailor)
        positions = df_s.loc[df_s['Sailor'] == sailor, 'Pos']
        for pos in positions:
            teams = df_s.loc[(df_s['Sailor'] == sailor)& (df_s['Pos'] == pos), 'Teams'].iat[0]
            seasons = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'Seasons'].iat[0]
            year = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'GradYear'].iat[0]
            link = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'Link'].iat[0]
            rating = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'Elo'].iat[0]
            rank = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'Rank'].iat[0]
            races = df_s.loc[(df_s['Sailor'] == sailor) & (df_s['Pos'] == pos), 'Races'].iat[0]
            people[link] = Sailor(sailor, year, link, teams, pos, seasons, rank, rating, races)
    
    # Do merges if necessary (merging two techscore links)
    # We must merge here before the calculation is done, because each new rating will need the accurate history
    for merge in merges:
        if merge['second'] in people.keys():
            people[merge['first']].links.append(people[merge['second']])
            del people[merge['second']]
        df_races_full['key'] = df_races_full['key'].replace(merge['second'], merge['first'])
        # df_races_skipper['key'] = df_races_skipper['key'].replace(merge['second'], merge['first'])
        # df_races_crew['key'] = df_races_crew['key'].replace(merge['second'], merge['first'])

    # Pre-group the data for skippers and crews
    grouped = df_races_full.groupby(['key'])['Team'].unique()
    
    # Precompute seasons for skippers and crew
    names = (
        df_races_full.assign(Season=df_races_full['raceID'].str.split('/').str[0])
        .groupby('key')['Sailor']
        .unique()
    )
    links = (
        df_races_full.assign(Season=df_races_full['raceID'].str.split('/').str[0])
        .groupby('key')['Link']
        .unique()
    )
    seasons = (
        df_races_full.assign(Season=df_races_full['raceID'].str.split('/').str[0])
        .groupby(['key', 'Position'])['Season']
        .unique()
    )
    teams = (
        df_races_full.assign(Season=df_races_full['raceID'].str.split('/').str[0])
        .groupby('key')['Team']
        .unique()
    )

    years = (
        df_races_full.assign(Season=df_races_full['raceID'].str.split('/').str[0])
        .groupby('key')['GradYear']
        .unique()
    )
    
    # Add all sailors to the people dictionary
    add_sailor(grouped, names, links, seasons, teams, years, people)
    
    return people

In [17]:
def calculateTR(people, date, regatta, race, row, type, scoring, season, regattaAvg, womens):
    teamAName = row['teamAName'].iat[0]
    teamAKeys = [boat[type.lower() + 'Key'] if boat[type.lower() + 'Key'] is not None else 'Unknown' for boat in row['teamABoats'].iat[0]] 
    # teamANames = [boat[type.lower() + 'Name'] for boat in row['teamABoats'].iat[0]]
    for merge in merges:
            if merge['second'] in teamAKeys:
                teamAKeys = [k if k != merge['second'] else merge['first'] for k in teamAKeys]
    teamARacers = [people[key] for key in teamAKeys if 'Unknown' not in key and key in people.keys()]
    
    teamBName = row['teamBName'].iat[0]
    teamBKeys = [boat[type.lower() + 'Key'] if boat[type.lower() + 'Key'] is not None else 'Unknown' for boat in row['teamBBoats'].iat[0]] 
    # teamBNames = [boat[type.lower() + 'Name'] for boat in row['teamBBoats'].iat[0]]
    for merge in merges:
            if merge['second'] in teamBKeys:
                teamBKeys = [k if k != merge['second'] else merge['first'] for k in teamBKeys]
    teamBRacers = [people[key] for key in teamBKeys if 'Unknown' not in key and key in people.keys()]
    
    
    teamARatings = []
    if womens:
        teamARatings = [r.wtsr if type == 'Skipper' else r.wtcr for r in teamARacers]
    else:
        teamARatings = [r.tsr if type == 'Skipper' else r.tcr for r in teamARacers]
        
    teamBRatings = []
    if womens:
        teamBRatings = [r.wtsr if type == 'Skipper' else r.wtcr for r in teamBRacers]
    else:
        teamBRatings = [r.tsr if type == 'Skipper' else r.tcr for r in teamBRacers]
    
    startingARating = [r.ordinal(target=targetElo, alpha=200 / model.sigma) for r in teamARatings]
    startingBRating = [r.ordinal(target=targetElo, alpha=200 / model.sigma) for r in teamBRatings]
    
    if len(teamARatings) < 1 or len(teamBRatings) < 1:
        # print("not enough sailors in this race, skipping", row['raceID'].iat[0])
        return
    
    predictions = model.predict_rank([teamARatings, teamBRatings])
    
    ratings = model.rate([teamARatings, teamBRatings], 
                        ranks=[1 if row['teamAOutcome'].iat[0] == 'win' else 2, 1 if row['teamBOutcome'].iat[0] == 'win' else 2])
    
    
    for team, name, newRatings in zip([teamARacers, teamBRacers], [teamAName, teamBName], ratings):
        for racer, new_rating in zip(team, newRatings):
            # print(new_rating.ordinal(target=targetElo, alpha=200 / model.sigma))
            # racer.teams = [name]
            if row['raceID'].iat[0].split("/")[0] not in racer.seasons[type.lower()]:
                np.append(racer.seasons[type.lower()], [row['raceID'].iat[0].split("/")[0]])
            if type == 'Skipper':
                if womens:
                    racer.wtsr = new_rating
                else: 
                    racer.tsr = new_rating
            else:
                if womens:
                    racer.wtcr = new_rating
                else:
                    racer.tcr = new_rating
                    
    if womens:
        teamARatings = [r.wtsr if type == 'Skipper' else r.wtcr for r in teamARacers]
    else:
        teamARatings = [r.tsr if type == 'Skipper' else r.tcr for r in teamARacers]
        
    if womens:
        teamBRatings = [r.wtsr if type == 'Skipper' else r.wtcr for r in teamBRacers]
    else:
        teamBRatings = [r.tsr if type == 'Skipper' else r.tcr for r in teamBRacers]
        
    endingARatings = [r.ordinal(target=targetElo, alpha=200 / model.sigma) for r in teamARatings]
    endingBRatings = [r.ordinal(target=targetElo, alpha=200 / model.sigma) for r in teamBRatings]
    
    AChanges = [e-s for s,e in zip(startingARating, endingARatings)]
    BChanges = [e-s for s,e in zip(startingBRating, endingBRatings)]
    
    for tscore, toutcome, oppt, oppn, index, racers,oppRacers, sratings, eratings, changes, boats in zip([row['teamAScore'].iat[0], row['teamBScore'].iat[0]], 
                                            [row['teamAOutcome'].iat[0],row['teamBOutcome'].iat[0]],
                                            [row['teamBName'].iat[0], row['teamAName'].iat[0]], 
                                            [row['teamBNick'].iat[0], row['teamANick'].iat[0]], [0,1], 
                                            [teamARacers, teamBRacers], 
                                            [teamBRacers, teamARacers], 
                                            [startingARating, startingBRating],
                                            [endingARatings, endingBRatings], 
                                            [AChanges, BChanges],
                                            [row['teamABoats'].iat[0], row['teamBBoats'].iat[0]]):
        
        partnerKeys = [boat['crewKey'] if boat['crewKey'] is not None else 'Unknown' for boat in boats]
        partnerNames = [boat['crewName'] if boat['crewName'] is not None else 'Unknown' for boat in boats]
        if type == 'Crew':
            partnerKeys = [boat['skipperKey'] if boat['skipperKey'] is not None else 'Unknown' for boat in boats]
            partnerNames = [boat['skipperName'] if boat['skipperName'] is not None else 'Unknown' for boat in boats]
            
        for racer, sr, er, change, partnerKey, partnerName in zip(racers, sratings, eratings, changes, partnerKeys, partnerNames):
            newRating = 0
            if type == 'Skipper':
                newRating = racer.wtsr.ordinal(target=targetElo, alpha=200 / model.sigma) if womens else racer.tsr.ordinal(target=targetElo, alpha=200 / model.sigma)
            else:
                
                newRating = racer.wtcr.ordinal(target=targetElo, alpha=200 / model.sigma) if womens else racer.tcr.ordinal(target=targetElo, alpha=200 / model.sigma)
            
            for opp in oppRacers:
                if type not in racer.rivals:
                    racer.rivals[type] = {}
                    
                if opp.key not in racer.rivals[type]:
                    racer.rivals[type][opp.key] = {'name': opp.name,'races': {}, 'team': opp.teams[-1], 'wins': {}}
            
                if season not in racer.rivals[type][opp.key]['races'].keys():
                    racer.rivals[type][opp.key]['races'][season] = 0
                if season not in racer.rivals[type][opp.key]['wins'].keys():
                    racer.rivals[type][opp.key]['wins'][season] = 0
                    
                racer.rivals[type][opp.key]['races'][season] += 1
                if toutcome == 'win':
                    racer.rivals[type][opp.key]['wins'][season] += 1
                    
            if season not in racer.seasons[type.lower()]:
                racer.seasons[type.lower()] = np.append(racer.seasons[type.lower()], season)

            racer.races.append({'raceID': row['raceID'].iat[0], 'raceNum': int(row['raceNum'].iat[0]), 'round':  row['round'].iat[0], 
                                'pos': type,
                                'date': date,
                                'womens': womens,
                                'partner': {'key': partnerKey, 'name': partnerName},
                                'opponentTeam': oppt, 
                                'opponentNick': oppn,
                                'score': tscore,
                                'outcome': toutcome, 
                                'predicted': 'win' if predictions[index][0] == 1 else 'lose',
                                'skipperRating': racer.sr.ordinal(target=targetElo, alpha=200 / model.sigma),
                                'crewRating': racer.cr.ordinal(target=targetElo, alpha=200 / model.sigma),
                                'womenSkipperRating': racer.wsr.ordinal(target=targetElo, alpha=200 / model.sigma),
                                'womenCrewRating': racer.wcr.ordinal(target=targetElo, alpha=200 / model.sigma),
                                'tsr': racer.tsr.ordinal(target=targetElo, alpha=200 / model.sigma),
                                'tcr': racer.tcr.ordinal(target=targetElo, alpha=200 / model.sigma),
                                'wtsr': racer.wtsr.ordinal(target=targetElo, alpha=200 / model.sigma),
                                'wtcr': racer.wtcr.ordinal(target=targetElo, alpha=200 / model.sigma),
                                'regAvg': regattaAvg,
                                'change': float(change),
                                'type': 'team'
                                })


In [87]:
def calculateFR(people, date, regatta, race, row, type, scoring, season, residuals, regattaAvg, womens):
    # Filter by current position 
    scores = row[row['Position'] == type]
    # Grab lists for each datapoint
    keys = scores['key'] # the sailor keys
    names = scores['Sailor'] # the sailor names
    teams = scores['Team'] # the sailor names
    scoreVals = list(scores['Score']) # the score values
    
    # check for invalid race conditions
    if len(keys) < 2: # less than two sailors
        return
    if np.isnan(scoreVals[0]): # B division did not complete the set
        return
    
    # Grab people objects 
    racers = [people[key] if key != 'Unknown'
                and key is not None 
                else people[name + "-" + team] for key,name,team in zip(keys,names,teams)]
    
    # Check for womens regatta
    partnerKeys = scores['PartnerLink']
    partnerNames = scores['Partner']
    # partnerKeys = row[row['Position'] != type]['key']
    # partnerNames = row[row['Position'] != type]['Sailor']
    # partners = [people[key] if key != 'Unknown'
            # and key is not None
            # else None for key in zip(partnerKeys, partnerNames)]
    
    # Seperate out the openskill rating objects for use in the model
    if not womens:
        ratings = [[r.sr] if type == 'Skipper' else [r.cr] for r in racers]
    else:
        ratings = [[r.wsr] if type == 'Skipper' else [r.wcr] for r in racers]

    # grab starting rating values for change calculation later
    startingRating = [r[0].ordinal(target=targetElo, alpha=200 / model.sigma) for r in ratings]
    
    # Rate using the model
    ratings = model.rate(ratings, scoreVals)
    
    # Make predictions
    predictions = model.predict_rank(ratings)
    
    # calculate error and add to list (residuals)
    for pred, score, racer in zip(predictions, scoreVals, racers):
        residuals.append(score - pred[0])

    # Update racers' ratings
    for racer, new_rating in zip(racers, ratings):
        if type == 'Skipper':
            if womens:
                racer.wsr = new_rating[0]
            else: 
                racer.sr = new_rating[0]
        else:
            if womens:
                racer.wcr = new_rating[0]
            else:
                racer.cr = new_rating[0]

    # Pre-calculate lists for sailor's race values
    if womens:
        changes = [(racers[i].wsr.ordinal(target=targetElo, alpha=200 / model.sigma) if type == 'Skipper' else racers[i].wcr.ordinal(target=targetElo, alpha=200 / model.sigma)) - startingRating[i] for i in range(len(racers))]
    else:
        changes = [(racers[i].sr.ordinal(target=targetElo, alpha=200 / model.sigma) if type == 'Skipper' else racers[i].cr.ordinal(target=targetElo, alpha=200 / model.sigma)) - startingRating[i] for i in range(len(racers))]
    
    # Common values for each sailor
    venue = scores['Venue'].iat[0]
    scoring = scores['Scoring'].iat[0]
    actualID = scores['raceID'].iat[0]
    
    # Make list of regions and combine PCCSC and NWICSA (those shouldnt count as cross regional for rating purposes)
    regions = [teamRegions[p.teams[-1]] if p.teams[-1] in teamRegions.keys() else None for p in racers]
    regions = ['PCCSC' if reg == 'NWICSA' else reg for reg in regions]
    
    # Check if race has any out of region sailors
    isCross = 1 if len(set(regions)) > 1 else 0
    
    # Only calculate number of cross regional sailors if it is the current season
    doCr = race.split("/")[0] in targetSeasons and isCross == 1
    
    # Loop through each sailor and the associated values
    for sailor, score, pred, change, partnerKey, partnerName in zip(racers, scoreVals, predictions, changes, partnerKeys, partnerNames):
        outLinks = 0
        
        if(isCross == 1):
            # Calculate the number of sailors that are not in the sailor's region
            outLinks = sum(1 for reg in regions # adds 1 each time that a region in the regatta ... 
                        if reg is not None # Double check that the region is not none # first double check that the sailor's team is in the list of regions
                        and sailor.teams[-1] in teamRegions.keys()
                        and ('PCCSC' if reg == 'NWICSA' else reg) != ('PCCSC' if teamRegions[sailor.teams[-1]] == 'NWICSA' else teamRegions[sailor.teams[-1]])) # The sailor's region is not the same as the opponent)
            # Note: We don't need to filter out the sailor themselves from this list, because they will have the same region as themseleves so it will not be counted.
            
        if season not in sailor.seasons[type.lower()]:
                sailor.seasons[type.lower()] = np.append(sailor.seasons[type.lower()], season)
        
        for other, otherScore in zip(racers, scoreVals):
            if other.key != sailor.key:
                if type not in sailor.rivals:
                    sailor.rivals[type] = {}
                
                if other.key not in sailor.rivals[type]:
                    sailor.rivals[type][other.key] = {'name': other.name,'races': {}, 'team': other.teams[-1], 'wins': {}}
                    
                if season not in sailor.rivals[type][other.key]['races'].keys():
                    sailor.rivals[type][other.key]['races'][season] = 0
                if season not in sailor.rivals[type][other.key]['wins'].keys():
                    sailor.rivals[type][other.key]['wins'][season] = 0
                
                sailor.rivals[type][other.key]['races'][season] += 1
                if otherScore > score:
                    sailor.rivals[type][other.key]['wins'][season] += 1
        
        # add race to each sailor's score
        sailor.races.append({
            'score': int(score), # Need to rewrite to include DNF and such (correctly evaluating score but its hard to tell )
            'pos': type,
            'predicted': pred[0],
            'ratio': 1 - ((int(score) - 1) / (len(racers) - 1)), # Calculate ratio here
            'change': change,
            'regAvg': regattaAvg,
            'cross': isCross,
            'outLinks': outLinks,
            'skipperRating': sailor.sr.ordinal(target=targetElo, alpha=200 / model.sigma),
            'crewRating': sailor.cr.ordinal(target=targetElo, alpha=200 / model.sigma),
            'womenSkipperRating': sailor.wsr.ordinal(target=targetElo, alpha=200 / model.sigma),
            'womenCrewRating': sailor.wcr.ordinal(target=targetElo, alpha=200 / model.sigma),
            'tsr': sailor.tsr.ordinal(target=targetElo, alpha=200 / model.sigma),
            'tcr': sailor.tcr.ordinal(target=targetElo, alpha=200 / model.sigma),
            'wtsr': sailor.wtsr.ordinal(target=targetElo, alpha=200 / model.sigma),
            'wtcr': sailor.wtcr.ordinal(target=targetElo, alpha=200 / model.sigma),
            'womens': womens,
            'date': date,
            'partner': {'name': partnerName, 'key': partnerKey},
            'venue': venue,
            'raceID': actualID,
            'type': 'fleet',
            'scoring': scoring
        })

In [94]:
# %%scalene # for profiling the code

# Set up people dictionary
# people = {}
# people = setupPeople()

people = {row.key: Sailor(row.name, row.key, row.year, [row.link], [row.team], {'skipper': [], 'crew': []}, gender=row.gender) for row in df_sailor_info.itertuples()}

for merge in merges:
    new = people[merge['first']]
    old = people[merge['second']]
    new.links = new.links + old.links
    new.teams = new.teams + old.teams
    del people[merge['second']]
    
for merge in merges:
    if merge['second'] in people.keys():
        people[merge['first']].links.append(people[merge['second']])
        del people[merge['second']]
    df_races_full['key'] = df_races_full['key'].replace(merge['second'], merge['first'])    
        
# Pre calculate the number of races to rate
leng = len(df_races_full['adjusted_raceID'].unique())

# List of residuals (errors)
residuals = []

# Current race count for print statement
i = 0

# First, group by regatta to calculate regatta-level metrics
regatta_groups = df_races_full.groupby(['regatta'], sort=False)

# Initialize counters for the outer loop
regatta_count = len(regatta_groups)
# current_regatta = 0

# Iterate through each regatta first
for regatta_name, regatta_data in regatta_groups:
    # current_regatta += 1
    # print(f"Processing regatta {current_regatta}/{regatta_count}: {regatta_name[0]}")
    # if regatta_name[0] != 'f24/george-warren-smith-24':
    #     continue
    
    # Calculate the average rating for this regatta
    
    # Calculate for each position scoring
    scoring = ""
    if isinstance(regatta_data.iloc[0]['Scoring'], str):
        scoring = regatta_data.iloc[0]['Scoring']
    else:
        scoring = regatta_data.iloc[0]['Scoring'].iat[0]
    
    season = regatta_data.iloc[0]['raceID'].split("/")[0]
    
    # Should calculate women's here 
    womens = False
    
    regattAvg = 0
    
    # Filter down people once per regatta? 
    if scoring != 'team':
        skipper_keys = regatta_data.loc[regatta_data['Position'] == 'Skipper']['key'].unique()
        skippers = [people[k] for k in skipper_keys]
        
        crew_keys = regatta_data.loc[regatta_data['Position'] == 'Crew']['key'].unique()
        crews = [people[k] for k in crew_keys if k in people.keys()]
            
        genders = [p.gender for p in skippers + crews]
        womenCount = sum([1 if g == "F" else 0 for g in genders])
        womens = 'M' not in genders and womenCount >= 4
        tempRating = 0
        for type, racers in zip(['Skiper', 'Crew'], [skippers, crews]):
            if womens:
                ratings = [r.wsr if type == 'Skipper' else r.wcr for r in racers]
            else:
                ratings = [r.sr if type == 'Skipper' else r.cr for r in racers]

            startingRating = [r.ordinal(target=targetElo, alpha=200 / model.sigma) for r in ratings]
            tempRating += sum(startingRating)
        
        # Calculate regatta average
        regattaAvg = tempRating / len(skippers + crews)
    else:  # TR 
        skipper_keys = [k for kl in regatta_data['allSkipperKeys'] for k in kl]
        crew_keys =[k for kl in regatta_data['allCrewKeys'] for k in kl]
        
        for merge in merges:
            if merge['second'] in skipper_keys:
                skipper_keys = [k if k != merge['second'] else merge['first'] for k in skipper_keys]
            if merge['second'] in crew_keys:
                crew_keys = [k if k != merge['second'] else merge['first'] for k in crew_keys]
        
        # BADD because it excludes people who have only ever teamraced...
        skippers = [people[k] for k in skipper_keys if k in people.keys()]
        crews = [people[k] for k in crew_keys if k in people.keys()]
        
        genders = [p.gender for p in skippers + crews]
        womenCount = sum([1 if g == "F" else 0 for g in genders])
        womens = 'M' not in genders and womenCount >= 4
        
        tempRating = 0
        for type, racers in zip(['Skiper', 'Crew'], [skippers, crews]):
            if womens:
                ratings = [r.wtsr if type == 'Skipper' else r.wtcr for r in racers]
            else:
                ratings = [r.tsr if type == 'Skipper' else r.tcr for r in racers]

            startingRating = [r.ordinal(target=targetElo, alpha=200 / model.sigma) for r in ratings]
            tempRating += sum(startingRating)
        
        # Calculate regatta average
        regattaAvg = tempRating / len(skippers + crews)
        
    race_groups = regatta_data.groupby(['date', 'regatta', 'adjusted_raceID'], sort=False)
    race_count = len(race_groups)
    
    # Iterate through each race in this regatta
    for (date, regatta, race), row in race_groups:
        i += 1
        
        # Print status every 100 races
        if i % 1000 == 0:
            print(f"Currently analyzing race {i}/{leng} in {regatta}, Date:{date}")
        
        for pos in ['Skipper', 'Crew']:
            if scoring == 'team':
                calculateTR(people, date, regatta, race, row, pos, scoring, season, regattaAvg, womens)
            else:
                calculateFR(people, date, regatta, race, row, pos, scoring, season, residuals, regattaAvg, womens)

# Calculate statiscs about the accuracy of the model. (Lower is better)
# me = np.array(residuals).mean()
# mse = (np.array(residuals) ** 2).mean()
# print(me, mse)

Currently analyzing race 1000/50351 in s16/ike-geiger-team-race, Date:2016-03-19 00:00:00
Currently analyzing race 2000/50351 in s16/oberg, Date:2016-04-16 00:00:00
Currently analyzing race 3000/50351 in f16/hatch-brown, Date:2016-09-17 00:00:00
Currently analyzing race 4000/50351 in f16/protest, Date:2016-10-09 00:00:00
Currently analyzing race 5000/50351 in s17/barnyard-bizzare, Date:2017-03-04 00:00:00
Currently analyzing race 6000/50351 in s17/admiral-moore-team-race, Date:2017-03-25 00:00:00
Currently analyzing race 7000/50351 in s17/new-england-team-race-fowle, Date:2017-04-08 00:00:00
Currently analyzing race 8000/50351 in s17/sperry-women-west-semis, Date:2017-05-23 00:00:00
Currently analyzing race 9000/50351 in f17/pere-marquette, Date:2017-09-30 00:00:00
Currently analyzing race 10000/50351 in f17/nickerson-2017, Date:2017-10-28 00:00:00
Currently analyzing race 11000/50351 in s18/bob-bavier-team-race, Date:2018-03-03 00:00:00
Currently analyzing race 12000/50351 in s18/seah

In [95]:
# Filter sailors who have 'f24' in their seasons list
eligible_skippers = [p for p in people.values()
                    if not set(p.seasons['skipper']).isdisjoint(targetSeasons)
                    and sum([race['outLinks'] for race in p.races if 'outLinks' in race.keys()]) > 70]

eligible_crews = [p for p in people.values()
                    if not set(p.seasons['crew']).isdisjoint(targetSeasons) 
                    and sum([race['outLinks'] for race in p.races if 'outLinks' in race.keys()]) > 70]

# TODO: Count tr and fr seasons seperately  
eligible_skippers_tr = [p for p in people.values()
                        if not set(p.seasons['skipper']).isdisjoint(targetTRSeasons)]
eligible_crews_tr = [p for p in people.values()
                        if not set(p.seasons['crew']).isdisjoint(targetTRSeasons)]

for p in people.values():
    p.skipperRank = 0
    p.crewRank = 0
    p.womenSkipperRank = 0
    p.womenCrewRank = 0
    p.skipperRankTR = 0
    p.crewRankTR = 0
    p.womenSkipperRankTR = 0
    p.womenCrewRankTR = 0

for i,s in enumerate(sorted([p for p in eligible_skippers if p.sr.mu != model.mu], key=lambda p: p.sr.ordinal(), reverse=True)):
    s.skipperRank = i + 1
for i,s in enumerate(sorted([p for p in eligible_crews if p.cr.mu != model.mu], key=lambda p: p.cr.ordinal(), reverse=True)):
    s.crewRank = i + 1

for i,s in enumerate(sorted([p for p in eligible_skippers if p.wsr.mu != model.mu], key=lambda p: p.wsr.ordinal(), reverse=True)):
    s.womenSkipperRank = i + 1
for i,s in enumerate(sorted([p for p in eligible_crews if p.wcr.mu != model.mu], key=lambda p: p.wcr.ordinal(), reverse=True)):
    s.womenCrewRank = i + 1
    
for i,s in enumerate(sorted([p for p in eligible_skippers_tr if p.tsr.mu != model.mu], key=lambda p: p.tsr.ordinal(), reverse=True)):
    s.skipperRankTR = i + 1
for i,s in enumerate(sorted([p for p in eligible_crews_tr if p.tcr.mu != model.mu], key=lambda p: p.tcr.ordinal(), reverse=True)):
    s.crewRankTR = i + 1

for i,s in enumerate(sorted([p for p in eligible_skippers_tr if p.wtsr.mu != model.mu], key=lambda p: p.wtsr.ordinal(), reverse=True)):
    s.womenSkipperRankTR = i + 1
for i,s in enumerate(sorted([p for p in eligible_crews_tr if p.wtcr.mu != model.mu], key=lambda p: p.wtcr.ordinal(), reverse=True)):
    s.womenCrewRankTR = i + 1

allRows = []
for sailor,p in people.items():
    avgSkipperRatio = float(np.array([r['ratio'] for r in p.races if r['pos'] == 'Skipper' and 'ratio' in r.keys()] ).mean())
    avgCrewRatio = float(np.array([r['ratio'] for r in p.races if r['pos'] == 'Crew' and 'ratio' in r.keys()] ).mean())
    p.avgSkipperRatio = avgSkipperRatio
    p.avgCrewRatio = avgCrewRatio
    
    allRows.append([p.name, sailor, 
                    p.skipperRank,p.crewRank, p.womenSkipperRank, 
                    p.womenCrewRank,p.skipperRankTR, p.womenSkipperRankTR,  
                    p.teams, 
                    p.sr.ordinal(target=targetElo, alpha=200 / model.sigma), 
                    p.wsr.ordinal(target=targetElo, alpha=200 / model.sigma), 
                    p.tsr.ordinal(target=targetElo, alpha=200 / model.sigma),
                    p.wtsr.ordinal(target=targetElo, alpha=200 / model.sigma), 
                    p.cr.ordinal(target=targetElo, alpha=200 / model.sigma), 
                    sum([race['outLinks'] for race in p.races if 'outLinks' in race.keys()]), 
                    p.year, p.links, p.sr.mu, 
                    p.cr.mu, avgSkipperRatio,avgCrewRatio, 
                    p.sr.sigma, p.cr.sigma, p.seasons, 
                    sum([race['cross'] for race in p.races if 'cross' in race.keys()]), 
                    p.races,len(p.races), p.rivals])

df_sailors = pd.DataFrame(allRows, columns=['Sailor', 'key', 'SkipperRank', 'CrewRank','WomenSkipperRank', 'WomenCrewRank', 'TRSkipperRank', 'TRWomenSkipperRank', 'Teams', 'SkipperOrdinal','WomenSkipperOrdinal', 'SkipperOrdinalTR', 'WomenSkipperOrdinalTR', 'CrewOrdinal','outLinks','GradYear', 'Links', 'SkipperMU','CrewMU', 'skipperAvgRatio', 'crewAvgRatio', 'SkipperSigma', 'CrewSigma', 'Seasons', 'Cross', 'Races', 'numRaces', 'Rivals'])

# df_sailors.to_json('sailorsexperiment20.json', index=False)
df_sailors.head()

  avgSkipperRatio = float(np.array([r['ratio'] for r in p.races if r['pos'] == 'Skipper' and 'ratio' in r.keys()] ).mean())
  ret = ret.dtype.type(ret / rcount)
  avgCrewRatio = float(np.array([r['ratio'] for r in p.races if r['pos'] == 'Crew' and 'ratio' in r.keys()] ).mean())


Unnamed: 0,Sailor,key,SkipperRank,CrewRank,WomenSkipperRank,WomenCrewRank,TRSkipperRank,TRWomenSkipperRank,Teams,SkipperOrdinal,...,CrewMU,skipperAvgRatio,crewAvgRatio,SkipperSigma,CrewSigma,Seasons,Cross,Races,numRaces,Rivals
0,aadvika ahuja,aadvika-ahuja,0,0,0,0,0,0,[Schools],1000.0,...,8.134139,,0.003977,8.333333,7.834282,"{'skipper': [], 'crew': ['f24']}",8,"[{'score': 23, 'pos': 'Crew', 'predicted': 20,...",8,{'Crew': {'chas-mitchell': {'name': 'Chas Mitc...
1,Aaron Blust,aaron-blust,0,0,0,0,0,0,[Olin],1000.0,...,13.118169,,0.376315,8.333333,5.339879,"{'skipper': [], 'crew': ['f21', 's22', 'f22', ...",6,"[{'score': 16, 'pos': 'Crew', 'predicted': 11,...",62,{'Crew': {'lily-bartlett': {'name': 'Lily Bart...
2,Aaron Burnett,aaron-burnett,0,0,0,0,0,0,[Florida],1226.736102,...,25.0,0.628395,,6.778941,8.333333,"{'skipper': ['s16', 's17'], 'crew': []}",0,"[{'score': 1, 'pos': 'Skipper', 'predicted': 1...",18,{'Skipper': {'peter-hidley': {'name': 'Peter H...
3,Aaron Comen,aaron-comen,0,0,0,0,0,0,[Texas],1011.912987,...,25.0,0.170555,,3.337847,8.333333,"{'skipper': ['s16', 'f16', 's17', 'f17', 's18'...",52,"[{'score': 6, 'pos': 'Skipper', 'predicted': 6...",164,{'Skipper': {'alexander-thompson': {'name': 'A...
4,Aaron Fairchild,aaron-fairchild,0,0,0,0,0,0,[U Pittsburgh],1431.844618,...,25.0,0.644078,,5.434859,8.333333,"{'skipper': ['f17', 's18', 'f18'], 'crew': []}",4,"[{'score': 2, 'pos': 'Skipper', 'predicted': 1...",39,{'Skipper': {'jillian-ticatch': {'name': 'Jill...


In [None]:
eligible = [p for p in people.values() if targetSeasons[-1] in p.seasons['skipper'] 
                                        or targetSeasons[-1] in p.seasons['crew']]

True

In [None]:
# Updates only changed sailors
col = db.collection('sailorsElo')

for p in [p for p in people.values() if targetSeason in p['seasons']['skipper'] or targetSeason in p['seasons']['crew']]:
    changes = {}
    if len(p.races) < 1:
        print("no races found for sailor, skipping...")
        continue

    # print(data['Name'])
    # try:
    #     data['Link']
    # except:
    #     changes['Link'] = list(p.link)[0]
    #     # print("No link!")
    # try:
    #     data['Year']
    # except:
    #     changes['Year'] = int(list(p.year)[0][:2])
    #     # print("No year!")

    # print(p.races[0])
    # check races
    changes['races'] = firestore.ArrayUnion(p.races)
    changes['lastUpdate'] = firestore.SERVER_TIMESTAMP
    # for raceID in [r['raceID'] for r in person.races]:
    #     if raceID not in [r['raceID'] for r in data['races']]:
    #         changes['races'].append(raceID)
    #         print("needs update!", data['races'])
    #         print(person.races)
    print(changes)
    col.document(doc_id).update(changes)

In [None]:
# Initialize Firestore client
col = db.collection('eloSailors')

# Initialize the batch
batch = db.batch()

# Number of documents to commit in each batch
batch_size = 40

eligible = [p for p in people.values() if targetSeasons[-1] in p.seasons['skipper'] 
                                        or targetSeasons[-1] in p.seasons['crew']]

# Iterate over the people values
# for i, p in enumerate(people.values()):
for i, p in enumerate(eligible):

# p = people['carter-anderson']
# Prepare the document data to be written
    if i % 100 == 0:
        print("Currently uploading:",i, p.name)
    try:
        doc_data = {
            "Name": p.name,
            "key": p.key.replace("/", "-"),
            'gender': p.gender,
            "Teams": p.teams.tolist() if isinstance(p.teams, np.ndarray) else p.teams,
            "SkipperRating": int(p.sr.ordinal(target=targetElo, alpha=200 / model.sigma)),
            "CrewRating": int(p.cr.ordinal(target=targetElo, alpha=200 / model.sigma)),
            "WomenSkipperRating": int(p.wsr.ordinal(target=targetElo, alpha=200 / model.sigma)),
            "WomenCrewRating": int(p.wcr.ordinal(target=targetElo, alpha=200 / model.sigma)),
            "tsr": int(p.tsr.ordinal(target=targetElo, alpha=200 / model.sigma)),
            "tcr": int(p.tcr.ordinal(target=targetElo, alpha=200 / model.sigma)),
            "wtsr": int(p.wtsr.ordinal(target=targetElo, alpha=200 / model.sigma)),
            "wtcr": int(p.wtcr.ordinal(target=targetElo, alpha=200 / model.sigma)),
            "SkipperRank": int(p.skipperRank),
            "CrewRank": int(p.crewRank),
            "WomenSkipperRank": int(p.womenSkipperRank),
            "WomenCrewRank": int(p.womenCrewRank),
            "SkipperRank": int(p.skipperRank),
            "CrewRank": int(p.crewRank),
            "WomenSkipperRank": int(p.womenSkipperRank),
            "WomenCrewRank": int(p.womenCrewRank),
            "SkipperRankTR": int(p.skipperRankTR),
            "CrewRankTR": int(p.crewRankTR),
            "WomenSkipperRankTR": int(p.womenSkipperRankTR),
            "WomenCrewRankTR": int(p.womenCrewRankTR),
            "Links": p.links.tolist() if isinstance(p.links, np.ndarray) else p.links if isinstance(p.links, str) or isinstance(p.links, list) else p.links[0].tolist(),
            "Year": p.year,
            "Seasons": {'skipper': list(p.seasons['skipper']), 'crew': list(p.seasons['crew'])},
            "Cross":  sum([race['cross'] for race in p.races if 'cross' in race.keys()]),
            "OutLinks": sum([race['outLinks'] for race in p.races if 'outLinks' in race.keys()]),
            'races': p.races,
            "Rivals": p.rivals,
            "lastUpdate": firestore.SERVER_TIMESTAMP
        }
    except Exception as e:
        print(p, p.links)
        raise e

    # Add the set operation to the batch
    doc_ref = col.document(p.key.replace("/", "-"))
    batch.set(doc_ref, doc_data, merge=True)

    # Commit the batch every 20 documents
    if (i + 1) % batch_size == 0:
        batch.commit()
        batch = db.batch()  # Start a new batch for the next set of documents

# Commit any remaining operations if there are less than 20 documents left
if (i + 1) % batch_size != 0:
    batch.commit()

Currently uploading: 0 Grant Schmidt
Currently uploading: 100 Jadin Gonzalez
Currently uploading: 200 Karolina Debniak
Currently uploading: 300 Liam Parker
Currently uploading: 400 Mary Madaus
Currently uploading: 500 Nathan Willhite
Currently uploading: 600 Piper Holthus
Currently uploading: 700 Sarah Young
Currently uploading: 800 Tucker Gibbs
Currently uploading: 900 Amelia Sulciner


In [111]:
# %%scalene
import requests
from bs4 import BeautifulSoup
from collections import defaultdict

def getCounts(races):
    # season_counts = defaultdict(int)
    season_counts = {}
    
    for race in races:
        season = race["raceID"].split("/")[0]
        if season not in season_counts.keys():
            season_counts[season] = {}
        if race['pos'] not in season_counts[season].keys():
            season_counts[season][race['pos']] = 0
        season_counts[season][race['pos']] += 1

    return dict(season_counts)

# batch = db.batch()
col = db.collection('eloTeams')
teams = []
scrape = False
teamNames = teamRegions.keys()
lenteams = len(teamNames)

season_mask = df_sailors['Seasons'].apply(lambda x: not set(x['skipper']).isdisjoint(targetSeasons) or  not set(x['crew']).isdisjoint(targetSeasons))

# Explode the Teams column to enable grouping
df_exploded = df_sailors.loc[season_mask].explode('Teams')

# Group by team and compute necessary aggregates
team_stats = df_exploded.groupby('Teams').agg(
    numCurMembers=('Teams', 'count'),
    avgSkipperOrdinal=('SkipperOrdinal', 'mean'),
    avgCrewOrdinal=('CrewOrdinal', 'mean'),
    avgSkipperRatio=('skipperAvgRatio', 'mean'),
    avgCrewRatio=('crewAvgRatio', 'mean')
)

# Calculate the average values as in the original code
team_stats['avg'] = (team_stats['avgSkipperOrdinal'] + team_stats['avgCrewOrdinal']) / 2
team_stats['avgRatio'] = (team_stats['avgSkipperRatio'] + team_stats['avgCrewRatio']) / 2

team_stats = team_stats.reindex(teamNames, fill_value=0)

team_link_map = df_races.drop_duplicates('Team').set_index('Team')['Teamlink'].to_dict()

# Optional: Loop for printing (if necessary)
for i, (team, row) in enumerate(team_stats.iterrows()):
    # if team != "MIT":
    #     continue
    print(f"{i}/{len(team_stats)} {team}")
    avg = row['avg']
    avgRatio = row['avgRatio']
    numCurMembers = row['numCurMembers']

# for i,team in enumerate(teamNames):
#     print(f"{i}/{lenteams} {team}")
#     temp = df_sailors.loc[(df_sailors['Teams'].apply(lambda x: team in x)) & (df_sailors['Seasons'].apply(lambda x: 'f24' in x['skipper'] or 'f24' in x['crew']))]
#     avg = (temp.loc[df_sailors['Seasons'].apply(lambda x: 'f24' in x['skipper']), 'SkipperOrdinal'].mean() + temp.loc[df_sailors['Seasons'].apply(lambda x: 'f24' in x['crew']), 'CrewOrdinal'].mean()) / 2
#     avgRatio = (temp.loc[df_sailors['Seasons'].apply(lambda x: 'f24' in x['crew']),'skipperAvgRatio'].mean() + temp.loc[df_sailors['Seasons'].apply(lambda x: 'f24' in x['crew']),'crewAvgRatio'].mean()) / 2
#     numCurMembers = len(temp)
    
    region = teamRegions[team]
    # teamLink = df_races.loc[df_races['Team'] == team, 'Teamlink'].iloc[0]
    teamLink = team_link_map.get(team, None)  # Default to None if team not found
    url = f"https://scores.collegesailing.org/schools/{teamLink.split("/")[2]}"
    
    if scrape:
        page = requests.get(url)
        teamPage = BeautifulSoup(page.content, 'html.parser')
        
        try:
            region = teamPage.find('span', class_="page-info-value").contents[0].contents[0]
        except:
            print(url)
            continue
        
    filtered_people = [p for p in people.values() if team in p.teams]
    
    members = [{"name": str(p.name),
                "key": p.key,
                "gender": p.gender,
                "year": str(p.year),
                'teams': list(p.teams),
                'skipperRating': int(p.sr.ordinal(target=targetElo, alpha=200 / model.sigma)),
                'crewRating': int(p.cr.ordinal(target=targetElo, alpha=200 / model.sigma)),
                'womenSkipperRating': int(p.wsr.ordinal(target=targetElo, alpha=200 / model.sigma)),
                'womenCrewRating': int(p.wcr.ordinal(target=targetElo, alpha=200 / model.sigma)),
                'tsr': p.tsr.ordinal(target=targetElo, alpha=200 / model.sigma),
                'wtsr': p.wtsr.ordinal(target=targetElo, alpha=200 / model.sigma),
                'tcr': p.tcr.ordinal(target=targetElo, alpha=200 / model.sigma),
                'wtcr': p.wtcr.ordinal(target=targetElo, alpha=200 / model.sigma),
                'avgSkipperRatio': float(p.avgSkipperRatio),
                'avgCrewRatio': float(p.avgCrewRatio),
                'raceCount': getCounts(p.races),
                'seasons':{'skipper': list(p.seasons['skipper']), 'crew': list(p.seasons['crew'])},
                'cross': sum([race['cross'] for race in p.races if 'cross' in race.keys()]),
                'outLinks': sum([race['outLinks'] for race in p.races if 'outLinks' in race.keys()]),
                'skipperRank': int(p.skipperRank),
                'crewRank': int(p.crewRank),
                'womenSkipperRank': int(p.womenSkipperRank),
                'womenCrewRank': int(p.womenCrewRank)
                } for p in filtered_people]
    
    teamRating = 0
    if numCurMembers > 0:
        # TODO: Race count is based on both tr and fr here so weighting is wrong... 
        teamRatingSkipper = sum([sum([p['skipperRating'] * (p['raceCount'][seas]['Skipper'] / 5) for p in members 
                                if seas in p['raceCount'].keys()
                                and 'Skipper' in p['raceCount'][seas].keys()
                                and p['raceCount'][seas]['Skipper'] > 5]) for seas in targetSeasons])
        
        teamRatingCrew = sum([sum([p['crewRating'] * (p['raceCount'][seas]['Crew']/ 5) for p in members 
                                if seas in p['raceCount'].keys()
                                and 'Crew' in p['raceCount'][seas].keys()
                                and p['raceCount'][seas]['Crew'] > 5]) for seas in targetSeasons])
        
        teamRating = (teamRatingSkipper + teamRatingCrew) / numCurMembers
    
    topRating = 0
    topWomenRating = 0
    
    numTops = 3
    if numCurMembers > 0:
        topSkippers = sorted([{'key':p['key'],'rating': p['skipperRating']} for p in members
                                if p['cross'] > 20
                                and p['outLinks'] > 70
                                and not set(p['seasons']['skipper']).isdisjoint(targetSeasons)
                                # and sum([p['raceCount'][seas] for seas in targetSeasons if seas in p['raceCount'].keys()]) > 5
                                  ], key=lambda x: x['rating'], reverse=True)[:numTops]
        topSkipperSum = sum([p['rating'] for p in topSkippers])
        
        topCrews = sorted([p['crewRating'] for p in members
                            #    if p['crewRank'] != 0
                                if p['cross'] > 20
                                and p['outLinks'] > 70
                                and not set(p['seasons']['crew']).isdisjoint(targetSeasons)
                                # and sum([p['raceCount'][seas] for seas in targetSeasons if seas in p['raceCount'].keys()]) > 5
                               ], reverse=True)[:numTops]
        topCrewsSum = sum(topCrews)
        
        topRating = (topSkipperSum + topCrewsSum) / (numTops * 2)
        # print(topSkippers, topCrews, topRating)

        # Women's
        numTops = 2
        topWomenSkippers = sorted([p['womenSkipperRating'] for p in members
                                #   if p['skipperRank'] != 0
                                  if p['cross'] > 20
                                  and p['gender'] == 'F'
                                  and not set(p['raceCount'].keys()).isdisjoint(targetSeasons)
                                  and all(['Skipper' in p['raceCount'][seas].keys() for seas in targetSeasons if seas in p['raceCount'].keys()])
                                  and sum([p['raceCount'][seas]['Skipper'] for seas in targetSeasons if seas in p['raceCount'].keys()]) > 5
                                  ], reverse=True)[:numTops]
        topWomenSkipperSum = sum(topWomenSkippers)
        
        topWomenCrews = sorted([p['womenCrewRating'] for p in members
                            #    if p['crewRank'] != 0
                                if p['cross'] > 20
                                and p['gender'] == 'F'
                                and not set(p['raceCount'].keys()).isdisjoint(targetSeasons)
                                and all(['Crew' in p['raceCount'][seas].keys() for seas in targetSeasons if seas in p['raceCount'].keys()])
                                and sum([p['raceCount'][seas]['Crew'] for seas in targetSeasons if seas in p['raceCount'].keys()]) > 5
                                ], reverse=True)[:numTops]
        topWomenCrewsSum = sum(topWomenCrews)
        
        topWomenRating = (topWomenSkipperSum + topWomenCrewsSum) / (numTops * 2)
        
        # Team racing:
        
        numTops = 3
        topSkippersTR = sorted([p['tsr'] for p in members 
                            if p['tsr'] != 1000
                            and not set(p['seasons']['skipper']).isdisjoint(targetSeasons)
                            ], reverse=True)[:numTops]
        topCrewsTR = sorted([p['tcr'] for p in members 
                             if p['tcr'] != 1000 
                            and not set(p['seasons']['crew']).isdisjoint(targetSeasons)
                            ], reverse=True)[:numTops]
        
        topRatingTR = (sum(topSkippersTR) + sum(topCrewsTR)) / (numTops * 2)
        
        numTops = 3
        topWomenSkippersTR = sorted([p['wtsr'] for p in members if p['wtsr'] != 1000 
                                                                and not set(p['seasons']['skipper']).isdisjoint(targetSeasons)], reverse=True)[:numTops]
        topWomenCrewsTR = sorted([p['wtcr'] for p in members if p['wtcr'] != 1000 and not set(p['seasons']['crew']).isdisjoint(targetSeasons)], reverse=True)[:numTops]
        
        topWomenRatingTR = (sum(topWomenSkippersTR) + sum(topWomenCrewsTR)) / (numTops * 2)
    
    #'#1': ,'#2': ,
    teams.append({"name":team, 
                  'topRating': topRating, 
                  'topSkippers': topSkippers,
                  'topWomenRating': topWomenRating,
                  'topRatingTR': topRatingTR,
                  'topWomenRatingTR': topWomenRatingTR, 
                  'teamRating': teamRating, 
                  "avg": avg, 
                  'avgRatio': avgRatio, 
                  "region": region, 
                  "link": url, 
                  'memberCount': numCurMembers, 
                  })
    col.document(team.replace(" ", "-").replace("/", "-").lower()).set({"name":team, "avg": avg, 'avgRatio': avgRatio, "region": region, "link": url, 'members': members})
    # if i > 20:
    #     break
#     if i % 20 == 0: # commit every 20 documents
#             batch.commit()
# batch.commit()
doc = db.collection('vars').document('eloTeams').set({"teams": teams})
newTeams = sorted(teams,key=lambda x: x['topRating'], reverse=True)
newTeams

0/207 Hawaii
1/207 Brown
2/207 Southern Cal
3/207 Salve Regina
4/207 UC Santa Barbara
5/207 Cal Poly
6/207 Washington
7/207 Channel Islands
8/207 UC San Diego
9/207 British Columbia
10/207 UC Los Angeles
11/207 Westmont College
12/207 Arizona State
13/207 Texas A&M Galveston
14/207 Texas A&M
15/207 Tulane
16/207 Rice
17/207 Texas
18/207 Oklahoma State
19/207 Texas A&M C. Christ
20/207 Central Oklahoma
21/207 Notre Dame
22/207 Jacksonville
23/207 Florida
24/207 Tennessee
25/207 Rollins
26/207 North Carolina State
27/207 Georgia Tech
28/207 Auburn
29/207 Charleston
30/207 South Florida
31/207 Old Dominion
32/207 Eckerd
33/207 Florida State
34/207 U. Miami
35/207 UW Milwaukee
36/207 Stony Brook
37/207 Duke
38/207 Clemson
39/207 U South Carolina
40/207 UNC Wilmington
41/207 Georgia
42/207 Berkeley
43/207 CSU Long Beach
44/207 Monterey Bay
45/207 UC Irvine
46/207 UC Davis
47/207 Rhode Island
48/207 Georgetown
49/207 Dartmouth
50/207 MIT
51/207 George Washington
52/207 Navy
53/207 Fordham
54

[{'name': 'Stanford',
  'topRating': 2459.6666666666665,
  'topSkippers': [{'key': 'thomas-sitzmann', 'rating': 2531},
   {'key': 'reade-decker', 'rating': 2504},
   {'key': 'vanessa-lahrkamp', 'rating': 2484}],
  'topWomenRating': 2323.0,
  'topRatingTR': 1657.4710278633747,
  'topWomenRatingTR': 1552.7325139034674,
  'teamRating': np.float64(24352.890000000003),
  'avg': np.float64(1567.7160899306969),
  'avgRatio': np.float64(0.7589192786489893),
  'region': 'PCCSC',
  'link': 'https://scores.collegesailing.org/schools/stanford',
  'memberCount': np.float64(20.0)},
 {'name': 'Yale',
  'topRating': 2351.1666666666665,
  'topSkippers': [{'key': 'stephan-baker', 'rating': 2454},
   {'key': 'jack-egan', 'rating': 2426},
   {'key': 'emma-cowles', 'rating': 2418}],
  'topWomenRating': 2282.25,
  'topRatingTR': 1698.1737876356165,
  'topWomenRatingTR': 1746.9529021863236,
  'teamRating': np.float64(28909.36666666667),
  'avg': np.float64(1489.1647091672203),
  'avgRatio': np.float64(0.6405

In [25]:
topSkippers = []
for p in sorted([p for p in people.values() if p.skipperRank <= 100 
                                            and p.skipperRank != 0 
                                            and not set(p.seasons['skipper']).isdisjoint(targetSeasons)],key=lambda p: p.skipperRank):
    topSkippers.append({'name': p.name,'key': p.key, 'year':p.year, 'rank': int(p.skipperRank), 'team': list(p.teams),'gender': p.gender, 'rating': p.sr.ordinal(target=targetElo, alpha=200 / model.sigma), 'seasons': list(p.seasons['skipper'])})
doc = db.collection('vars').document('topSkippers').set({"sailors": topSkippers})

topCrews = []
for p in sorted([p for p in people.values() if p.crewRank <= 100 
                                            and p.crewRank != 0 
                                            and not set(p.seasons['crew']).isdisjoint(targetSeasons)],key=lambda p: p.crewRank):
    topCrews.append({'name': p.name,'key': p.key, 'year':p.year, 'rank': int(p.crewRank), 'team': list(p.teams),'gender': p.gender, 'rating': p.cr.ordinal(target=targetElo, alpha=200 / model.sigma), 'seasons': list(p.seasons['crew'])})
doc = db.collection('vars').document('topCrews').set({"sailors": topCrews})

topSkippers = []
for p in sorted([p for p in people.values() if p.skipperRankTR <= 100 
                                            and p.skipperRankTR != 0 
                                            and not set(p.seasons['skipper']).isdisjoint(targetTRSeasons)],key=lambda p: p.skipperRankTR):
    topSkippers.append({'name': p.name,
                        'key': p.key,
                        'year':p.year,
                        'rank': int(p.skipperRankTR),
                        'team': list(p.teams),
                        'gender': p.gender,
                        'rating': p.tsr.ordinal(target=targetElo, alpha=200 / model.sigma),
                        'seasons': list(p.seasons['skipper'])})
doc = db.collection('vars').document('topSkippersTR').set({"sailors": topSkippers})

topCrews = []
for p in sorted([p for p in people.values() if p.crewRankTR <= 100 
                                            and p.crewRankTR != 0 
                                            and not set(p.seasons['crew']).isdisjoint(targetTRSeasons)],key=lambda p: p.crewRankTR):
    topCrews.append({'name': p.name,'key': p.key, 'year':p.year, 'rank': int(p.crewRankTR), 'team': list(p.teams),'gender': p.gender, 'rating': p.tcr.ordinal(target=targetElo, alpha=200 / model.sigma), 'seasons': list(p.seasons['crew'])})
doc = db.collection('vars').document('topCrewsTR').set({"sailors": topCrews})

#Womens
topSkippers = []
for p in sorted([p for p in people.values() if p.womenSkipperRank <= 100 
                                            and p.womenSkipperRank != 0 
                                            and not set(p.seasons['skipper']).isdisjoint(targetSeasons)],key=lambda p: p.womenSkipperRank):
    topSkippers.append({'name': p.name,'key': p.key, 'year':p.year, 'rank': int(p.womenSkipperRank), 'team': list(p.teams),'gender': p.gender, 'rating': p.wsr.ordinal(target=targetElo, alpha=200 / model.sigma), 'seasons': list(p.seasons['skipper'])})
doc = db.collection('vars').document('topWomenSkippers').set({"sailors": topSkippers})
topCrews = []
for p in sorted([p for p in people.values() if p.womenCrewRank <= 100 
                                            and p.womenCrewRank != 0 
                                            and not set(p.seasons['crew']).isdisjoint(targetSeasons)],key=lambda p: p.womenCrewRank):
    topCrews.append({'name': p.name,'key': p.key, 'year':p.year, 'rank': int(p.womenCrewRank), 'team': list(p.teams),'gender': p.gender, 'rating': p.wcr.ordinal(target=targetElo, alpha=200 / model.sigma), 'seasons': list(p.seasons['crew'])})
doc = db.collection('vars').document('topWomenCrews').set({"sailors": topCrews})

topSkippers = []
for p in sorted([p for p in people.values() if p.womenSkipperRankTR <= 100
                                            and p.womenSkipperRankTR != 0 
                                            and not set(p.seasons['skipper']).isdisjoint(targetTRSeasons)],key=lambda p: p.womenSkipperRankTR):
    topSkippers.append({'name': p.name,'key': p.key, 'year':p.year, 'rank': int(p.womenSkipperRankTR), 'team': list(p.teams),'gender': p.gender, 'rating': p.wtsr.ordinal(target=targetElo, alpha=200 / model.sigma), 'seasons': list(p.seasons['skipper'])})
doc = db.collection('vars').document('topWomenSkippersTR').set({"sailors": topSkippers})
topCrews = []
for p in sorted([p for p in people.values() if p.womenCrewRankTR <= 100 
                                            and p.womenCrewRankTR != 0 
                                            and not set(p.seasons['crew']).isdisjoint(targetTRSeasons)],key=lambda p: p.womenCrewRankTR):
    topCrews.append({'name': p.name,'key': p.key, 'year':p.year, 'rank': int(p.womenCrewRankTR), 'team': list(p.teams),'gender': p.gender, 'rating': p.wtcr.ordinal(target=targetElo, alpha=200 / model.sigma), 'seasons': list(p.seasons['crew'])})
doc = db.collection('vars').document('topWomenCrewsTR').set({"sailors": topCrews})

In [71]:
import json
flattened_dict = {p.key: {'team': p.teams[-1], 'year': p.year, 'name': p.name} for p in people.values() if 'f24' in p.seasons['skipper'] or 'f24' in p.seasons['crew']}
# print(flattened_dict)
doc = db.collection('vars').document('allSailors').set({'allSailors': json.dumps(flattened_dict, separators=(',', ':'))})

In [16]:
len(df_races_full['Regatta'].unique())

1724

In [None]:

# 'score': int(score), # Need to rewrite to include DNF and such (correctly evaluating score but its hard to tell )
#                 'pos': type,
#                 'predicted': pred[0],
#                 'ratio': 1 - ((int(score) - 1) / (len(racers) - 1)), # Calculate ratio here
#                 'change': change,
#                 'regAvg': regattaAvg,
#                 'cross': isCross,
#                 'outLinks': outLinks,
#                 'skipperRating': sailor.sr.ordinal(target=targetElo, alpha=200 / model.sigma), # add offset to prevent negative ratings
#                 'crewRating': sailor.cr.ordinal(target=targetElo, alpha=200 / model.sigma), # add offset to prevent negative ratings
#                 'womenSkipperRating': sailor.wsr.ordinal(target=targetElo, alpha=200 / model.sigma), # add offset to prevent negative ratings
#                 'womenCrewRating': sailor.wcr.ordinal(target=targetElo, alpha=200 / model.sigma), # add offset to prevent negative ratings
#                 'womens': womens,
#                 'date' :date,
#                 'partner': {'name': partner, 'link': partnerLink},
#                 'venue': venue,
#                 'raceID': actualID,
#                 'scoring': scoring
                
for regatta in list(df_races_full['Regatta'].unique()):
    races = df_races_full[df_races_full['Regatta'] == regatta]
    raceIDs = list(races['raceID'].unique())
    links = races['Sailor'].unique()
    date = races['Date'].unique()[0]
    venue = races['Venue'].unique()[0]
    scoring = races['scoring'].unique()[0]
    
    racePpl = [{
        "key":p.key, 
        "Name":p.name, 
        'Year': p.year,
        "Teams": list(p.teams),
        "Rating": int(p.r.mu),
        "GlobalRank": int(p.rank),
        "races": [{
                'sailor': p.name,
                'key': p.key,
                'pos': race['pos'],
                "raceID": race['raceID'],
                "score": float(race['score']),
                "predicted": int(race['predicted']), 
                "change": float(race['change']), 
                'newRating': float(race['newRating']),
                'partner':race['partner'],
                'ratio': float(race['ratio']),
                } for race in p.races if race['raceID'].split("/")[0] + "/" + race['raceID'].split("/")[1] == regatta]
        } for p in people.values() if p.name in links]
    
    # race = {'raceID':'', 'raceNum':0, 'div': '', 'sailors':[]}
    # person = {'name':'', 'rating':0, 'change':0, 'team': '', 'pos': '', 'div':'', 'partner': ''}
    doc = {'regattaName': regatta,'raceIDs':raceIDs,  'sailors': racePpl}
    
    # for race in races['raceID'].unique():
    #     sailors = races[races['raceID'] == race, 'Sailor'].unique()
    # for p in [p for p in people.values() if p.name in sailors]:
        # racePpl.append({'name':p.name, 'rating':p.rating, 'changes':p.changes, 'team': p.team, 'pos': '', 'div':'', 'partner': ''})
    # print(regatta)
    # db.collection('eloRegattas').document().set({'regattaName': regatta,'raceIDs':raceIDs, 'sailors': racePpl}, timeout=15)

s16/peter-wenner-rainbow-invite 2016-01-16 00:00:00
s16/jeff-simon 2016-01-23 00:00:00
s16/seisa-qualifier 2016-01-30 00:00:00
s16/harris-kempner 2016-02-06 00:00:00
s16/old-south 2016-02-13 00:00:00
s16/usf-women 2016-02-20 00:00:00
s16/nelson-roltsch 2016-02-20 00:00:00
s16/saisa-open 2016-02-20 00:00:00
s16/anteater-open 2016-02-20 00:00:00
s16/charleston-women 2016-02-27 00:00:00
s16/barnyard-bazaar 2016-02-27 00:00:00
s16/sea-to-sky 2016-02-27 00:00:00
s16/eckerd-interconference 2016-03-05 00:00:00
s16/odu-open 2016-03-05 00:00:00
s16/mustang-open 2016-03-05 00:00:00
s16/saisa-north-points 2016-03-05 00:00:00
s16/charleston-spring-coed 2016-03-12 00:00:00
s16/navy-spring-women 2016-03-12 00:00:00
s16/2016-sailpack-invitational 2016-03-12 00:00:00
s16/ice-breaker 2016-03-12 00:00:00
s16/south-points-ncf 2016-03-12 00:00:00
s16/saisa-sp 2016-03-12 00:00:00
s16/woollum 2016-03-12 00:00:00
s16/st-mary-women-interconference 2016-03-19 00:00:00
s16/west-canada-cup 2016-03-19 00:00:00
s1

KeyboardInterrupt: 

In [None]:
def delete_collection(coll_ref, batch_size):
    if batch_size == 0:
        return

    docs = coll_ref.list_documents(page_size=batch_size)
    deleted = 0

    for doc in docs:
        if deleted % 50 == 0:
            print(f"{deleted} Deleting doc {doc.id} => {doc.get().to_dict()}")
        doc.delete()
        deleted = deleted + 1

    if deleted >= batch_size:
        return delete_collection(coll_ref, batch_size)
col = db.collection('eloRegattas')
delete_collection(col, 400)