In [1]:
import pandas as pd
from datetime import datetime

In [2]:
result_cols = ["Bib", "Name", "NA", "Run1", "Run2"]
result_cols_keep = ["Bib", "Name", "Run1", "Run2"]
startList_cols_keep = ["Bib", "Name", "Team", "Club"]

In [3]:
results = pd.read_csv('data/140124FnGResultsBoth.csv', header=None, names=result_cols)[result_cols_keep]
startList = pd.read_csv('data/140124FnGStartList.csv')[startList_cols_keep]

In [4]:
startList.columns = ["Bib", "Name", "Tier", "Club"]

In [5]:
results.head()

Unnamed: 0,Bib,Name,Run1,Run2
0,2,Maurice Cacho,34.16,32.71
1,4,Justin Rosenberg,35.81,33.66
2,5,David Rosenblatt,31.44,30.99
3,6,Adam Szakacs,37.8,36.13
4,7,Eric Rosen,33.05,32.19


In [6]:
startList.head()

Unnamed: 0,Bib,Name,Tier,Club
0,1,Jennifer Hsiung,1,Mitch Perreault
1,2,Maurice Cacho,1,Will Carter
2,3,Talia Laurie,1,Graham Ramshaw
3,4,Justin Rosenberg,1,Stephanie Coward
4,5,David Rosenblatt,1,Mike McTaggart


In [7]:
combined = startList.merge(results, how="left", on=["Bib", "Name"])

In [8]:
def calculate_points_corrected(df):
    df.replace({'DNF': 9998, pd.NA: 9999}, inplace=True)
    df['Run1'] = pd.to_numeric(df['Run1']) # errors='coerce')
    df['Run2'] = pd.to_numeric(df['Run2']) # errors='coerce')
    df['Best Time'] = df[['Run1', 'Run2']].min(axis=1)
    # Initialize a column for points
    df['Points'] = 0
    
    # Process each tier
    for tier in df['Tier'].unique():
        # Filter the tier
        tier_df = df[df['Tier'] == tier]
    
        # Sort by best time
        tier_sorted = tier_df.sort_values(by='Best Time')
    
        # Assign points based on the number of racers in the tier
        num_racers = len(tier_sorted)
        tier_sorted['Points'] = range(8, 8-num_racers, -1)
    
        # Set absent racer points to zero:
        tier_sorted.loc[tier_sorted['Best Time'] == 9999, 'Points'] = 0
        
        # Handle ties for DNF (split points for racers with 9998 as their best time)
        dnf_racers = tier_sorted[tier_sorted['Best Time'] == 9998]
        if not dnf_racers.empty:
            dnf_points = dnf_racers['Points'].sum() / len(dnf_racers)
            tier_sorted.loc[tier_sorted['Best Time'] == 9998, 'Points'] = dnf_points
    
        # Update the main dataframe
        df.loc[df['Tier'] == tier, 'Points'] = tier_sorted['Points']

    # Calculate team points
    teams = df['Club'].unique()
    team_points = {}
    team_dfs = []
    for team in teams:
        team_df = df[df['Club'] == team]
        # If a team is missing a racer in a tier, give them the average points of that tier
        for tier in df['Tier'].unique():
            if tier not in team_df['Tier'].values:
                avg_points = df[df['Tier'] == tier]['Points'].mean()
                #team_df = team_df.append({'Club': team, 'Tier': tier, 'Points': avg_points}, ignore_index=True)
                # Using loc to append to the dataframe instead of append method
                #team_df.loc[-1] = [pd.NA, pd.NA, tier, team, pd.NA, pd.NA, pd.NA, avg_points]
                team_df.loc[len(team_df)] = [pd.NA, pd.NA, tier, team, pd.NA, pd.NA, pd.NA, avg_points]
        team_points[team] = team_df['Points'].sum()
        team_dfs.append(team_df)
    
    df_out = pd.concat(team_dfs)
    return team_points, df_out

In [9]:
team_points, df_out = calculate_points_corrected(combined)

  team_df.loc[len(team_df)] = [pd.NA, pd.NA, tier, team, pd.NA, pd.NA, pd.NA, avg_points]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_df.loc[len(team_df)] = [pd.NA, pd.NA, tier, team, pd.NA, pd.NA, pd.NA, avg_points]


In [10]:
team_points

{'Mitch Perreault': 52,
 'Will Carter': 73,
 'Graham Ramshaw ': 37,
 'Stephanie Coward': 51.0,
 'Mike McTaggart': 64,
 'Don French ': 67,
 'Joanna Perreault': 46,
 'Adam Grossman': 68}

In [11]:
df_out.to_csv("140124_scores_groupbyClub.csv")

In [13]:
df_out.sort_values(["Tier", 'Points'], ascending=False).to_csv("140124_scores_groupbyTier.csv")

In [106]:
df_out[df_out.Tier == 13].sort_values(["Tier", 'Points'], ascending=False).head(10)

Unnamed: 0,Bib,Name,Tier,Club,Run1,Run2,Best Time,Points
99,120,Hannah Kilmer Choi,13,Will Carter,38.07,9999.0,38.07,8.0
98,119,Jenna Livingston,13,Stephanie Coward,42.45,41.35,41.35,7.0
100,121,Graham Ramshaw,13,Graham Ramshaw,44.74,42.34,42.34,6.0
101,122,Kevin Kilmer Choi,13,Adam Grossman,42.47,9999.0,42.47,5.0
95,116,Melanie Leistner Lavoie,13,Mike McTaggart,42.87,42.92,42.87,4.0
102,123,Bernard Oegema,13,Don French,46.0,44.35,44.35,3.0
96,117,Kyla Taylor,13,Mitch Perreault,52.1,50.8,50.8,2.0
97,118,Ashley Ghan,13,Joanna Perreault,9999.0,9999.0,9999.0,0.0


In [107]:
df_out[df_out.Tier == 12].sort_values(["Tier", 'Points'], ascending=False).head(10)

Unnamed: 0,Bib,Name,Tier,Club,Run1,Run2,Best Time,Points
93,114,Sacha Fiand,12,Mitch Perreault,40.94,40.51,40.51,8.0
89,110,Robert Milthorpe,12,Don French,44.12,42.71,42.71,7.0
91,112,Darcy McDonald,12,Adam Grossman,46.96,46.54,46.54,6.0
94,115,Linda Leistner,12,Will Carter,47.37,48.1,47.37,5.0
87,108,Daniel Simmons Stubbs,12,Mike McTaggart,48.16,9999.0,48.16,4.0
90,111,Sheri Ramshaw,12,Graham Ramshaw,50.37,51.92,50.37,3.0
92,113,Ethan Ghan,12,Stephanie Coward,9999.0,9999.0,9999.0,0.0
88,109,Mackenzie Hamilton,12,Joanna Perreault,9999.0,9999.0,9999.0,0.0


In [108]:
df_out[df_out.Tier == 11].sort_values(["Tier", 'Points'], ascending=False).head(10)

Unnamed: 0,Bib,Name,Tier,Club,Run1,Run2,Best Time,Points
80,101.0,Joel Farber,11,Will Carter,43.0,42.55,42.55,8.0
82,103.0,Mark Sandell,11,Graham Ramshaw,48.02,43.83,43.83,7.0
86,107.0,Terence Woodside,11,Mike McTaggart,44.26,9999.0,44.26,6.0
12,,,11,Stephanie Coward,,,,5.0
83,104.0,Steve Crawford,11,Don French,45.49,44.92,44.92,5.0
81,102.0,Sarah Ledwidge,11,Joanna Perreault,47.79,47.05,47.05,4.0
84,105.0,Elaine Kilmer Choi,11,Mitch Perreault,49.19,47.15,47.15,3.0
85,106.0,Dena Silverberg,11,Adam Grossman,53.63,56.86,53.63,2.0


In [110]:
for i in range (10, 0, -1):
    print(df_out[df_out.Tier == i].sort_values(["Tier", 'Points'], ascending=False).shape)

(8, 8)
(8, 8)
(8, 8)
(8, 8)
(8, 8)
(8, 8)
(8, 8)
(8, 8)
(8, 8)
(8, 8)
