In [1]:
import pandas as pd
import numpy as np
import sqlite3

In [2]:
from helper_functions import upload_new_race_results

In [3]:
# Set the database path to a location with write permissions
db_path = '../race_league_results.db'

# Connect to SQLite database (or create it if it doesn't exist)
conn = sqlite3.connect(db_path)
cursor = conn.cursor()


## Add 2024 races:

In [4]:
path = 'data/2024/240218FnGResultsBoth.csv'
race_date = '02/18/2024'
race_id = 240218
description = '2024 F&G Race #4'
N_tiers=13
N_teams=8 

""" # Comment out for now
upload_new_race_results(
    path,
    race_date,
    race_id,
    description,
    N_tiers,
    N_teams,
    conn
)
"""

' # Comment out for now\nupload_new_race_results(\n    path,\n    race_date,\n    race_id,\n    description,\n    N_tiers,\n    N_teams,\n    conn\n)\n'

# Adding Bib to the race data... starting in 2025!
So that time trials have bibs too! 

In [5]:
sql = """
ALTER TABLE RaceResults
ADD COLUMN bib INTEGER;
"""

cursor = conn.cursor()
cursor.execute(sql)
conn.commit()

In [8]:
df = pd.read_sql("""
    SELECT * from RaceResults limit 2
    """,
    conn
)

In [9]:
df

Unnamed: 0,racer_id,discipline,team,tier,run1,run2,best_time,points,race_id,bib
0,jeffcox,SKI,,,35.82,,35.82,,1,
1,mcleanwood,SKI,,,36.1,,36.1,,1,


# Fixed bug, get points and audit are working as expected! 

In [5]:
from data.queries import get_point_total, audit_df

In [6]:
foo = audit_df(2024, conn)

In [7]:
foo.to_csv('full_audit_2024.csv')

In [8]:
bar = get_point_total(2024, conn)
bar

Unnamed: 0,team,team_points
0,Will Carter,231.0
1,Don French,229.0
2,Mike McTaggart,205.0
3,Graham Ramshaw,188.5
4,Stephanie Coward,187.0
5,Joanna Perreault,186.5
6,Adam Grossman,182.0
7,Mitch Perreault,181.0


In [None]:
# Commit the changes and close the connection
conn.commit()
conn.close()

# Initial set up

## set up racers table

In [5]:
racers_df = pd.read_csv('data/raw/FnGRacers.csv')
racers_df.drop(columns=["spouse_id"],inplace=True)
racers_df = drop_nulls(racers_df, "racer_id")
racers_df["racer_id"]=racers_df.first_name.str.lower() + racers_df.last_name.str.lower()
racers_df["racer_id"]=racers_df["racer_id"].apply(clean_string)
racers_df.head()

Unnamed: 0,racer_id,first_name,last_name,gender,birth_year
2,bradabbott,Brad,Abbott,M,1955.0
3,toriabbott,Tori,Abbott,F,
4,jayadlington,Jay,Adlington,M,1975.0
5,suealexanderash,Sue,Alexander Ash,-,1961.0
6,christineallan,Christine,Allan,F,1968.0


In [6]:
racers_df.shape

(309, 5)

In [7]:
racers_df.to_sql('Racers', conn, if_exists='replace', index=False)
conn.commit()

## set up races table

In [8]:
races_df = pd.read_csv('data/raw/FnGRaces.csv')
races_df['race_date'] = pd.to_datetime(races_df['race_date'], format='%m/%d/%Y')

In [9]:
races_df.to_sql('Races', conn, if_exists='replace', index=False)
conn.commit()

In [10]:
races_df.head()

Unnamed: 0,race_id,race_date,description
0,3039,2023-02-19,2023 F&G Race#4
1,3038,2023-02-12,2023 F&G Race#3
2,3037,2023-02-05,2023 F&G Race#2
3,3036,2023-01-15,2023 F&G Race#1
4,3035,2022-12-31,2023 Time Trials


## set up race results table:

In [4]:
from data.helper_functions import RACE_RESULT_COL_ORDER

In [37]:
results_df = pd.read_csv('data/raw/FnGResults.csv')
race_result_col_order = RACE_RESULT_COL_ORDER

In [38]:
results_df['racer_id'] = (
    results_df['racer_id']
    .str.split(', ')
    .apply(lambda x: x[1].lower() + x[0].lower())
    .apply(clean_string)
)

In [39]:
results_df["run1"] = results_df["_time"]
results_df["run2"] = np.nan
results_df["best_time"] = results_df["_time"]
results_df["points"] = np.nan
results_df["team"] = np.nan
results_df["tier"] = np.nan

results_df.drop(columns=["_time"],inplace=True)

In [40]:
results_df[race_result_col_order].head()

Unnamed: 0,racer_id,discipline,team,tier,run1,run2,best_time,points,race_id
0,jeffcox,SKI,,,35.82,,35.82,,1
1,mcleanwood,SKI,,,36.1,,36.1,,1
2,jeffparr,SKI,,,36.95,,36.95,,1
3,derekcrawford,SKI,,,36.97,,36.97,,1
4,brandonhune,SKI,,,37.93,,37.93,,1


In [41]:
results_df[race_result_col_order].to_sql('RaceResults', conn, if_exists='replace', index=False)
conn.commit()

## set up race team data:

In [21]:
teams_df = pd.read_csv('data/raw/24FnGStartList.csv')
teams_df = drop_nulls(teams_df, "name")
teams_df["racer_id"] = teams_df["name"].apply(clean_string)
teams_df.columns = ["bib", "discipline", "name", "tier", "team", "racer_id"]
teams_df["year"] = 2024

In [22]:
teams_df

Unnamed: 0,bib,discipline,name,tier,team,racer_id,year
0,1,SKI,Jennifer Hsiung,1,Mitch Perreault,jenniferhsiung,2024
1,2,SKI,Maurice Cacho,1,Will Carter,mauricecacho,2024
2,4,SKI,Justin Rosenberg,1,Stephanie Coward,justinrosenberg,2024
3,5,SKI,David Rosenblatt,1,Mike McTaggart,davidrosenblatt,2024
4,6,SKI,Adam Szakacs,1,Don French,adamszakacs,2024
...,...,...,...,...,...,...,...
95,119,SNBD,Jenna Livingston,13,Stephanie Coward,jennalivingston,2024
96,120,SNBD,Hannah Kilmer Choi,13,Will Carter,hannahkilmerchoi,2024
97,121,SNBD,Graham Ramshaw,13,Graham Ramshaw,grahamramshaw,2024
98,122,SNBD,Kevin Kilmer Choi,13,Adam Grossman,kevinkilmerchoi,2024


In [23]:
teams_df.to_sql('Teams', conn, if_exists='replace', index=False)
conn.commit()

# Queries test:

In [11]:
from data.queries import get_table_schema, get_race_data, get_point_total, get_races_list

In [12]:
races = get_races_list(conn)

In [13]:
races.head(2)

Unnamed: 0,race_id,race_date,description
0,240218,2024-02-18 00:00:00,2024 F&G Race #4
1,240211,2024-02-11 00:00:00,2024 F&G Race #3


In [14]:
order_by = 'team, bib'
order_by = 'bib'

race_id = int(races.head(1).race_id[0])
year=2024

foo = get_race_data(order_by, race_id, year, conn)
foo

Unnamed: 0,bib,name,discipline,team,tier,run1,run2,best_time,points
0,1,Jennifer Hsiung,SKI,Mitch Perreault,1.0,54.02,52.80,52.80,2.0
1,2,Maurice Cacho,SKI,Will Carter,1.0,50.27,45.89,45.89,6.0
2,4,Justin Rosenberg,SKI,Stephanie Coward,1.0,46.74,46.66,46.66,5.0
3,5,David Rosenblatt,SKI,Mike McTaggart,1.0,44.44,44.77,44.44,8.0
4,6,Adam Szakacs,SKI,Don French,1.0,51.73,50.88,50.88,3.0
...,...,...,...,...,...,...,...,...,...
95,119,Jenna Livingston,SNBD,Stephanie Coward,13.0,27.78,27.40,27.40,7.0
96,120,Hannah Kilmer Choi,SNBD,Will Carter,13.0,25.41,25.34,25.34,8.0
97,121,Graham Ramshaw,SNBD,Graham Ramshaw,13.0,28.53,28.58,28.53,6.0
98,122,Kevin Kilmer Choi,SNBD,Adam Grossman,13.0,9998.00,37.08,37.08,2.0


In [15]:
foo.groupby("team").agg({"points":"sum"}).sort_values("points", ascending=False)

Unnamed: 0_level_0,points
team,Unnamed: 1_level_1
Mitch Perreault,67.0
Don French,66.0
Stephanie Coward,58.0
Will Carter,57.0
Graham Ramshaw,52.0
Joanna Perreault,51.0
Mike McTaggart,39.0
Adam Grossman,29.0
