# Creating this notebook to manually fix issues in the DB as they come up!

## Fix 1: discipline strings
Some `discipline` values contain extra whitespace; strip it with `TRIM` and then check the distinct values that remain.

In [1]:
import pandas as pd
import numpy as np
import sqlite3

from queries import get_race_data, get_point_total, get_races_list, audit_df, get_race_years

In [2]:
# Set the database path to a location with write permissions
# (a relative path, so it resolves against the kernel's working directory).
db_path = '../race_league_results.db'

# Connect to SQLite database (or create it if it doesn't exist).
# NOTE(review): because a missing file is created silently, a wrong path gives
# an empty database instead of an error -- confirm the path before running fixes.
conn = sqlite3.connect(db_path)
# Cursor on the shared connection; the UPDATE cell further down creates its own.
cursor = conn.cursor()


In [14]:
# Strip leading/trailing spaces from the discipline column.
# The WHERE clause restricts the write to rows that actually change, so
# re-running the cell is a no-op and rowcount reports only real fixes.
# (NULL disciplines are skipped; TRIM(NULL) is NULL, so they were unchanged before too.)
sql = """
UPDATE RaceResults
SET discipline = TRIM(discipline)
WHERE discipline <> TRIM(discipline);
"""

# `with conn` commits when the block succeeds and rolls back if it raises,
# so a failed fix never leaves a half-applied transaction open.
with conn:
    cursor = conn.execute(sql)

# Number of rows whose discipline was rewritten by this run.
cursor.rowcount

In [15]:
# Check the result of the trim: one row per distinct discipline value with
# the number of RaceResults rows that carry it.
# `group by 1` groups on the first selected column (discipline).
sql = """
select discipline, COUNT(*)
from RaceResults
group by 1;
"""
df = pd.read_sql(sql, conn)

In [16]:
# Show the per-discipline counts loaded in the previous cell.
df

Unnamed: 0,discipline,COUNT(*)
0,,16
1,SKI,3900
2,SNBD,701


## Fixing the time trial data

In [78]:
def get_races_list(conn):
    """Load every row of ``Races`` as a DataFrame, most recent race first.

    A ``year`` column is added holding the first four characters of
    ``description`` (a string such as ``'2024'``), so the year comes from the
    description text, not from ``race_date``.

    Note: this definition replaces the ``get_races_list`` imported from
    ``queries`` for the rest of the session.

    Args:
        conn: Open database connection accepted by ``pd.read_sql_query``.

    Returns:
        pandas.DataFrame with all ``Races`` columns plus ``year``, ordered by
        ``race_date`` descending.
    """
    query = "select *, substr(description, 1,4) as year From Races order by race_date DESC"
    races = pd.read_sql_query(query, conn)
    return races

In [79]:
# Load all races (newest race_date first) with the helper defined above.
race_list = get_races_list(conn)  # returns a DataFrame
# Empty mapping that the next cell fills: year -> list of race records.
races_by_year = {}

In [80]:
# `years` is not defined anywhere in this notebook, so on a fresh kernel the
# original loop failed with NameError. Derive it from the data instead:
# the distinct `year` values in the order they appear in race_list
# (race_list is sorted by race_date DESC).
years = race_list['year'].drop_duplicates().tolist()

# Map each year to its races as a list of plain dicts (one dict per row).
# Building the dict in one expression keeps the cell idempotent on re-run.
races_by_year = {
    y: race_list[race_list['year'] == y].to_dict('records')
    for y in years
}

In [81]:
# Inspect the year -> race-records mapping built in the previous cell.
races_by_year

{'2024': [{'race_id': 240218,
   'race_date': '2024-02-18 00:00:00',
   'description': '2024 F&G Race #4',
   'year': '2024'},
  {'race_id': 240211,
   'race_date': '2024-02-11 00:00:00',
   'description': '2024 F&G Race #3',
   'year': '2024'},
  {'race_id': 240121,
   'race_date': '2024-01-21 00:00:00',
   'description': '2024 F&G Race #2',
   'year': '2024'},
  {'race_id': 240114,
   'race_date': '2024-01-14 00:00:00',
   'description': '2024 F&G Race #1',
   'year': '2024'}],
 '2023': [{'race_id': 3039,
   'race_date': '2023-02-19 00:00:00',
   'description': '2023 F&G Race#4',
   'year': '2023'},
  {'race_id': 3038,
   'race_date': '2023-02-12 00:00:00',
   'description': '2023 F&G Race#3',
   'year': '2023'},
  {'race_id': 3037,
   'race_date': '2023-02-05 00:00:00',
   'description': '2023 F&G Race#2',
   'year': '2023'},
  {'race_id': 3036,
   'race_date': '2023-01-15 00:00:00',
   'description': '2023 F&G Race#1',
   'year': '2023'},
  {'race_id': 3035,
   'race_date': '2022-1

In [None]:

# Bundle the year list and the per-year race records into a single mapping.
# Both inputs come from earlier cells (`years`, `races_by_year`).
races_metadata = dict(years=years, races=races_by_year)

In [17]:
# Pull a small sample (5 rows) of results whose discipline is exactly 'SNBD'
# to eyeball the columns. Note this overwrites `df` from the earlier cells.
sql = """
select *
from RaceResults
where discipline = 'SNBD'
limit 5
"""
df = pd.read_sql(sql, conn)

In [18]:
# Show the five sample SNBD rows fetched in the previous cell.
df

Unnamed: 0,racer_id,discipline,team,tier,run1,run2,best_time,points,race_id
0,kevinkilmerchoi,SNBD,,,51.22,,51.22,,1
1,robinmanley,SNBD,,,52.5,,52.5,,1
2,bernardoegema,SNBD,,,54.53,,54.53,,1
3,terencewoodside,SNBD,,,55.23,,55.23,,1
4,kimlivingston,SNBD,,,56.61,,56.61,,1


In [23]:

# Count SNBD result rows per calendar year of the race date.
#   - inner query `race`: row count per (discipline, race_id) in RaceResults
#   - inner query `team`: race_id -> year, taken from strftime('%Y', race_date)
#   - outer query: keep SNBD and sum the counts per (year, discipline)
# NOTE(review): the Races subquery is aliased `team` although it carries race
# years, and the LEFT JOIN keeps results whose race_id has no Races row
# (those would land in a NULL year group).
sql = """
SELECT 
    year, 
    discipline,
    SUM(cnt) AS racer_cnt
FROM (
    select discipline, race_id, COUNT(*) as cnt
    from RaceResults
    group by 1,2
) AS race 
LEFT JOIN (
    select race_id, strftime('%Y', race_date) AS year
    from Races
) AS team
ON race.race_id = team.race_id
WHERE discipline = 'SNBD'
GROUP BY 1,2

"""
df = pd.read_sql(sql, conn)
# Last expression of the cell: render the per-year counts.
df

Unnamed: 0,year,discipline,racer_cnt
0,2013,SNBD,108
1,2014,SNBD,89
2,2015,SNBD,69
3,2016,SNBD,72
4,2017,SNBD,95
5,2018,SNBD,78
6,2019,SNBD,51
7,2020,SNBD,33
8,2022,SNBD,22
9,2024,SNBD,84


In [32]:
# should be results in 2022
year = 2022

# Join every result row to its race, keeping only races whose race_date falls
# in `year`, then keep the SNBD rows. Both sides are selected with `*`, so
# race_id appears twice in the result (once from each side of the join).
# The year is bound as a `?` parameter instead of being formatted into the SQL
# text; strftime('%Y', ...) yields text, so the value is passed as a string.
sql = """
SELECT race.*, team.*
FROM (
    select *
    from RaceResults
) AS race
JOIN (
    select race_id, race_date, strftime('%Y', race_date) AS year
    from Races
    WHERE strftime('%Y', race_date) = ?
) AS team
ON race.race_id = team.race_id
WHERE discipline = 'SNBD'
;
"""
df = pd.read_sql(sql, conn, params=(str(year),))

In [34]:
# (rows, columns) of the joined SNBD frame plus its first two rows; returned as
# a tuple, so the frame renders as plain text rather than a table.
df.shape, df.head(2)

((22, 12),
            racer_id discipline  team  tier   run1  run2  best_time points  \
 0  andrewhildebrand       SNBD  None  None  31.76  None      31.76   None   
 1   kevinkilmerchoi       SNBD  None  None  36.76  None      36.76   None   
 
    race_id  race_id            race_date  year  
 0     2034     2034  2022-01-09 00:00:00  2022  
 1     2034     2034  2022-01-09 00:00:00  2022  )

In [35]:
# should be results in 2022
race_id = 2034

# Every result row for one race, all disciplines. race_id is bound as a `?`
# parameter rather than interpolated into the SQL text.
sql = """
select racer_id, discipline, team, tier, run1, run2, best_time, points
from RaceResults
where race_id = ?
;
"""
df2 = pd.read_sql(sql, conn, params=(race_id,))

In [36]:
# Show the result rows loaded for the selected race (all disciplines).
df2

Unnamed: 0,racer_id,discipline,team,tier,run1,run2,best_time,points
0,brandonhune,SKI,,,24.32,,24.32,
1,michaelmctaggart,SKI,,,24.59,,24.59,
2,jeffcox,SKI,,,24.66,,24.66,
3,lauracoward,SKI,,,24.71,,24.71,
4,mcleanwood,SKI,,,24.72,,24.72,
...,...,...,...,...,...,...,...,...
61,kieranarnold,SNBD,,,40.07,,40.07,
62,lindaleistner,SNBD,,,40.96,,40.96,
63,billvangroningen,SNBD,,,41.88,,41.88,
64,stephencrawford,SNBD,,,44.41,,44.41,


### HERE! Why doesn't this work?! -> Let's compare to what is in the JSON...

In [37]:
import json
import pprint  # optional, but handy for "pretty-printing"

# Exported results file; the path is relative to the kernel's working directory.
json_file_path = "../docs/data/race_results.json"  # Adjust to your actual file path
# Read explicitly as UTF-8: without `encoding`, open() uses the platform's
# default encoding, which can mis-decode non-ASCII text in the JSON.
with open(json_file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

In [38]:
# Keep only the JSON rows whose last field equals the currently selected race_id.
filtered_rows = list(filter(lambda row: row[-1] == race_id, data["rows"]))

In [44]:
# Selection used by the JSON filter in the next cell: discipline code and race id.
discipline_selected, race_id = 'SNBD', 2034

In [45]:
# Positions of race_id and discipline inside each JSON row.
# These are hard-coded, not looked up from the file.
race_id_index = 9
discipline_index = 2

# Collect the rows that match both the selected race and the selected discipline.
filtered_rows = []
for row in data["rows"]:
    if row[race_id_index] == race_id and row[discipline_index] == discipline_selected:
        filtered_rows.append(row)


In [55]:
# Peek at the first JSON row that matched the selected race_id and discipline.
filtered_rows[0]

[None, 'andrewhildebrand', 'SNBD', None, None, 31.76, None, 31.76, None, 2034]

In [40]:
# SNBD subset of the DB rows for the selected race, to compare with the JSON rows.
df2[df2.discipline == 'SNBD']

Unnamed: 0,racer_id,discipline,team,tier,run1,run2,best_time,points
59,andrewhildebrand,SNBD,,,31.76,,31.76,
60,kevinkilmerchoi,SNBD,,,36.76,,36.76,
61,kieranarnold,SNBD,,,40.07,,40.07,
62,lindaleistner,SNBD,,,40.96,,40.96,
63,billvangroningen,SNBD,,,41.88,,41.88,
64,stephencrawford,SNBD,,,44.41,,44.41,
65,cindyrossignol,SNBD,,,68.01,,68.01,
