In [26]:
import numpy as np
import pandas as pd 
import fastf1 as f 
from pathlib import Path
from datetime import datetime

In [27]:
# Directory the notebook kernel is currently running in.
cwd = Path.cwd()
# One level above the working directory; the "Cache" and "Data" folders
# used by later cells are created underneath it.
parent_path = cwd.parent

In [28]:
# Location of the on-disk FastF1 cache, next to the notebook's parent folder.
cache_path = parent_path.joinpath("Cache")
print(f'A cache for loading FastF1 data will be created in this directory: {cache_path}')

# Create the folder first (a no-op when it already exists), then point
# FastF1's cache at it.
cache_path.mkdir(exist_ok=True)
f.Cache.enable_cache(cache_path)

A cache for loading FastF1 data will be created in this directory: d:\Projects\F1-Visualization\Cache


In [29]:
# Folder that receives the per-season CSV files.
data_path = parent_path.joinpath("Data")
print(f"Relevant data and CSV files will be stored in this directory: {data_path}")

# exist_ok=True keeps the cell re-runnable when the folder is already there.
data_path.mkdir(exist_ok=True)

Relevant data and CSV files will be stored in this directory: d:\Projects\F1-Visualization\Data


In [30]:
# Halts execution here so the season selection below can be edited before continuing.
pause = input("You can optionally configure which seasons' data to update in this cell. The default (enter nothing) is to update all seasons starting from 2018.")

# Most recent season the notebook knows about.
current_season = 2023

# Seasons to refresh, given as a list of years, e.g. load_seasons = [2021, 2022].
# An empty list means "every season from 2018 up to and including current_season".
load_seasons = []

if load_seasons == []:
    load_seasons = [*range(2018, current_season + 1)]

In [31]:
# Rounds per season; the entry for the current season is overwritten below
# with the number of rounds whose event date has already passed.
num_rounds = {2018:21, 2019:21, 2020:17, 2021:22, 2022:22, 2023:24}

current_schedule = f.get_event_schedule(current_season)
event_is_past = current_schedule["EventDate"] < datetime.now()
rounds_completed = current_schedule.loc[event_is_past, "RoundNumber"].max()

# max() over an empty selection is NaN, which means no event has taken place yet.
rounds_completed = 0 if pd.isna(rounds_completed) else rounds_completed

print(f"Correctness Check: {rounds_completed} rounds of the {current_season} season have been completed")
num_rounds[current_season] = rounds_completed

events        ERROR 	Failed to access primary schedule backend. Falling back to Ergast! Reason: Trailing data)


Correctness Check: 0 rounds of the 2023 season have been completed


In [32]:
def load_all_data(season, path):
    """Fetch every completed race of ``season`` and write all laps to one CSV.

    Intended for seasons that have no CSV yet. The number of rounds to fetch
    is looked up in the module-level ``num_rounds`` mapping.

    Parameters
    ----------
    season : int
        Championship year to download.
    path : path-like
        Destination of the CSV file.

    Returns
    -------
    None
        Nothing is written when ``num_rounds[season]`` is 0.
    """
    event_schedule = f.get_event_schedule(season)
    collected = []

    for round_number in range(1, num_rounds[season] + 1):
        session = f.get_session(season, round_number, 'R')
        session.load()

        round_laps = session.laps
        # Tag each lap with its round so the seasons can be filtered later.
        round_laps["RoundNumber"] = round_number
        is_this_round = event_schedule["RoundNumber"] == round_number
        round_laps["EventName"] = event_schedule[is_this_round]["EventName"].item()
        collected.append(round_laps)

    if not collected:
        print(f"No data available for {season} season yet.")
        return None

    combined = pd.concat(collected, ignore_index=True)
    combined.to_csv(path)
    print(f"Finished loading {season} season data.")
    return None

In [33]:
def update_data(season, path):
    """Append the laps of not-yet-stored rounds to an existing season CSV.

    The rounds already present in the file are compared with rounds
    ``1..num_rounds[season]`` (module-level mapping); only the missing ones
    are downloaded and appended.

    Parameters
    ----------
    season : int
        Championship year the CSV belongs to.
    path : path-like
        Existing CSV for this season, with the row index in its first column.

    Returns
    -------
    None
    """
    existing_data = pd.read_csv(path, index_col=0, header=0)

    schedule = f.get_event_schedule(season)

    loaded_rounds = set(pd.unique(existing_data["RoundNumber"]))
    newest_round = num_rounds[season]
    all_rounds = set(range(1, newest_round + 1))
    missing_rounds = all_rounds.difference(loaded_rounds)

    if not missing_rounds:
        print(f"{season} season is already up to date.")
        return None
    else:
        # correctness check
        print("Existing coverage: ", loaded_rounds)
        print("Coverage to be added: ", missing_rounds)

    race_dfs = []

    # Sorted so rounds are fetched and appended in calendar order instead of
    # arbitrary set order.
    for i in sorted(missing_rounds):
        # Use the requested season (this was previously hard-coded to 2022,
        # which fetched the wrong races for every other year).
        race = f.get_session(season, i, 'R')
        race.load()
        laps = race.laps
        laps["RoundNumber"] = i
        laps["EventName"] = schedule.loc[schedule["RoundNumber"] == i]["EventName"].item()
        race_dfs.append(laps)

    all_laps = pd.concat(race_dfs, ignore_index=True)

    # The appended rows carry no header, so they must follow the column order
    # of the header already in the file. Columns absent from the file are
    # dropped; columns absent from the new data are written as empty.
    all_laps = all_laps.reindex(columns=existing_data.columns)

    # Continue the row index where the file ends rather than restarting at 0.
    first_new_row = len(existing_data)
    all_laps.index = pd.RangeIndex(first_new_row, first_new_row + len(all_laps))

    # header=False: a second header row in the middle of the file would be
    # read back as data and turn RoundNumber into strings on the next run.
    all_laps.to_csv(path, mode='a', header=False)
    print(f"Finished updating {season} season data.")
    return None

In [35]:
for season in load_seasons:
    path = parent_path / "Data" / f"all_laps_{season}.csv"

    # An existing CSV only needs its missing rounds; otherwise the whole
    # season is downloaded from scratch.
    refresh = update_data if path.is_file() else load_all_data
    refresh(season, path)

2018 season is already up to date.
2019 season is already up to date.
2020 season is already up to date.
2021 season is already up to date.
2022 season is already up to date.


events        ERROR 	Failed to access primary schedule backend. Falling back to Ergast! Reason: Trailing data)


No data available for 2023 season yet.
