# Warning

You should ***not*** use `run all` in this notebook. 

Run the following five cells first.

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 
import fastf1 as f 
import os
from datetime import datetime

In [None]:
# Default to a "Cache" directory inside the current working directory.
# os.path.join inserts the platform's own path separator; a hard-coded "\\"
# only works on Windows and becomes part of the folder name elsewhere.
cache_path = os.path.join(os.getcwd(), "Cache")

In [None]:
print('A cache for loading FastF1 data will be created in this directory: {}'.format(cache_path))

# Keep prompting until the answer is recognised. The reply is stripped and
# upper-cased so that "y", " Y " or "n" are accepted as well.
response = ''
while response not in ('Y', 'N'):
    response = input("Enter Y to continue or N to terminate running and reset cache path.").strip().upper()

if response == 'N':
    # Stop this cell with an explicit, self-explanatory error rather than a
    # bare `assert False`, which carries no message and is meant for
    # invariants, not control flow.
    raise RuntimeError(
        "Cache setup cancelled by user. Edit cache_path in the cell above, then re-run this cell."
    )

In [None]:
# FastF1 expects the cache directory to exist already, so create it first.
# exist_ok=True makes this a no-op when the directory is already there.
os.makedirs(cache_path, exist_ok=True)
f.Cache.enable_cache(cache_path)

In [None]:
# Fetch the event schedule for each of the two seasons used in this notebook.
schedule_2021, schedule_2022 = (
    f.get_event_schedule(season) for season in (2021, 2022)
)

Now, decide whether you should run the first time loading section or the incremental load section.

## First Time Loading

*If you have loaded the data before and are only adding new 2022 Grands Prix, skip to the incremental load section.*

***Only run one of these two sections!***

In [None]:
# Number of 2021 rounds to download; the first-time load below fetches
# rounds 1 through this value (inclusive).
num_rounds_2021: int = 22

In [None]:
# Download every 2021 race session and tag each lap with the round number and
# event name it belongs to, then stack all rounds into one frame.
race_dfs = []

for round_number in range(1, num_rounds_2021 + 1):
    session = f.get_session(2021, round_number, 'R')
    session.load()

    # Look up the event name for this round in the season schedule.
    is_this_round = schedule_2021["RoundNumber"] == round_number
    event_name = schedule_2021.loc[is_this_round, "EventName"].item()

    session_laps = session.laps
    session_laps["RoundNumber"] = round_number
    session_laps["EventName"] = event_name
    race_dfs.append(session_laps)

all_laps_2021 = pd.concat(race_dfs, ignore_index=True)

In [None]:
# Persist the combined 2021 laps; the relative path resolves against the
# current working directory.
all_laps_2021.to_csv(path_or_buf="all_laps_2021.csv")

In [None]:
# Highest 2022 round number whose event date is already in the past.
already_held = schedule_2022["EventDate"] < datetime.now()
num_rounds_2022 = schedule_2022.loc[already_held, "RoundNumber"].max()

In [None]:
# Download every 2022 race held so far and tag each lap with the round number
# and event name it belongs to, then stack all rounds into one frame.
race_dfs = []

for round_number in range(1, num_rounds_2022 + 1):
    session = f.get_session(2022, round_number, 'R')
    session.load()

    # Look up the event name for this round in the season schedule.
    is_this_round = schedule_2022["RoundNumber"] == round_number
    event_name = schedule_2022.loc[is_this_round, "EventName"].item()

    session_laps = session.laps
    session_laps["RoundNumber"] = round_number
    session_laps["EventName"] = event_name
    race_dfs.append(session_laps)

all_laps_2022 = pd.concat(race_dfs, ignore_index=True)

In [None]:
# Persist the combined 2022 laps. The index is written as the first column,
# which the incremental-load section reads back with index_col=0.
all_laps_2022.to_csv(path_or_buf="all_laps_2022.csv")

## Incremental Load

For updating 2022 data

In [None]:
# Columns that hold durations. read_csv does not restore timedelta dtypes,
# so these are converted back explicitly after loading.
timedelta_columns = [
    "Time",
    "LapTime",
    "PitInTime",
    "PitOutTime",
    "Sector1Time",
    "Sector2Time",
    "Sector3Time",
    "Sector1SessionTime",
    "Sector2SessionTime",
    "Sector3SessionTime",
    "LapStartTime",
]

# Reload the previously saved 2022 laps. `infer_datetime_format` is omitted on
# purpose: it is deprecated in pandas 2.x (it only triggers a warning there)
# and was merely a parsing-speed hint in earlier versions.
all_laps_2022 = pd.read_csv(
    "all_laps_2022.csv",
    index_col=0,
    header=0,
    parse_dates=["LapStartDate"],
    true_values=["True"],
    false_values=["False"],
)
all_laps_2022[timedelta_columns] = all_laps_2022[timedelta_columns].apply(pd.to_timedelta)

In [None]:
# Rounds already present in the saved data, and the latest 2022 round whose
# event date is in the past.
loaded_round_numbers = all_laps_2022["RoundNumber"].unique()

already_held = schedule_2022["EventDate"] < datetime.now()
newest_round_number = schedule_2022.loc[already_held, "RoundNumber"].max()

In [None]:
# Start from the laps already in memory and append only the rounds that are
# not part of them yet.
race_dfs = [all_laps_2022]

# Derive the already-loaded rounds from all_laps_2022 itself instead of the
# earlier `loaded_round_numbers` snapshot. This cell overwrites all_laps_2022,
# so the snapshot would be stale on a re-run and the same rounds would be
# appended a second time.
already_loaded = set(all_laps_2022["RoundNumber"].unique())

for round_number in range(1, newest_round_number + 1):
    if round_number in already_loaded:
        continue

    session = f.get_session(2022, round_number, 'R')
    session.load()

    # Tag each lap with the round number and event name it belongs to.
    session_laps = session.laps
    session_laps["RoundNumber"] = round_number
    session_laps["EventName"] = schedule_2022.loc[
        schedule_2022["RoundNumber"] == round_number, "EventName"
    ].item()
    race_dfs.append(session_laps)

all_laps_2022 = pd.concat(race_dfs, ignore_index=True)

In [None]:
# Overwrite the saved 2022 file with the updated set of laps.
all_laps_2022.to_csv(path_or_buf="all_laps_2022.csv")