In [1]:
import fastf1
import pandas as pd
import numpy as np
from tqdm import tqdm
from src.utils import get_lap_data
import logging
import warnings
warnings.filterwarnings("ignore")
logging.getLogger('fastf1').setLevel(logging.ERROR)  # This will disable both logs and warnings
fastf1.Cache.enable_cache('./.cache')  # replace with your cache directory
#fastf1.Cache.offline_mode(True)

In [2]:
team_driver_performance = pd.read_csv('.cache/hist_data/team_driver_performance.csv')
Team_Mapping = {'Alfa Romeo': 1, 'AlphaTauri': 2, 'Alpine': 3, 'Aston Martin': 4, 'Ferrari': 5, 'Haas F1 Team': 1, 'Kick Sauber': 6, 'McLaren': 7, 'Mercedes': 8, 'RB': 2, 'Racing Bulls': 9, 'Red Bull Racing': 10, 'Williams': 11}

prac_columns = ['DriverNumber_prac','LapTime_prac', 'Sector1Time_prac', 'Sector2Time_prac', 'Sector3Time_prac', 'SpeedI1_prac', 'SpeedI2_prac', 'TyreLife_prac','Position_prac','Driver_prac']
old_qual_columns = ['LapTime_old_qual', 'Position_old_qual','Sector1Time_old_qual', 'Sector2Time_old_qual', 'Sector3Time_old_qual','Driver_old_qual']

In [13]:
session = fastf1.get_session(2023, 3, 'Q')

'Australian Grand Prix'

In [17]:
def load_data(race,year):
    no_prac = False
    no_qual = False
    e_name = ''
    # Load sessions 
    try:
        session = fastf1.get_session(year, race, 'Q')
        e_name = session.event.EventName
        session.load()
        lap_qual = get_lap_data(session)
    except Exception as e:
        print(f"Error loading qualifying session for {race} in {year}: {e}")
        lap_qual = None

    try:
        session_old_qual = fastf1.get_session(year-1,race,'Q')
        e_name = session_old_qual.event.EventName
        session_old_qual.load()
        lap_old_qual = get_lap_data(session_old_qual)
        lap_old_qual.columns = [f'{col}_old_qual' if col not in ['Team'] else col for col in lap_old_qual.columns]
    except Exception as e:
        print(f"Error loading old qualifying session for {race} in {year-1}: {e}")
        lap_old_qual = pd.DataFrame([[-1]*len(old_qual_columns)]*20,columns=old_qual_columns)  # Empty DataFrame if old qualifying session fails
        lap_old_qual['Driver_old_qual'] = lap_qual['Driver']
        no_qual = True

    try:
        session_p = fastf1.get_session(year,race,'FP3')
        session_p.load()
        e_name = session_p.event.EventName
        lap_prac = get_lap_data(session_p)
        lap_prac.columns = [f'{col}_prac' if col not in ['Team'] else col for col in lap_prac.columns]
    except Exception as e:
        print(f"Error loading practice session for {race} in {year}: {e}")
        lap_prac = pd.DataFrame([[-1]*len(prac_columns)]*20,columns=prac_columns)  # Empty DataFrame if practice session fails
        lap_prac['DriverNumber_prac'] = lap_qual['DriverNumber']
        lap_prac['Driver_prac'] = lap_qual['Driver']
        no_prac = True

    if no_prac and no_qual:
        return None, None, None
    else:
        lap_old_qual_clean = lap_old_qual[old_qual_columns]
        lap_prac_clean = lap_prac[prac_columns]
        final_lap = (
            lap_prac_clean
            .merge(lap_old_qual_clean, left_on='Driver_prac', right_on='Driver_old_qual')
            .merge(
                team_driver_performance[team_driver_performance['Country'] == e_name],
                left_on='Driver_prac', right_on='Name'
            )
            .drop(columns=['Driver_prac', 'Driver_old_qual','Name','Country'])
            .set_index('DriverNumber_prac')
        )
        final_lap['race_event'] = [race]*len(final_lap)
        
        if lap_qual is not None:
            y = lap_qual[['LapTime', 'Position', 'DriverNumber']].set_index('DriverNumber').loc[final_lap.index]
            y_laptime = y.pop('LapTime').to_list()
            y_position = y.pop('Position').to_list()
        else:
            y_laptime = [-1]*len(final_lap)
            y_position = [-1]*len(final_lap)


        return final_lap.values, y_laptime, y_position

In [None]:
import os

import concurrent.futures

error_X = []
X_train = []
y_train_lap = []
y_train_pos = []

pairs = []
for y in [2023, 2024, 2025]:
    rounds = range(1, 17) if y == 2025 else range(1, 25)
    for r in rounds:
        pairs.append((r, y))

acc_X = []
acc_y1 = []
acc_y2 = []

max_workers = 2
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as ex:
    fut_to_pair = {ex.submit(load_data, r, y): (r, y) for r, y in pairs}
    for fut in tqdm(concurrent.futures.as_completed(fut_to_pair), total=len(fut_to_pair)):
        r, y = fut_to_pair[fut]
        try:
            X, y1, y2 = fut.result()
        except Exception as e:
            print(f"Error for {r} {y}: {e}")
            error_X.append((r, y))
            continue

        # treat empty result
        if X is not None:
            if isinstance(X, np.ndarray) and X.size == 0:
                error_X.append((r, y))
                continue
            if len(X) == 0:
                error_X.append((r, y))
                continue

            acc_X.append(X)
            acc_y1.extend(y1)
            acc_y2.extend(y2)

  0%|                                                                                                             | 0/64 [00:00<?, ?it/s]

  5%|████▋                                                                                                | 3/64 [00:26<08:47,  8.64s/it]

Error loading practice session for 4 in 2023: Session type 'FP3' does not exist for this event


 14%|██████████████▏                                                                                      | 9/64 [00:59<05:10,  5.65s/it]

Error loading practice session for 9 in 2023: Session type 'FP3' does not exist for this event


 19%|██████████████████▊                                                                                 | 12/64 [01:16<04:34,  5.27s/it]

Error loading practice session for 12 in 2023: Session type 'FP3' does not exist for this event


 27%|██████████████████████████▌                                                                         | 17/64 [01:47<04:20,  5.54s/it]

Error loading practice session for 17 in 2023: Session type 'FP3' does not exist for this event


 28%|████████████████████████████▏                                                                       | 18/64 [01:49<03:35,  4.68s/it]

Error loading practice session for 18 in 2023: Session type 'FP3' does not exist for this event


 30%|█████████████████████████████▋                                                                      | 19/64 [01:57<04:16,  5.70s/it]

Error loading practice session for 20 in 2023: Session type 'FP3' does not exist for this event


 33%|████████████████████████████████▊                                                                   | 21/64 [02:09<04:27,  6.22s/it]

Error loading qualifying session for 23 in 2023: Invalid round: 23
Error loading old qualifying session for 23 in 2022: Invalid round: 23
Error for 23 2023: 'NoneType' object is not subscriptable
Error loading qualifying session for 24 in 2023: Invalid round: 24
Error loading old qualifying session for 24 in 2022: Invalid round: 24
Error for 24 2023: 'NoneType' object is not subscriptable


 45%|█████████████████████████████████████████████▎                                                      | 29/64 [02:42<03:02,  5.22s/it]

Error loading practice session for 5 in 2024: Session type 'FP3' does not exist for this event


 47%|██████████████████████████████████████████████▉                                                     | 30/64 [02:44<02:25,  4.29s/it]

Error loading practice session for 6 in 2024: Session type 'FP3' does not exist for this event


 55%|██████████████████████████████████████████████████████▋                                             | 35/64 [03:16<02:37,  5.42s/it]

Error loading practice session for 11 in 2024: Session type 'FP3' does not exist for this event


 67%|███████████████████████████████████████████████████████████████████▏                                | 43/64 [04:01<01:36,  4.59s/it]

Error loading practice session for 19 in 2024: Session type 'FP3' does not exist for this event


 69%|████████████████████████████████████████████████████████████████████▊                               | 44/64 [04:09<01:53,  5.65s/it]

Error loading practice session for 21 in 2024: Session type 'FP3' does not exist for this event


 72%|███████████████████████████████████████████████████████████████████████▉                            | 46/64 [04:17<01:26,  4.82s/it]

Error loading old qualifying session for 23 in 2023: Invalid round: 23
Error loading practice session for 23 in 2024: Session type 'FP3' does not exist for this event


 73%|█████████████████████████████████████████████████████████████████████████▍                          | 47/64 [04:19<01:11,  4.21s/it]

Error loading old qualifying session for 24 in 2023: Invalid round: 24


 77%|████████████████████████████████████████████████████████████████████████████▌                       | 49/64 [04:31<01:16,  5.11s/it]

Error loading practice session for 2 in 2025: Session type 'FP3' does not exist for this event


 83%|██████████████████████████████████████████████████████████████████████████████████▊                 | 53/64 [04:51<00:59,  5.43s/it]

Error loading practice session for 6 in 2025: Session type 'FP3' does not exist for this event


 86%|█████████████████████████████████████████████████████████████████████████████████████▉              | 55/64 [05:02<00:52,  5.85s/it]

Error for 7 2025: "['22'] not in index"


 95%|███████████████████████████████████████████████████████████████████████████████████████████████▎    | 61/64 [05:33<00:15,  5.17s/it]

Error loading practice session for 13 in 2025: Session type 'FP3' does not exist for this event


 98%|██████████████████████████████████████████████████████████████████████████████████████████████████▍ | 63/64 [05:45<00:05,  5.52s/it]

Error for 15 2025: "['18'] not in index"


100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [05:47<00:00,  5.43s/it]


ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 2 dimension(s)

In [25]:
X_train = np.concat(acc_X)

# y_train_lap / y_train_pos might be lists or arrays in the notebook; handle both
if isinstance(y_train_lap, list):
    y_train_lap.extend(acc_y1)
else:
    y_train_lap = np.concatenate([y_train_lap, np.array(acc_y1)])

if isinstance(y_train_pos, list):
    y_train_pos.extend(acc_y2)
else:
    y_train_pos = np.concatenate([y_train_pos, np.array(acc_y2)])

In [26]:
X_train.shape, len(y_train_lap), len(y_train_pos), len(error_X)

((775, 25), 775, 775, 4)

In [27]:
#Save the training data
np.save('.cache/hist_data/X_train.npy',X_train)
np.save('.cache/hist_data/y_train_lap.npy',y_train_lap)
np.save('.cache/hist_data/y_train_pos.npy',y_train_pos)