In [1]:
import os
import time
import fastf1
import logging
import warnings
import pandas as pd
from tqdm import tqdm
warnings.filterwarnings("ignore")

from concurrent.futures import ThreadPoolExecutor, as_completed

# Work from the project root so the relative paths used below ('.cache', ...) resolve.
# NOTE(review): this is an absolute, machine-specific path — adjust it before running elsewhere.
os.chdir('/Users/rohith/Documents/Projects/F1')
fastf1.Cache.enable_cache('.cache')  # replace with your cache directory
# Only ERROR-level (and above) records from the 'fastf1' logger are emitted;
# lower-severity log messages are suppressed, errors are still shown.
logging.getLogger('fastf1').setLevel(logging.ERROR)
# NOTE(review): presumably restricts FastF1 to already-cached data (no network
# requests) — confirm against the FastF1 Cache documentation.
fastf1.Cache.offline_mode(True)

In [13]:
def get_lap_data(session):
    """
    Rank the drivers of a session by their fastest accurate personal-best lap.

    Only laps flagged both ``IsAccurate`` and ``IsPersonalBest`` are considered.
    Each driver appears exactly once (their quickest qualifying lap), ordered
    from fastest to slowest, and ``Position`` is the 1-based rank in that order.

    Args:
        session (fastf1.core.Session): A loaded session; ``session.laps`` must
            provide the columns ``Driver``, ``Team``, ``LapTime``,
            ``IsAccurate`` and ``IsPersonalBest``.
    Returns:
        pd.DataFrame: One row per driver with the columns ``Position``,
        ``Driver``, ``Team``, ``session``, ``country``, ``year`` and ``Date``.
        Note that ``country`` holds the event name (``session.event.EventName``).
        The frame is empty (columns intact) when no lap qualifies.
    """
    laps = session.laps
    valid_laps = laps[laps['IsAccurate'].eq(True) & laps['IsPersonalBest'].eq(True)]

    # Sort by lap time and keep the first (fastest) row per driver. Unlike
    # groupby(...).apply(...), this does not depend on the grouping column being
    # passed into the applied function, and a driver with two identical fastest
    # laps still gets a single row, so later positions are not shifted.
    fastest = (
        valid_laps.dropna(subset=['LapTime'])
        .sort_values('LapTime', kind='stable')
        .drop_duplicates(subset='Driver', keep='first')
    )

    ranking = fastest[['Driver', 'Team']].reset_index(drop=True)
    ranking.insert(0, 'Position', range(1, len(ranking) + 1))

    # Session-level metadata, repeated on every row.
    ranking['session'] = session.name
    ranking['country'] = session.event.EventName
    ranking['year'] = session.event.year
    ranking['Date'] = session.event.EventDate

    return ranking

def fetch_lap_data(year, race, race_type):
    """
    Load one session and return its per-driver lap ranking, best effort.

    Args:
        year: Season passed to ``fastf1.get_session``.
        race: Round identifier passed to ``fastf1.get_session``.
        race_type: Session identifier passed to ``fastf1.get_session`` (e.g. 'Q').
    Returns:
        The result of ``get_lap_data`` for the loaded session, or ``None`` when
        any step (lookup, load, or lap extraction) raises; the failure is
        reported on stdout instead of being propagated.
    """
    lap_data = None
    try:
        loaded_session = fastf1.get_session(year, race, race_type)
        loaded_session.load()
        # Pause after each load to avoid overwhelming the server.
        time.sleep(5)
        lap_data = get_lap_data(loaded_session)
    except Exception as e:
        print(f"Could not load session for year: {year} and race {race} due to {e}")
    return lap_data


In [16]:
# Fetch the qualifying ranking of every round (1-24) for each season in parallel.
# Rounds that do not exist or cannot be loaded make fetch_lap_data return None
# and are simply skipped.
race_type = 'Q'
tasks = [(year, race, race_type) for race in range(1, 25) for year in [2022, 2023, 2024, 2025]]

# Collect the per-session frames in a list and concatenate once at the end:
# growing a DataFrame with pd.concat inside the loop copies all previously
# collected rows on every iteration.
session_frames = []
with ThreadPoolExecutor(max_workers=8) as executor:
    futures = [executor.submit(fetch_lap_data, year, race, race_type) for year, race, race_type in tasks]
    for future in tqdm(as_completed(futures), total=len(futures)):
        result = future.result()
        if result is not None:
            session_frames.append(result)

if not session_frames:
    # Fail with a clear message instead of a KeyError on 'year' further down.
    raise RuntimeError("No qualifying sessions could be loaded; check the FastF1 cache and schedule data.")

# Rows are in completion order of the worker threads, not in calendar order.
# Wrapping in pd.DataFrame keeps the result a plain DataFrame regardless of
# the DataFrame subclass the individual frames may have.
qualifying_sessions = pd.DataFrame(pd.concat(session_frames, ignore_index=True))

current_drivers = qualifying_sessions[qualifying_sessions['year']==2025].Driver.unique().tolist()
print(f"Current Drivers: {current_drivers}")
# 'country' holds the event name (e.g. 'Bahrain Grand Prix'), see get_lap_data.
circuits = qualifying_sessions['country'].unique().tolist()
print(f"Circuits: {circuits}")
# Numeric team ids; renamed teams share an id (e.g. AlphaTauri / RB / Racing Bulls).
# NOTE(review): 'Haas F1 Team' shares id 1 with 'Alfa Romeo', while 'Kick Sauber'
# has id 6 — this looks unintended; confirm before relying on the ids downstream.
team_mapping = {'Alfa Romeo': 1, 'AlphaTauri': 2, 'Alpine': 3, 'Renault':3,'Aston Martin': 4, 'Ferrari': 5, 'Haas F1 Team': 1, 'Kick Sauber': 6, 'McLaren': 7, 'Mercedes': 8, 'RB': 2, 'Racing Bulls': 2, 'Red Bull Racing': 9, 'Williams': 10}
print(f"Team Mapping: {team_mapping}")

driver_stats_list = []

Could not load session for year: 2025 and race 1 due to Failed to load any schedule data.


 17%|███████████████████                                                                                               | 16/96 [00:39<01:35,  1.19s/it]

Could not load session for year: 2024 and race 6 due to Invalid round: 6


 20%|██████████████████████▌                                                                                           | 19/96 [00:39<00:46,  1.64it/s]

Could not load session for year: 2025 and race 6 due to Failed to load any schedule data.


 79%|██████████████████████████████████████████████████████████████████████████████████████████▎                       | 76/96 [03:11<00:55,  2.76s/it]

Could not load session for year: 2025 and race 20 due to The data you are trying to access has not been loaded yet. See `Session.load`


 80%|███████████████████████████████████████████████████████████████████████████████████████████▍                      | 77/96 [03:17<01:07,  3.55s/it]

Could not load session for year: 2025 and race 21 due to The data you are trying to access has not been loaded yet. See `Session.load`


 85%|█████████████████████████████████████████████████████████████████████████████████████████████████▍                | 82/96 [03:20<00:14,  1.01s/it]

Could not load session for year: 2022 and race 23 due to Invalid round: 23
Could not load session for year: 2023 and race 23 due to Invalid round: 23
Could not load session for year: 2022 and race 24 due to Invalid round: 24
Could not load session for year: 2023 and race 24 due to Invalid round: 24


 94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▉       | 90/96 [03:27<00:06,  1.03s/it]

Could not load session for year: 2025 and race 22 due to The data you are trying to access has not been loaded yet. See `Session.load`
Could not load session for year: 2025 and race 23 due to The data you are trying to access has not been loaded yet. See `Session.load`
Could not load session for year: 2025 and race 24 due to The data you are trying to access has not been loaded yet. See `Session.load`


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 96/96 [03:33<00:00,  2.22s/it]

Current Drivers: ['PIA', 'RUS', 'NOR', 'VER', 'HAM', 'LEC', 'HAD', 'ANT', 'TSU', 'ALB', 'OCO', 'SAI', 'HUL', 'ALO', 'STR', 'GAS', 'BEA', 'DOO', 'BOR', 'LAW', 'COL']
Circuits: ['Bahrain Grand Prix', 'Saudi Arabian Grand Prix', 'Chinese Grand Prix', 'Australian Grand Prix', 'Japanese Grand Prix', 'Miami Grand Prix', 'Azerbaijan Grand Prix', 'Emilia Romagna Grand Prix', 'Spanish Grand Prix', 'Monaco Grand Prix', 'Canadian Grand Prix', 'Hungarian Grand Prix', 'Austrian Grand Prix', 'British Grand Prix', 'French Grand Prix', 'Belgian Grand Prix', 'Dutch Grand Prix', 'Italian Grand Prix', 'Singapore Grand Prix', 'Qatar Grand Prix', 'United States Grand Prix', 'Mexico City Grand Prix', 'Las Vegas Grand Prix', 'São Paulo Grand Prix', 'Abu Dhabi Grand Prix']
Team Mapping: {'Alfa Romeo': 1, 'AlphaTauri': 2, 'Alpine': 3, 'Renault': 3, 'Aston Martin': 4, 'Ferrari': 5, 'Haas F1 Team': 1, 'Kick Sauber': 6, 'McLaren': 7, 'Mercedes': 8, 'RB': 2, 'Racing Bulls': 2, 'Red Bull Racing': 9, 'Williams': 10}




In [30]:

# Build one record per (current driver, circuit) and write them to CSV.
# Reset the accumulator in this cell: it is otherwise only initialised in the
# previous cell, so re-running this cell alone would append duplicate rows.
driver_stats_list = []

for driver in current_drivers:

    driver_sessions = qualifying_sessions[qualifying_sessions['Driver'] == driver].sort_values('Date')
    recent_sessions = driver_sessions[driver_sessions['year'] == 2025]
    avg_driver_pos = driver_sessions.groupby('country')['Position'].mean().sort_values().to_dict()
    # The driver's team is taken from their most recent session (rows are sorted by Date).
    team_name = driver_sessions['Team'].values[-1]
    team_perf = qualifying_sessions[qualifying_sessions['Team'] == team_name].groupby('country')['Position'].mean().sort_values().to_dict()

    # Fold a 'United Kingdom' entry into 'Great Britain' (plain mean of the two
    # averages when both exist).
    # NOTE(review): 'country' holds event names such as 'British Grand Prix', so
    # this branch is probably never taken — confirm before removing.
    if 'United Kingdom' in avg_driver_pos:
        uk_avg = avg_driver_pos.pop('United Kingdom')
        if 'Great Britain' in avg_driver_pos:
            avg_driver_pos['Great Britain'] = (uk_avg + avg_driver_pos['Great Britain'])/2
        else:
            avg_driver_pos['Great Britain'] = uk_avg

    # Per-driver aggregates do not depend on the circuit, so compute them once.
    # int() truncates the means (it does not round).
    avg_position_past = int(driver_sessions['Position'].mean())
    best_position_past = int(driver_sessions['Position'].min())
    worst_position_past = int(driver_sessions['Position'].max())
    best_position_recent = int(recent_sessions['Position'].min())
    worst_position_recent = int(recent_sessions['Position'].max())
    avg_position_recent = int(recent_sessions['Position'].mean())
    team_id = team_mapping[team_name]

    for country, avg_position_here in avg_driver_pos.items():
        # Position at this circuit in 2025, or -1 when the driver has no 2025 result there.
        recent_here = recent_sessions.loc[recent_sessions['country'] == country, 'Position']
        last_position = int(recent_here.iloc[0]) if not recent_here.empty else -1

        # Average position of the driver's current team at this circuit, or -1 when unknown.
        team_avg_here = float(team_perf[country]) if country in team_perf else -1

        driver_stats_list.append({
            'Name': driver,
            'Avg_Position_Past': avg_position_past,
            'Best_Position_Past': best_position_past,
            'Worst_Position_Past': worst_position_past,
            'Best_Position_Recent': best_position_recent,
            'Worst_Position_Recent': worst_position_recent,
            'Avg_Position_Recent': avg_position_recent,
            'Last_Position': last_position,
            'Avg_Finish_in_this_circuit' : int(avg_position_here),
            'Avg_Team_Finish_in_this_circuit' : team_avg_here,
            'Country': country,
            'Team': team_id
        })

d = pd.DataFrame(driver_stats_list)
# Every column except the two string columns is stored as a rounded integer.
for col in d.columns:
    if col not in ['Name','Country']:
        d[col] = d[col].round().astype(int)
# Make sure the target directory exists before writing.
os.makedirs('.cache/hist_data', exist_ok=True)
d.to_csv('.cache/hist_data/team_driver_performance.csv',index=False)
