In [24]:
import pandas as pd
import os  
from datetime import timedelta, datetime
from fastf1.ergast import Ergast

# fastf1's Ergast interface. It is instantiated here but not referenced by any
# of the cells visible in this part of the notebook.
ergast = Ergast()

# Root directory of the project. Later code appends 'data/processed/' to this
# string directly, so the trailing slash is required.
# NOTE(review): hard-coded absolute path, so the notebook only runs on this
# machine. The commented-out Path(__file__) variant needs `Path`, which is not
# among the imports above, and `__file__` is normally undefined in a notebook.
#parent_file = Path(__file__).resolve().parent.parent
parent_file = '/home/kurios/Documents/f1_analysis/'

# Analysis parameters. Edit them here, or re-enable the input() prompts below.
# `year` is not referenced by the cells visible here.
year = 2025
# Quarter of the season to analyse; passed to race_quarter_value().
race_quarter = 1
# Session code embedded in the data file names (the prompt below offers S or R).
race_session = 'R'

# Interactive alternative to the hard-coded parameters above.
#year = int(input("Year ? "))
#race_quarter = int(input("Race Quarter ? (1,4) "))
#race_session = input('Session ?  (S, R) ')

def race_quarter_value(race_quarter):
    """Return the race numbers that make up one quarter of the season.

    The season is split into four consecutive, non-overlapping blocks of six
    races: quarter 1 -> 1..6, quarter 2 -> 7..12, quarter 3 -> 13..18 and
    quarter 4 -> 19..24. Previously quarters 2-4 started on the last race of
    the preceding quarter, so races 6, 12 and 18 were counted twice.

    Args:
        race_quarter: Quarter of the season, one of 1, 2, 3 or 4.

    Returns:
        A ``range`` over the race numbers of that quarter.

    Raises:
        ValueError: If ``race_quarter`` is not 1, 2, 3 or 4. The old code
            returned ``None`` here, which only failed later at ``list(None)``.
    """
    races_per_quarter = 6
    if race_quarter not in (1, 2, 3, 4):
        raise ValueError(f"race_quarter must be 1, 2, 3 or 4, got {race_quarter!r}")
    first_race = (int(race_quarter) - 1) * races_per_quarter + 1
    return range(first_race, first_race + races_per_quarter)

def parse_laptime_str(lap_string):
    """Parse a lap time string of the form ``M:SS.fff`` into a ``timedelta``.

    The digits after the dot are read as a decimal fraction of a second, so
    ``"1:23.5"`` is 1 min 23.5 s and ``"0:20.145000"`` is 20.145 s. This
    matches how ``datetime.strptime`` treats ``%f``. Previously the digits
    were passed as an integer number of milliseconds, which is only correct
    when exactly three digits are present.

    Args:
        lap_string: The lap time text. Anything that is not a string
            containing both ``':'`` and ``'.'`` (e.g. ``None`` or NaN) is
            treated as "no time".

    Returns:
        The parsed duration, or ``timedelta(0)`` when ``lap_string`` is not a
        string with both separators. Digits beyond microsecond precision are
        truncated.

    Raises:
        ValueError: If the string has the separators but its parts are not
            valid integers (or there is more than one ``':'`` / ``'.'``).
    """
    if not (isinstance(lap_string, str) and ':' in lap_string and '.' in lap_string):
        return timedelta(0)

    minutes, rest = lap_string.split(':')
    seconds, fraction = rest.split('.')

    fraction = fraction.strip()
    if not fraction.isdigit():
        raise ValueError(f"invalid fractional seconds in lap time: {lap_string!r}")
    # Right-pad to six digits so ".5" means 500000 us, not 5 ms.
    microseconds = int(fraction[:6].ljust(6, '0'))

    return timedelta(minutes=int(minutes), seconds=int(seconds), microseconds=microseconds)


In [None]:
def _find_race_file(filenames, keyword):
    """Return the first file name containing ``keyword``, or ``None``.

    Matches where the keyword is not directly preceded by a digit are
    preferred, so the keyword for race 1 does not select the file of race 11
    or 21. If no such match exists, the first plain substring match is used.
    """
    candidates = [fname for fname in filenames if keyword in fname]
    for fname in candidates:
        start = fname.find(keyword)
        if start == 0 or not fname[start - 1].isdigit():
            return fname
    return candidates[0] if candidates else None


def _lap_advantage(fastest_lap_str, safety_car_lap_str):
    """Return the rounded percentage of counted laps on which a driver was faster.

    NOTE(review): the slicing implies values shaped like 'N/TT' (a count, one
    separator character, a two-digit total) — confirm against the CSV writer.
    Raises ZeroDivisionError when the total equals the safety-car lap count.
    """
    faster_laps = int(fastest_lap_str[:-3])
    total_laps = int(fastest_lap_str[-2:])
    safety_car_laps = int(safety_car_lap_str[:-3])
    return round((faster_laps / (total_laps - safety_car_laps)) * 100)


def _accumulate(recap, driver_name, iqr, gap, lap_advantage, pit_stop_time, pit_stop_number):
    """Add one comparison row to the running totals of ``driver_name`` in ``recap``."""
    totals = recap.setdefault(driver_name, {'total_iqr': 0, 'total_gap': 0, 'total_lap_advantage': 0, 'total_pit_stop_time': timedelta(0), 'total_pit_stop_number': 0, 'count': 0})
    totals['total_iqr'] += iqr
    totals['total_gap'] += gap
    totals['total_lap_advantage'] += lap_advantage
    totals['total_pit_stop_time'] += pit_stop_time
    totals['total_pit_stop_number'] += pit_stop_number
    totals['count'] += 1


def _format_stat(value, unit):
    """Render ``value`` followed by ``unit``; a missing value is a bare 'N/A'."""
    return 'N/A' if value is None else f'{value}{unit}'


# Race numbers of the selected quarter (the variable name is kept as-is in
# case other cells refer to it).
race_years = list(race_quarter_value(race_quarter))
# Per-driver totals over all races; despite the 'Average ...' keys these hold
# sums until they are divided in the final loop below.
race_recaps = {}
zero_time = timedelta(0)
# Rows where one driver's average lap exceeds 1.5x the other's are ignored.
threshold_multiplier = 1.5

# Files are read through an explicit path instead of os.chdir(), so this cell
# no longer changes the process working directory.
processed_dir = os.path.join(parent_file, 'data/processed/')
processed_files = os.listdir(processed_dir)

for race_number in race_years:
    # NOTE(review): the keyword does not include `year`; if the directory
    # holds several seasons, the first matching file wins — confirm naming.
    keyword = f'{race_number}_{race_session}_race_info'
    driver_data_file = _find_race_file(processed_files, keyword)

    if driver_data_file is None:
        print(f"No data file matching '{keyword}' found in {processed_dir}")
        continue

    try:
        arr = pd.read_csv(os.path.join(processed_dir, driver_data_file))

        # Totals for this race only; merged into race_recaps once every row
        # of the file has been handled.
        temp_recap = {}

        for i, row in enumerate(arr.to_dict('records')):
            try:
                name_driver_1 = row['driver_1_name']
                name_driver_2 = row['driver_2_name']

                # The last two characters are dropped before conversion
                # (presumably a unit suffix — TODO confirm).
                iqr_driver_1 = float(row['iqr_driver_1'][:-2])
                iqr_driver_2 = float(row['iqr_driver_2'][:-2])

                laptime_driver_1_str = row['avg_laptime_driver_1']
                laptime_driver_2_str = row['avg_laptime_driver_2']

                fastest_lap_driver_1_str = row['fastest_driver_1']
                fastest_lap_driver_2_str = row['fastest_driver_2']
                safety_car_lap_str = row['safety_car_lap']

                avg_seconds_driver_1 = parse_laptime_str(laptime_driver_1_str)
                avg_seconds_driver_2 = parse_laptime_str(laptime_driver_2_str)

                comparable = (
                    iqr_driver_1 != 0 and iqr_driver_2 != 0
                    and avg_seconds_driver_1 != zero_time and avg_seconds_driver_2 != zero_time
                    and avg_seconds_driver_1 <= threshold_multiplier * avg_seconds_driver_2
                    and avg_seconds_driver_2 <= threshold_multiplier * avg_seconds_driver_1
                )
                if not comparable:
                    continue

                # Signed gap in seconds: negative means driver 1 was faster.
                lap_driver_1 = datetime.strptime(laptime_driver_1_str, '%M:%S.%f')
                lap_driver_2 = datetime.strptime(laptime_driver_2_str, '%M:%S.%f')
                gap_driver_1 = round((lap_driver_1 - lap_driver_2).total_seconds(), 3)
                gap_driver_2 = -gap_driver_1

                lap_advantage_1 = _lap_advantage(fastest_lap_driver_1_str, safety_car_lap_str)
                lap_advantage_2 = _lap_advantage(fastest_lap_driver_2_str, safety_car_lap_str)

                pit_stop_number_driver_1 = int(row['number_pit_driver_1'])
                pit_stop_number_driver_2 = int(row['number_pit_driver_2'])

                pit_stop_time_driver_1_td = parse_laptime_str(row['total_duration_pit_driver_1'][:-2])
                pit_stop_time_driver_2_td = parse_laptime_str(row['total_duration_pit_driver_2'][:-2])

                # All values are computed before either driver is touched, so
                # a failing row never leaves half-updated totals behind.
                _accumulate(temp_recap, name_driver_1, iqr_driver_1, gap_driver_1, lap_advantage_1,
                            pit_stop_time_driver_1_td, pit_stop_number_driver_1)
                _accumulate(temp_recap, name_driver_2, iqr_driver_2, gap_driver_2, lap_advantage_2,
                            pit_stop_time_driver_2_td, pit_stop_number_driver_2)

            except (KeyError, ValueError, TypeError, ZeroDivisionError) as e:
                # TypeError (e.g. slicing a NaN cell) and ZeroDivisionError
                # used to escape to the file-level handler and discard every
                # row of the race; now only the offending row is skipped.
                print(f"Error processing row {i} in {driver_data_file}: {e}")
                continue

        for name, data in temp_recap.items():
            season_totals = race_recaps.setdefault(name, {'Average IQR': 0, 'Average Gap Race Lap': 0, 'Lap Advantage': 0, 'Average Pit Stop Time': timedelta(0), 'total_count': 0, 'total_pit_stop_number': 0})
            season_totals['Average IQR'] += data['total_iqr']
            season_totals['Average Gap Race Lap'] += data['total_gap']
            season_totals['Lap Advantage'] += data['total_lap_advantage']
            season_totals['Average Pit Stop Time'] += data['total_pit_stop_time']
            season_totals['total_count'] += data['count']
            season_totals['total_pit_stop_number'] += data['total_pit_stop_number']

    except FileNotFoundError:
        print(f"Error: File not found - {driver_data_file}")
    except pd.errors.EmptyDataError:
        print(f"Error: Empty data in file - {driver_data_file}")
    except Exception as e:
        # Deliberately broad: one unreadable file must not stop the other races.
        print(f"An error occurred while processing {driver_data_file}: {e}")

# Turn the summed totals into per-row averages and format them for display.
final_race_recaps = {}
for name, data in race_recaps.items():
    count = data['total_count']
    pit_stops = data['total_pit_stop_number']

    avg_iqr = round(data['Average IQR'] / count, 3) if count > 0 else None
    avg_gap = round(data['Average Gap Race Lap'] / count, 3) if count > 0 else None
    # Rounded to a whole percentage.
    lap_advantage = round(data['Lap Advantage'] / count) if count > 0 else None
    # Average duration per pit stop, not per race.
    avg_pit_stop_time = round(data['Average Pit Stop Time'].total_seconds() / pit_stops, 3) if pit_stops > 0 else None

    final_race_recaps[name] = {
        'Average IQR': _format_stat(avg_iqr, 's'),
        'Average Gap Race Lap': _format_stat(avg_gap, 's'),
        'Lap Advantage': _format_stat(lap_advantage, '%'),
        'Average Pit Stop Time': _format_stat(avg_pit_stop_time, 's')
    }

In [26]:
# Show the formatted per-driver summary built in the previous cell
# (driver name -> dict of formatted statistics).
print(final_race_recaps)

{'Max Verstappen': {'Average IQR': '2.003s', 'Average Gap Race Lap': '-0.663s', 'Lap Advantage': '83%', 'Average Pit Stop Time': '20.145s'}, 'Liam Lawson': {'Average IQR': '1.905s', 'Average Gap Race Lap': '0.973s', 'Lap Advantage': '26%', 'Average Pit Stop Time': '21.0s'}, 'Kimi Antonelli': {'Average IQR': '1.907s', 'Average Gap Race Lap': '0.435s', 'Lap Advantage': '28%', 'Average Pit Stop Time': '23.692s'}, 'George Russell': {'Average IQR': '1.571s', 'Average Gap Race Lap': '-0.435s', 'Lap Advantage': '72%', 'Average Pit Stop Time': '19.838s'}, 'Fernando Alonso': {'Average IQR': '1.862s', 'Average Gap Race Lap': '-0.217s', 'Lap Advantage': '56%', 'Average Pit Stop Time': '20.96s'}, 'Lance Stroll': {'Average IQR': '1.829s', 'Average Gap Race Lap': '0.217s', 'Lap Advantage': '44%', 'Average Pit Stop Time': '20.501s'}, 'Charles Leclerc': {'Average IQR': '1.866s', 'Average Gap Race Lap': '-0.351s', 'Lap Advantage': '70%', 'Average Pit Stop Time': '20.224s'}, 'Lewis Hamilton': {'Average 