In [7]:
import pandas as pd
import os  
from datetime import timedelta, datetime
from fastf1.ergast import Ergast

# Ergast API client.
# NOTE(review): `ergast` is not referenced by any cell visible in this part of the
# notebook — confirm it is used elsewhere before keeping the network client around.
ergast = Ergast()

# Project root used to build the processed-data directory (`parent_file + 'data/processed/'`).
# NOTE(review): hard-coded absolute path, so the notebook only runs on this machine.
# It must keep its trailing slash because later code concatenates it as a plain string.
#parent_file = Path(__file__).resolve().parent.parent
parent_file = '/home/kurios/Documents/f1_analysis/'

# Analysis parameters.
# `race_quarter` selects the block of races returned by race_quarter_value().
# NOTE(review): `year` is not read by any visible cell, and `race_session` is
# overwritten by the loop variables of the aggregation cell — both look vestigial here.
year = 2025
race_quarter = 1
race_session = 'R'

# Interactive alternative to the hard-coded parameters above (disabled).
#year = int(input("Year ? "))
#race_quarter = int(input("Race Quarter ? (1,4) "))
#race_session = input('Session ?  (S, R) ')

def race_quarter_value(race_quarter):
    """Return the race numbers that make up one quarter of a 24-race season.

    Each quarter covers six consecutive races and the quarters do not overlap:
    1 -> 1..6, 2 -> 7..12, 3 -> 13..18, 4 -> 19..24.

    The previous implementation returned 6..12, 12..18 and 18..24 for quarters
    2-4, so races 6, 12 and 18 were counted in two different quarters and those
    quarters held seven races instead of six.

    Args:
        race_quarter: Quarter of the season, one of 1, 2, 3 or 4.

    Returns:
        A ``range`` of race numbers (1-based, inclusive of both quarter ends).

    Raises:
        ValueError: If ``race_quarter`` is not 1, 2, 3 or 4. Previously the
            function silently returned ``None``, which only failed later with
            an unrelated ``TypeError`` when the caller tried to iterate it.
    """
    races_per_quarter = 6

    if race_quarter not in (1, 2, 3, 4):
        raise ValueError(f'race_quarter must be 1, 2, 3 or 4, got {race_quarter!r}')

    first_race = (int(race_quarter) - 1) * races_per_quarter + 1
    return range(first_race, first_race + races_per_quarter)

def parse_laptime_str(lap_string):
    """Parse a lap-time string of the form ``M:SS.fff`` into a ``timedelta``.

    The digits after the dot are treated as a decimal fraction of a second, so
    ``'1:23.4'`` is 1 min 23.4 s and ``'1:23.456789'`` is 1 min 23.456789 s.
    (The previous implementation passed the digits straight to
    ``milliseconds=``, which is only correct for exactly three digits.)
    Fractions longer than six digits are truncated to microsecond precision.

    Args:
        lap_string: The value to parse. Anything that is not a string
            containing both ``':'`` and ``'.'`` (for example ``NaN`` from a
            CSV) is treated as "no lap time".

    Returns:
        The parsed duration, or ``timedelta(0)`` when ``lap_string`` is not a
        string in the expected shape.

    Raises:
        ValueError: If the string has the separators but the pieces are not
            integers, or it contains more than one ``':'`` or ``'.'``.
    """
    if not (isinstance(lap_string, str) and ':' in lap_string and '.' in lap_string):
        return timedelta(0)

    minutes, rest = lap_string.split(':')
    seconds, fraction = rest.split('.')

    # Right-pad (or truncate) to six digits so the fraction is read as
    # microseconds: '4' -> 400000, '456' -> 456000, '456789' -> 456789.
    microseconds = int(fraction.strip()[:6].ljust(6, '0'))

    return timedelta(minutes=int(minutes), seconds=int(seconds), microseconds=microseconds)

In [11]:
# ---------------------------------------------------------------------------
# Build `recaps`: per-driver running totals gathered from the processed CSVs
# of every race-type session ('R', 'S') and qualifying-type session
# ('Q', 'SQ') of the selected races.
# ---------------------------------------------------------------------------
race_years = list(race_quarter_value(race_quarter))  # holds race numbers, despite the name
recaps = {}
zero_time = timedelta(0)
threshold_multiplier = 1.5  # rows whose two lap times differ by more than this factor are skipped
qual_session = range(1, 4)  # suffixes 1..3 of the `..._drivers_info_Q{n}` files

processed_dir = parent_file + 'data/processed/'


def _new_driver_recap():
    """Return a zeroed accumulator for one driver's entry in ``recaps``."""
    return {
        'Average IQR': 0,
        'Average Gap Race Lap': 0,
        'Lap Advantage': 0,
        'Average Pit Stop Time': timedelta(0),
        'total_count': 0,
        'total_pit_stop_number': 0,
        'total_qual_gap': 0,
        'total_corner_advantage': 0,
        'qual_count': 0,
    }


def _new_session_totals():
    """Return a zeroed accumulator for one driver within a single race-type file."""
    return {
        'total_iqr': 0,
        'total_gap': 0,
        'total_lap_advantage': 0,
        'total_pit_stop_time': timedelta(0),
        'total_pit_stop_number': 0,
        'count': 0,
    }


def _first_matching_file(keyword):
    """Return the first name in the processed-data folder that contains ``keyword``, else None.

    NOTE(review): this is a substring match in directory-listing order, so the
    keyword for race 1 ('1_R_race_info') also matches a name containing
    '11_R_race_info' — confirm the file naming scheme rules that out.
    """
    return next((entry for entry in os.listdir(processed_dir) if keyword in entry), None)


def _pace_is_comparable(first, second):
    """True when both durations are non-zero and neither exceeds ``threshold_multiplier`` times the other."""
    if first == zero_time or second == zero_time:
        return False
    return first <= threshold_multiplier * second and second <= threshold_multiplier * first


def _gap_seconds(first_str, second_str):
    """Seconds by which the first 'M:S.f' time exceeds the second (negative when it is smaller), rounded to 3 places."""
    first = datetime.strptime(first_str, '%M:%S.%f')
    second = datetime.strptime(second_str, '%M:%S.%f')
    return round(pd.to_timedelta(first - second).total_seconds(), 3)


def _lap_advantage_percent(fastest_str, safety_car_str):
    """Rounded percentage: leading number of ``fastest_str`` over (its trailing number minus safety-car count).

    NOTE(review): the fixed-width slices assume both strings end with one
    separator character followed by a two-digit number — confirm against the CSV.
    A denominator of zero raises ZeroDivisionError.
    """
    numerator = int(fastest_str[:-3])
    denominator = int(fastest_str[-2:]) - int(safety_car_str[:-3])
    return round((numerator / denominator) * 100)


for race_number in race_years:

    # ---- race-type sessions ------------------------------------------------
    for race_session in ('R', 'S'):
        driver_data_file = _first_matching_file(f'{race_number}_{race_session}_race_info')
        if not driver_data_file:
            continue

        # NOTE(review): chdir changes the kernel's working directory for every
        # later cell; kept as-is because the file is then read by its bare name.
        os.chdir(processed_dir)
        arr = pd.read_csv(driver_data_file)

        # Totals for this file only; folded into `recaps` once all rows are read.
        temp_recap = {}

        for i in range(len(arr)):
            drivers = (arr['driver_1_name'][i], arr['driver_2_name'][i])

            # The IQR cells carry a two-character suffix that is stripped before conversion.
            iqrs = (
                float(arr['iqr_driver_1'][i][:-2]),
                float(arr['iqr_driver_2'][i][:-2]),
            )

            average_strs = (arr['avg_laptime_driver_1'][i], arr['avg_laptime_driver_2'][i])
            fastest_strs = (arr['fastest_driver_1'][i], arr['fastest_driver_2'][i])
            safety_car_lap_str = arr['safety_car_lap'][i]

            averages = (
                parse_laptime_str(average_strs[0]),
                parse_laptime_str(average_strs[1]),
            )

            # Skip rows with a zero IQR, an unparsable average, or wildly different averages.
            if iqrs[0] == 0 or iqrs[1] == 0 or not _pace_is_comparable(*averages):
                continue

            # Driver 2's gap is the mirror image of driver 1's.
            gap_driver_1 = _gap_seconds(*average_strs)
            gaps = (gap_driver_1, -gap_driver_1)

            lap_advantages = (
                _lap_advantage_percent(fastest_strs[0], safety_car_lap_str),
                _lap_advantage_percent(fastest_strs[1], safety_car_lap_str),
            )

            pit_counts = (
                int(arr['number_pit_driver_1'][i]),
                int(arr['number_pit_driver_2'][i]),
            )
            pit_times = (
                parse_laptime_str(arr['total_duration_pit_driver_1'][i][:-2]),
                parse_laptime_str(arr['total_duration_pit_driver_2'][i][:-2]),
            )

            per_driver = zip(drivers, iqrs, gaps, lap_advantages, pit_times, pit_counts)
            for driver, iqr, gap, lap_advantage, pit_time, pit_count in per_driver:
                totals = temp_recap.setdefault(driver, _new_session_totals())
                totals['total_iqr'] += iqr
                totals['total_gap'] += gap
                totals['total_lap_advantage'] += lap_advantage
                totals['total_pit_stop_time'] += pit_time
                totals['total_pit_stop_number'] += pit_count
                totals['count'] += 1

        for name, data in temp_recap.items():
            overall = recaps.setdefault(name, _new_driver_recap())
            overall['Average IQR'] += data['total_iqr']
            overall['Average Gap Race Lap'] += data['total_gap']
            overall['Lap Advantage'] += data['total_lap_advantage']
            overall['Average Pit Stop Time'] += data['total_pit_stop_time']
            overall['total_count'] += data['count']
            overall['total_pit_stop_number'] += data['total_pit_stop_number']

    # ---- qualifying-type sessions --------------------------------------------
    for race_session in ('Q', 'SQ'):
        for session in qual_session:
            driver_data_file = _first_matching_file(
                f'{race_number}_{race_session}_drivers_info_Q{session}'
            )
            if not driver_data_file:
                continue

            os.chdir(processed_dir)
            arr = pd.read_csv(driver_data_file)

            for i in range(len(arr)):
                name_driver_1 = arr['Name'][i]
                name_driver_2 = arr['Name_1'][i]

                laptime_driver_1_str = arr['LapTime'][i]
                laptime_driver_2_str = arr['LapTime_1'][i]

                if not _pace_is_comparable(
                    parse_laptime_str(laptime_driver_1_str),
                    parse_laptime_str(laptime_driver_2_str),
                ):
                    continue

                gap = _gap_seconds(laptime_driver_1_str, laptime_driver_2_str)

                # Each corner-advantage cell is 'numerator/denominator'; convert to a percentage.
                corner_shares = []
                for column in ('corner_advantage', 'corner_advantage1'):
                    numerator, denominator = map(int, arr[column][i].split('/'))
                    corner_shares.append((numerator / denominator) * 100)

                first_entry = recaps.setdefault(name_driver_1, _new_driver_recap())
                first_entry['total_qual_gap'] += gap
                first_entry['total_corner_advantage'] += corner_shares[0]
                first_entry['qual_count'] += 1

                second_entry = recaps.setdefault(name_driver_2, _new_driver_recap())
                second_entry['total_qual_gap'] -= gap
                second_entry['total_corner_advantage'] += corner_shares[1]
                second_entry['qual_count'] += 1

In [12]:
def _mean_or_na(total, samples, digits=None):
    """Return ``total / samples`` rounded to ``digits`` places, or "N/A" when there are no samples.

    With ``digits=None`` the result is rounded to the nearest integer.
    """
    if samples <= 0:
        return "N/A"
    return round(total / samples, digits)


def _with_unit(value, unit):
    """Append ``unit`` to a numeric value; return the "N/A" placeholder unchanged.

    Previously the unit was appended unconditionally, producing strings such
    as 'N/As' and 'N/A%' for drivers without data.
    """
    return value if isinstance(value, str) else f'{value}{unit}'


# Turn the running totals in `recaps` into formatted per-driver averages.
final_race_recaps = {}
for name, data in recaps.items():
    race_samples = data['total_count']            # rows accumulated from race-type sessions
    qual_samples = data['qual_count']             # rows accumulated from qualifying-type sessions
    pit_stops = data['total_pit_stop_number']     # pit time is averaged per stop, not per race

    avg_qual_gap = _mean_or_na(data['total_qual_gap'], qual_samples, 3)
    avg_corner_advantage = _mean_or_na(data['total_corner_advantage'], qual_samples)
    avg_iqr = _mean_or_na(data['Average IQR'], race_samples, 3)
    avg_gap = _mean_or_na(data['Average Gap Race Lap'], race_samples, 3)
    lap_advantage = _mean_or_na(data['Lap Advantage'], race_samples)
    avg_pit_stop_time = _mean_or_na(data['Average Pit Stop Time'].total_seconds(), pit_stops, 3)

    final_race_recaps[name] = {
        'Average Gap Quali Lap': _with_unit(avg_qual_gap, 's'),
        'Corner Advantage': _with_unit(avg_corner_advantage, '%'),
        'Average IQR': _with_unit(avg_iqr, 's'),
        'Average Gap Race Lap': _with_unit(avg_gap, 's'),
        'Lap Advantage': _with_unit(lap_advantage, '%'),
        'Average Pit Stop Time': _with_unit(avg_pit_stop_time, 's'),
    }

In [13]:
# Show the formatted per-driver summary as a plain dict repr (one long line of text).
print(final_race_recaps)

{'Max Verstappen': {'Average Gap Quali Lap': '-0.62s', 'Corner Advantage': '61%', 'Average IQR': '1.825s', 'Average Gap Race Lap': '-0.971s', 'Lap Advantage': '83%', 'Average Pit Stop Time': '20.145s'}, 'Liam Lawson': {'Average Gap Quali Lap': '0.419s', 'Corner Advantage': '41%', 'Average IQR': '2.137s', 'Average Gap Race Lap': '0.901s', 'Lap Advantage': '29%', 'Average Pit Stop Time': '21.0s'}, 'Kimi Antonelli': {'Average Gap Quali Lap': '0.145s', 'Corner Advantage': '53%', 'Average IQR': '1.799s', 'Average Gap Race Lap': '0.429s', 'Lap Advantage': '27%', 'Average Pit Stop Time': '23.692s'}, 'George Russell': {'Average Gap Quali Lap': '-0.145s', 'Corner Advantage': '47%', 'Average IQR': '1.596s', 'Average Gap Race Lap': '-0.429s', 'Lap Advantage': '73%', 'Average Pit Stop Time': '19.838s'}, 'Fernando Alonso': {'Average Gap Quali Lap': '-0.274s', 'Corner Advantage': '53%', 'Average IQR': '1.845s', 'Average Gap Race Lap': '-0.166s', 'Lap Advantage': '53%', 'Average Pit Stop Time': '20.9