In [1]:
import fastf1
import pandas as pd
import numpy as np

In [2]:
def add_weather_info(session, stints_df):
    """
    Attach session-wide average weather readings to every stint row.

    The mean of each column of ``session.weather_data`` is computed once and
    written to ``stints_df`` as a column of the same name, so every row of
    the frame carries the same value for a given weather field.
    ``stints_df`` is modified in place and is also returned.

    Args:
        session: FastF1 session object exposing a ``weather_data`` DataFrame
        stints_df: DataFrame containing stint data

    Returns:
        The same ``stints_df`` object, with one added column per weather field
    """
    averages = session.weather_data.mean()

    # Assigning a scalar to a column broadcasts it over all rows
    for field_name, field_mean in averages.items():
        stints_df[field_name] = field_mean

    return stints_df


def add_starting_positions(session, stints_df):
    """
    Look up each stint's driver in the session results and record the grid slot.

    A driver-abbreviation -> grid-position lookup is built from
    ``session.results`` and applied to the 'Driver' column. Drivers that do
    not appear in the results end up with a missing value.
    ``stints_df`` is modified in place and is also returned.

    Args:
        session: FastF1 session object whose ``results`` frame has
                 'Abbreviation' and 'GridPosition' columns
        stints_df: DataFrame containing stint data with a 'Driver' column

    Returns:
        The same ``stints_df`` object, with an added StartingPosition column
    """
    # Abbreviation -> grid position, taken straight from the results table
    position_by_driver = (
        session.results[['Abbreviation', 'GridPosition']]
        .set_index('Abbreviation')['GridPosition']
        .to_dict()
    )

    stints_df['StartingPosition'] = stints_df['Driver'].map(position_by_driver)
    return stints_df


def add_team_info(session, stints_df):
    """
    Label every stint row with the team of its driver.

    Each entry of ``session.drivers`` is resolved through
    ``session.get_driver`` to obtain the driver's abbreviation and team name;
    the resulting lookup is applied to the 'Driver' column. Drivers missing
    from the lookup end up with a missing value.
    ``stints_df`` is modified in place and is also returned.

    Args:
        session: FastF1 session object containing driver info
        stints_df: DataFrame containing stint data with a 'Driver' column

    Returns:
        The same ``stints_df`` object, with an added Team column
    """
    # One get_driver call per entry in session.drivers
    driver_records = (session.get_driver(entry) for entry in session.drivers)
    team_by_abbreviation = {
        record['Abbreviation']: record['TeamName'] for record in driver_records
    }

    stints_df['Team'] = stints_df['Driver'].map(team_by_abbreviation)
    return stints_df


def add_braking_telemetry(session, stints_df):
    # TODO: Verify against real session data that this works properly.
    # TODO: Create a function that generates the input data.
    """
    Add the mean and standard deviation of per-lap braking time to each stint.

    For every row of ``stints_df`` the laps of that driver and stint are
    selected from ``session.laps``. For each lap, the time between consecutive
    telemetry samples (in seconds) is summed over the samples whose 'Brake'
    value is True, giving the lap's total braking time. Laps without usable
    telemetry, or with no braking at all, are ignored. The mean and the
    population standard deviation (``np.std`` default) of the remaining lap
    values are stored per stint; stints without any valid lap get NaN.

    Args:
        session: A FastF1 session object. If lap or car data is not available
                 yet, ``session.load(laps=True, telemetry=True, ...)`` is
                 called once before processing.
        stints_df: A pandas DataFrame with one row per stint. It must contain
                   a 'Driver' column and the stint number in either a
                   'StintNumber' or a 'Stint' column ('StintNumber' wins when
                   both are present).

    Returns:
        pandas.DataFrame: The same ``stints_df`` object (modified in place)
                          with two new columns: 'mean_brake_time' and
                          'std_brake_time'.
    """
    # Probe with len() instead of truthiness: the lap table is a DataFrame and
    # `not dataframe` raises ValueError. Accessing the attributes may itself
    # raise when nothing has been loaded yet (presumably FastF1's "data not
    # loaded" error -- confirm), which is treated as "not loaded".
    try:
        data_loaded = len(session.laps) > 0 and len(session.car_data) > 0
    except Exception:
        data_loaded = False

    if not data_loaded:
        print("Session data is not fully loaded. Loading laps and telemetry...")
        session.load(laps=True, telemetry=True, weather=False, messages=False)
        print("Data loading complete.")

    # Frames built by get_stints_race() name the column 'Stint'; the original
    # contract of this function asked for 'StintNumber'. Accept both.
    stint_column = 'StintNumber' if 'StintNumber' in stints_df.columns else 'Stint'

    mean_brake_times = []
    std_brake_times = []

    # Laps per driver, so pick_drivers() runs once per driver, not per stint
    laps_by_driver = {}

    for _, stint in stints_df.iterrows():
        driver = stint['Driver']
        stint_number = stint[stint_column]

        if driver not in laps_by_driver:
            laps_by_driver[driver] = session.laps.pick_drivers([driver])
        driver_laps = laps_by_driver[driver]
        stint_laps = driver_laps[driver_laps["Stint"] == stint_number]

        # Total braking duration of each usable lap in this stint; stays empty
        # (and therefore yields NaN below) when the stint has no laps.
        lap_brake_durations = []

        for _lap_index, lap in stint_laps.iterlaps():
            try:
                telemetry = lap.telemetry

                # Some laps (e.g., in/out laps) may not have complete telemetry
                if telemetry is None or telemetry.empty or 'Brake' not in telemetry.columns:
                    continue

                # Seconds elapsed since the previous sample. Kept as a local
                # Series so the lap's telemetry frame is not modified.
                sample_seconds = telemetry['Time'].diff().dt.total_seconds()

                # Sum the sample durations for which the brake was applied
                total_braking_time = sample_seconds[telemetry['Brake'].eq(True)].sum()

                # Only laps where braking actually occurred are considered
                if total_braking_time > 0:
                    lap_brake_durations.append(total_braking_time)

            except Exception as e:
                # Best effort: report the lap and carry on with the next one
                print(f"Could not process Lap {lap['LapNumber']} for {driver} in Stint {stint_number}. Error: {e}")
                continue

        if lap_brake_durations:
            mean_brake_times.append(np.mean(lap_brake_durations))
            std_brake_times.append(np.std(lap_brake_durations))
        else:
            mean_brake_times.append(np.nan)
            std_brake_times.append(np.nan)

    stints_df['mean_brake_time'] = mean_brake_times
    stints_df['std_brake_time'] = std_brake_times

    return stints_df


def add_stint_telemetry_analysis(session, stints_df):
    # TODO: Verify against real session data that this works properly.
    # TODO: Create a function that generates the input data.
    """
    Add aggregated speed and gear-change metrics to each stint.

    For every row of ``stints_df`` the laps of that driver and stint are
    selected from ``session.laps``. For each lap with usable telemetry
    (non-empty, with 'Speed' and 'nGear' columns) three values are computed:
    1. The mean of 'Speed'.
    2. The difference between the maximum and minimum 'Speed'.
    3. The number of gear changes, i.e. how many samples have an 'nGear'
       value different from the previous sample. The first sample has no
       predecessor and is therefore not counted as a change.

    These per-lap values are then aggregated over the stint:
    - Mean and population standard deviation (``np.std`` default) of the
      per-lap mean speeds.
    - Mean and population standard deviation of the per-lap speed deltas.
    - Mean number of gear changes per lap.
    Stints without any usable lap get NaN in all five columns.

    Args:
        session: A FastF1 session object. If lap or car data is not available
                 yet, ``session.load(laps=True, telemetry=True, ...)`` is
                 called once before processing.
        stints_df: A pandas DataFrame with one row per stint. It must contain
                   a 'Driver' column and the stint number in either a
                   'StintNumber' or a 'Stint' column ('StintNumber' wins when
                   both are present).

    Returns:
        pandas.DataFrame: The same ``stints_df`` object (modified in place)
                          with five new columns: 'AvgSpeed', 'StdSpeed',
                          'AvgSpeedDelta', 'StdSpeedDelta', and
                          'AvgGearChanges'.
    """
    # Probe with len() instead of truthiness: the lap table is a DataFrame and
    # `not dataframe` raises ValueError. Accessing the attributes may itself
    # raise when nothing has been loaded yet (presumably FastF1's "data not
    # loaded" error -- confirm), which is treated as "not loaded".
    try:
        data_loaded = len(session.laps) > 0 and len(session.car_data) > 0
    except Exception:
        data_loaded = False

    if not data_loaded:
        print("Session data is not fully loaded. Loading laps and telemetry...")
        # Weather and messages are not needed here; skipping them is intended
        # to speed up loading.
        session.load(laps=True, telemetry=True, weather=False, messages=False)
        print("Data loading complete.")

    # Frames built by get_stints_race() name the column 'Stint'; the original
    # contract of this function asked for 'StintNumber'. Accept both.
    stint_column = 'StintNumber' if 'StintNumber' in stints_df.columns else 'Stint'

    metric_columns = ['AvgSpeed', 'StdSpeed', 'AvgSpeedDelta', 'StdSpeedDelta', 'AvgGearChanges']
    missing_metrics = (np.nan,) * len(metric_columns)

    # One tuple per stint row, ordered like metric_columns
    stint_metrics = []

    # Laps per driver, so pick_drivers() runs once per driver, not per stint
    laps_by_driver = {}

    for _, stint in stints_df.iterrows():
        driver = stint['Driver']
        stint_number = stint[stint_column]

        # Select the stint by masking on the 'Stint' lap column, the same
        # selection add_braking_telemetry uses.
        if driver not in laps_by_driver:
            laps_by_driver[driver] = session.laps.pick_drivers([driver])
        driver_laps = laps_by_driver[driver]
        stint_laps = driver_laps[driver_laps["Stint"] == stint_number]

        # Per-lap values for this stint; they stay empty when there are no laps
        lap_avg_speeds = []
        lap_speed_deltas = []
        lap_gear_changes = []

        for _lap_index, lap in stint_laps.iterlaps():
            try:
                telemetry = lap.get_telemetry()

                # Skip laps with no telemetry or missing essential columns
                if (
                    telemetry is None
                    or telemetry.empty
                    or 'Speed' not in telemetry.columns
                    or 'nGear' not in telemetry.columns
                ):
                    continue

                speed = telemetry['Speed']

                # diff() is NaN on the first sample; fill it with 0 so that
                # sample is not counted as a gear change.
                gear_changes = telemetry['nGear'].diff().fillna(0).ne(0).sum()

                lap_avg_speeds.append(speed.mean())
                lap_speed_deltas.append(speed.max() - speed.min())
                lap_gear_changes.append(gear_changes)

            except Exception as e:
                # Best effort: report the lap and carry on with the next one
                print(f"Could not process Lap {lap['LapNumber']} for {driver} in Stint {stint_number}. Error: {e}")
                continue

        if lap_avg_speeds:
            stint_metrics.append((
                np.mean(lap_avg_speeds),
                np.std(lap_avg_speeds),
                np.mean(lap_speed_deltas),
                np.std(lap_speed_deltas),
                np.mean(lap_gear_changes),
            ))
        else:
            stint_metrics.append(missing_metrics)

    # Unpack the per-stint tuples into their columns
    for position, column in enumerate(metric_columns):
        stints_df[column] = [metrics[position] for metrics in stint_metrics]

    return stints_df


def get_stints_race(session):
    """
    Summarise a session's laps into one row per driver, stint and compound.

    Laps are grouped by ('Driver', 'Stint', 'Compound') and the non-null
    'LapNumber' entries of each group are counted. Rows whose grouping key is
    missing are left out by pandas' default groupby behaviour.

    Args:
        session: Object exposing a ``laps`` DataFrame with 'Driver', 'Stint',
                 'Compound' and 'LapNumber' columns (a loaded FastF1 session)

    Returns:
        DataFrame with columns 'Driver', 'Stint', 'Compound' and
        'StintLength' (number of laps in the stint), sorted by the group keys
    """
    lap_columns = session.laps[["Driver", "Stint", "Compound", "LapNumber"]]

    # The per-driver abbreviation lookup that used to live here was never
    # used, so only the lap table is needed.
    stints = (
        lap_columns
        .groupby(["Driver", "Stint", "Compound"])
        .count()
        .reset_index()
        .rename(columns={"LapNumber": "StintLength"})
    )
    return stints

In [None]:
# NOTE(review): range(2019, 2019) is empty, so nothing in this loop runs as
# written. Widen the range (e.g. range(2019, 2020)) to actually collect data.
for year in range(2019, 2019):
    # Season calendar, without testing events
    calendar = fastf1.get_event_schedule(year, include_testing=False)

    # One enriched stint table per race that was processed successfully
    all_stints = []

    for idx, event in calendar.iterrows():
        try:
            # Load the race session of this event
            session = fastf1.get_session(year, event['EventName'], 'R')
            session.load()

            # Base stint table, then each enrichment step in order:
            # team names, starting positions, mean weather, braking telemetry
            race_stints = get_stints_race(session)
            for enrich in (
                add_team_info,
                add_starting_positions,
                add_weather_info,
                add_braking_telemetry,
            ):
                race_stints = enrich(session, race_stints)

            # Tag the rows with the season and the event they belong to
            race_stints['Year'] = year
            race_stints['Circuit'] = event['EventName']

            all_stints.append(race_stints)

            print(f"Processed {year} {event['EventName']}")
        except Exception as e:
            # A failing race is reported and skipped; the season continues
            print(f"Error processing {year} {event['EventName']}: {e}")

    # Write one CSV per season, but only if at least one race succeeded
    if all_stints:
        year_stints = pd.concat(all_stints, ignore_index=True)
        year_stints.to_csv(f"stints_data_{year}.csv", index=False)

In [None]:
# Race session of the 2023 Australian Grand Prix, used by the exploratory
# cells that follow.
session = fastf1.get_session(
    2023,
    'Australian Grand Prix',
    'R',
)
# Populate the session with its data using the default load options
session.load()

In [21]:
# Check that telemetry is loaded: print the telemetry columns of every lap in
# VER's first stint.
ver_laps = session.laps.pick_drivers(["VER"])
ver_first_stint = ver_laps[ver_laps["Stint"] == 1]
for n, lap in ver_first_stint.iterlaps():
    print(lap.telemetry.columns)

Index(['Date', 'SessionTime', 'DriverAhead', 'DistanceToDriverAhead', 'Time',
       'RPM', 'Speed', 'nGear', 'Throttle', 'Brake', 'DRS', 'Source',
       'Distance', 'RelativeDistance', 'Status', 'X', 'Y', 'Z'],
      dtype='object')
Index(['Date', 'SessionTime', 'DriverAhead', 'DistanceToDriverAhead', 'Time',
       'RPM', 'Speed', 'nGear', 'Throttle', 'Brake', 'DRS', 'Source',
       'Distance', 'RelativeDistance', 'Status', 'X', 'Y', 'Z'],
      dtype='object')




Index(['Date', 'SessionTime', 'DriverAhead', 'DistanceToDriverAhead', 'Time',
       'RPM', 'Speed', 'nGear', 'Throttle', 'Brake', 'DRS', 'Source',
       'Distance', 'RelativeDistance', 'Status', 'X', 'Y', 'Z'],
      dtype='object')
Index(['Date', 'SessionTime', 'DriverAhead', 'DistanceToDriverAhead', 'Time',
       'RPM', 'Speed', 'nGear', 'Throttle', 'Brake', 'DRS', 'Source',
       'Distance', 'RelativeDistance', 'Status', 'X', 'Y', 'Z'],
      dtype='object')
Index(['Date', 'SessionTime', 'DriverAhead', 'DistanceToDriverAhead', 'Time',
       'RPM', 'Speed', 'nGear', 'Throttle', 'Brake', 'DRS', 'Source',
       'Distance', 'RelativeDistance', 'Status', 'X', 'Y', 'Z'],
      dtype='object')
Index(['Date', 'SessionTime', 'DriverAhead', 'DistanceToDriverAhead', 'Time',
       'RPM', 'Speed', 'nGear', 'Throttle', 'Brake', 'DRS', 'Source',
       'Distance', 'RelativeDistance', 'Status', 'X', 'Y', 'Z'],
      dtype='object')
Index(['Date', 'SessionTime', 'DriverAhead', 'DistanceToDriv



Index(['Date', 'SessionTime', 'DriverAhead', 'DistanceToDriverAhead', 'Time',
       'RPM', 'Speed', 'nGear', 'Throttle', 'Brake', 'DRS', 'Source',
       'Distance', 'RelativeDistance', 'Status', 'X', 'Y', 'Z'],
      dtype='object')


In [6]:
# Explore the telemetry of a driver's fastest lap (VER in the loaded session)
ver_fastest_lap = session.laps.pick_drivers(['VER']).pick_fastest()
telemetry = ver_fastest_lap.get_telemetry()
telemetry



Unnamed: 0,Date,SessionTime,DriverAhead,DistanceToDriverAhead,Time,RPM,Speed,nGear,Throttle,Brake,DRS,Source,Distance,RelativeDistance,Status,X,Y,Z
2,2023-04-02 06:30:51.679,0 days 02:29:49.857000,,,0 days 00:00:00,11484,292,7,100,False,0,interpolation,0.086239,0.000017,OnTrack,-1239.906980,-1290.196892,84.249838
3,2023-04-02 06:30:51.734,0 days 02:29:49.912000,,,0 days 00:00:00.055000,11474,293,7,100,False,0,pos,4.587526,0.000878,OnTrack,-1279.000000,-1253.000000,84.000000
4,2023-04-02 06:30:51.769,0 days 02:29:49.947000,,,0 days 00:00:00.090000,11454,294,7,100,False,0,car,7.455556,0.001427,OnTrack,-1298.845624,-1234.134916,83.850912
5,2023-04-02 06:30:52.014,0 days 02:29:50.192000,,,0 days 00:00:00.335000,11051,295,7,100,False,0,pos,27.598383,0.005284,OnTrack,-1374.000000,-1163.000000,83.000000
6,2023-04-02 06:30:52.089,0 days 02:29:50.267000,,,0 days 00:00:00.410000,10648,296,8,100,False,0,car,33.766667,0.006465,OnTrack,-1407.411718,-1131.308315,82.790501
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
603,2023-04-02 06:32:11.449,0 days 02:31:09.627000,,,0 days 00:01:19.770000,11499,290,7,100,False,0,car,5174.295556,0.990672,OnTrack,-906.967914,-1606.979700,86.404863
604,2023-04-02 06:32:11.649,0 days 02:31:09.827000,,,0 days 00:01:19.970000,11488,291,7,100,False,0,car,5190.462222,0.993767,OnTrack,-1016.824346,-1503.039866,85.298380
605,2023-04-02 06:32:11.734,0 days 02:31:09.912000,,,0 days 00:01:20.055000,11506,292,7,100,False,0,pos,5197.370884,0.995090,OnTrack,-1058.000000,-1464.000000,85.000000
606,2023-04-02 06:32:11.849,0 days 02:31:10.027000,,,0 days 00:01:20.170000,11525,293,7,100,False,0,car,5206.740000,0.996883,OnTrack,-1109.268837,-1415.258605,84.795664


In [None]:
# Collect the stint table for each season in the range (currently 2024 only)
for year in range(2024, 2025):
    # Season calendar, without testing events
    calendar = fastf1.get_event_schedule(year, include_testing=False)

    # One enriched stint table per race that was processed successfully
    all_stints = []

    for idx, event in calendar.iterrows():
        try:
            # Load the race session of this event
            session = fastf1.get_session(year, event['EventName'], 'R')
            session.load()

            # Base stint table, then each enrichment step in order:
            # team names, starting positions, mean weather
            race_stints = get_stints_race(session)
            for enrich in (add_team_info, add_starting_positions, add_weather_info):
                race_stints = enrich(session, race_stints)

            # Tag the rows with the season and the event they belong to
            race_stints['Year'] = year
            race_stints['Circuit'] = event['EventName']

            all_stints.append(race_stints)

            print(f"Processed {year} {event['EventName']}")
        except Exception as e:
            # A failing race is reported and skipped; the season continues
            print(f"Error processing {year} {event['EventName']}: {e}")

    # Write one CSV per season, but only if at least one race succeeded
    if all_stints:
        year_stints = pd.concat(all_stints, ignore_index=True)
        year_stints.to_csv(f"stints_data_{year}.csv", index=False)

In [7]:
# Load the per-season stint tables from the data directory next to this
# notebook's folder. Forward slashes are used so the relative paths resolve on
# POSIX systems as well as on Windows (backslash separators only work on Windows).
stints_2019 = pd.read_csv("../data/stints_data_2019.csv", index_col=None)
stints_2020 = pd.read_csv("../data/stints_data_2020.csv", index_col=None)
stints_2021 = pd.read_csv("../data/stints_data_2021.csv", index_col=None)
stints_2022 = pd.read_csv("../data/stints_data_2022.csv", index_col=None)
stints_2023 = pd.read_csv("../data/stints_data_2023.csv", index_col=None)
stints_2024 = pd.read_csv("../data/stints_data_2024.csv", index_col=None)

# Stack all seasons. concat is called without ignore_index, so every season
# keeps its own row labels and labels repeat across seasons.
stints = pd.concat([stints_2019, stints_2020, stints_2021, stints_2022, stints_2023, stints_2024])
stints

Unnamed: 0,Driver,Stint,Compound,StintLength,Team,StartingPosition,Time,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,WindDirection,WindSpeed,Year,Circuit
0,ALB,1.0,SOFT,14,Toro Rosso,13.0,0 days 01:00:46.617934426,23.477869,70.453279,1015.334426,0.0,41.313115,155.327869,1.166393,2019,Australian Grand Prix
1,ALB,2.0,MEDIUM,43,Toro Rosso,13.0,0 days 01:00:46.617934426,23.477869,70.453279,1015.334426,0.0,41.313115,155.327869,1.166393,2019,Australian Grand Prix
2,BOT,1.0,SOFT,23,Mercedes,2.0,0 days 01:00:46.617934426,23.477869,70.453279,1015.334426,0.0,41.313115,155.327869,1.166393,2019,Australian Grand Prix
3,BOT,2.0,MEDIUM,35,Mercedes,2.0,0 days 01:00:46.617934426,23.477869,70.453279,1015.334426,0.0,41.313115,155.327869,1.166393,2019,Australian Grand Prix
4,GAS,1.0,MEDIUM,37,Red Bull Racing,17.0,0 days 01:00:46.617934426,23.477869,70.453279,1015.334426,0.0,41.313115,155.327869,1.166393,2019,Australian Grand Prix
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1297,VER,1.0,MEDIUM,29,Red Bull Racing,4.0,0 days 01:14:27.206128378,26.768243,51.445946,1017.426351,0.0,31.805405,137.033784,1.900676,2024,Abu Dhabi Grand Prix
1298,VER,2.0,HARD,29,Red Bull Racing,4.0,0 days 01:14:27.206128378,26.768243,51.445946,1017.426351,0.0,31.805405,137.033784,1.900676,2024,Abu Dhabi Grand Prix
1299,ZHO,1.0,MEDIUM,12,Kick Sauber,15.0,0 days 01:14:27.206128378,26.768243,51.445946,1017.426351,0.0,31.805405,137.033784,1.900676,2024,Abu Dhabi Grand Prix
1300,ZHO,2.0,HARD,27,Kick Sauber,15.0,0 days 01:14:27.206128378,26.768243,51.445946,1017.426351,0.0,31.805405,137.033784,1.900676,2024,Abu Dhabi Grand Prix


In [9]:
# Stints of VER at the 2023 Australian Grand Prix
stints.loc[
    stints['Year'].eq(2023)
    & stints['Circuit'].eq('Australian Grand Prix')
    & stints['Driver'].eq('VER')
]

Unnamed: 0,Driver,Stint,Compound,StintLength,Team,StartingPosition,Time,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,WindDirection,WindSpeed,Year,Circuit
189,VER,1.0,MEDIUM,8,Red Bull Racing,1.0,0 days 01:51:06.001608108,17.44955,54.157658,1018.366667,0.0,30.13964,138.072072,1.127027,2023,Australian Grand Prix
190,VER,2.0,HARD,47,Red Bull Racing,1.0,0 days 01:51:06.001608108,17.44955,54.157658,1018.366667,0.0,30.13964,138.072072,1.127027,2023,Australian Grand Prix
191,VER,3.0,SOFT,2,Red Bull Racing,1.0,0 days 01:51:06.001608108,17.44955,54.157658,1018.366667,0.0,30.13964,138.072072,1.127027,2023,Australian Grand Prix
192,VER,4.0,SOFT,1,Red Bull Racing,1.0,0 days 01:51:06.001608108,17.44955,54.157658,1018.366667,0.0,30.13964,138.072072,1.127027,2023,Australian Grand Prix
