In [1]:
import fastf1
import pandas as pd
from fastf1 import utils
import os

In [3]:
fastf1.Cache.enable_cache('./cache')

In [8]:
def get_race_results(year, race_name):
    """
    Fetch the race results for a specific race and year.

    Parameters:
    year (int): The year of the race.
    race_name (str): The name of the race (e.g., 'British Grand Prix').

    Returns:
    pd.DataFrame: A DataFrame containing the race results, including driver positions, lap times, and more.
                  The DataFrame contains various columns like Driver, Position, Team, etc.
    """
    session = fastf1.get_session(year, race_name, 'R')
    session.load(laps=False, telemetry=False, messages=False)
    results = session.results
    return results

In [9]:
temp_result = get_race_results(2023, 'British Grand Prix')

core           INFO 	Loading data for British Grand Prix - Race [v3.4.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '81', '63', '11', '14', '23', '16', '55', '2', '77', '27', '18', '24', '22', '21', '10', '20', '31']


In [10]:
temp_result

Unnamed: 0,DriverNumber,BroadcastName,Abbreviation,DriverId,TeamName,TeamColor,TeamId,FirstName,LastName,FullName,...,CountryCode,Position,ClassifiedPosition,GridPosition,Q1,Q2,Q3,Time,Status,Points
1,1,M VERSTAPPEN,VER,max_verstappen,Red Bull Racing,3671C6,red_bull,Max,Verstappen,Max Verstappen,...,NED,1.0,1,1.0,NaT,NaT,NaT,0 days 01:25:16.938000,Finished,26.0
4,4,L NORRIS,NOR,norris,McLaren,F58020,mclaren,Lando,Norris,Lando Norris,...,GBR,2.0,2,2.0,NaT,NaT,NaT,0 days 00:00:03.798000,Finished,18.0
44,44,L HAMILTON,HAM,hamilton,Mercedes,6CD3BF,mercedes,Lewis,Hamilton,Lewis Hamilton,...,GBR,3.0,3,7.0,NaT,NaT,NaT,0 days 00:00:06.783000,Finished,15.0
81,81,O PIASTRI,PIA,piastri,McLaren,F58020,mclaren,Oscar,Piastri,Oscar Piastri,...,AUS,4.0,4,3.0,NaT,NaT,NaT,0 days 00:00:07.776000,Finished,12.0
63,63,G RUSSELL,RUS,russell,Mercedes,6CD3BF,mercedes,George,Russell,George Russell,...,GBR,5.0,5,6.0,NaT,NaT,NaT,0 days 00:00:11.206000,Finished,10.0
11,11,S PEREZ,PER,perez,Red Bull Racing,3671C6,red_bull,Sergio,Perez,Sergio Perez,...,MEX,6.0,6,15.0,NaT,NaT,NaT,0 days 00:00:12.882000,Finished,8.0
14,14,F ALONSO,ALO,alonso,Aston Martin,358C75,aston_martin,Fernando,Alonso,Fernando Alonso,...,ESP,7.0,7,9.0,NaT,NaT,NaT,0 days 00:00:17.193000,Finished,6.0
23,23,A ALBON,ALB,albon,Williams,37BEDD,williams,Alexander,Albon,Alexander Albon,...,THA,8.0,8,8.0,NaT,NaT,NaT,0 days 00:00:17.878000,Finished,4.0
16,16,C LECLERC,LEC,leclerc,Ferrari,F91536,ferrari,Charles,Leclerc,Charles Leclerc,...,MON,9.0,9,4.0,NaT,NaT,NaT,0 days 00:00:18.689000,Finished,2.0
55,55,C SAINZ,SAI,sainz,Ferrari,F91536,ferrari,Carlos,Sainz,Carlos Sainz,...,ESP,10.0,10,5.0,NaT,NaT,NaT,0 days 00:00:19.448000,Finished,1.0


In [6]:
def get_historical_results(start_year, end_year):
    """
    Fetch race results for multiple years and races within those years

    Parameters:
    start_year (int): The starting year for data retrieval (inclusive)
    end_year (int): The ending year for data retrieval (inclusive)

    Returns:
    pd.DataFrame: DataFrame containing all race results from the specified range of years
                  Each row represents a driver's result in a specific race, with additional columns
                  for the year and race name
    """
    all_results = []
    for year in range(start_year, end_year + 1):
        schedule = fastf1.get_event_schedule(year)
        for race in schedule['EventName']:
            results = get_race_results(year, race)
            results['Year'] = year
            results['RaceName'] = race
            all_results.append(results)
    return pd.concat(all_results, ignore_index=True)

In [7]:
temp = get_historical_results(2023, 2023)

core           INFO 	Loading data for British Grand Prix - Race [v3.4.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '81', '63', '11', '14', '23', '16', '55', '2', '77', '27', '18', '24', '22', '21', '10', '20', '31']
core           INFO 	Loading data for Bahrain Grand Prix - Ra

In [29]:
def get_qualifying_results(year, race_name):
    """
    Fetch qualifying results for a specific race.

    Parameters:
    year (int): The year of the race.
    race_name (str): The name of the race (e.g., 'British Grand Prix').

    Returns:
    pd.DataFrame: A DataFrame containing qualifying results, including qualifying positions and lap times.
    """
    qualifying = fastf1.get_session(year, race_name, 'Q')
    qualifying.load()
    qual_results = qualifying.results
    return qual_results[['DriverId', 'Position', 'Q1', 'Q2', 'Q3']]

In [18]:
get_qualifying_results(2023, 'British Grand Prix')

core           INFO 	Loading data for British Grand Prix - Qualifying [v3.4.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '55', '63', '44', '23', '14', '10', '27', '18', '31', '2', '77', '11', '22', '24', '21', '20']


Unnamed: 0,DriverId,Position,Q1,Q2,Q3,Time
1,max_verstappen,1.0,0 days 00:01:29.428000,0 days 00:01:27.702000,0 days 00:01:26.720000,NaT
4,norris,2.0,0 days 00:01:28.917000,0 days 00:01:28.042000,0 days 00:01:26.961000,NaT
81,piastri,3.0,0 days 00:01:29.874000,0 days 00:01:27.845000,0 days 00:01:27.092000,NaT
16,leclerc,4.0,0 days 00:01:29.143000,0 days 00:01:28.361000,0 days 00:01:27.136000,NaT
55,sainz,5.0,0 days 00:01:29.865000,0 days 00:01:28.265000,0 days 00:01:27.148000,NaT
63,russell,6.0,0 days 00:01:29.412000,0 days 00:01:28.782000,0 days 00:01:27.155000,NaT
44,hamilton,7.0,0 days 00:01:29.415000,0 days 00:01:28.545000,0 days 00:01:27.211000,NaT
23,albon,8.0,0 days 00:01:29.466000,0 days 00:01:28.067000,0 days 00:01:27.530000,NaT
14,alonso,9.0,0 days 00:01:29.949000,0 days 00:01:28.368000,0 days 00:01:27.659000,NaT
10,gasly,10.0,0 days 00:01:29.533000,0 days 00:01:28.751000,0 days 00:01:27.689000,NaT


In [33]:
def merge_race_and_qualifying(race_results, qual_results):
    """
    Merge race results with qualifying results.

    Parameters:
    race_results (pd.DataFrame): DataFrame containing race results.
    qual_results (pd.DataFrame): DataFrame containing qualifying results.

    Returns:
    pd.DataFrame: A DataFrame combining race and qualifying results.
    """
    # Merge on common columns such as 'Driver'
    merged_results = pd.merge(race_results, qual_results, on='DriverId', how='left', suffixes=('_Race', '_Qual'))
    return merged_results

In [26]:
def prepare_f1_data(start_year, end_year):
    """
    This function retrieves race history data from the specified start year to end year, 
    merges it with qualifying results, and outputs a single dataset for each race.

    Parameters:
    start_year (int): The first year to include in the data.
    end_year (int): The last year to include in the data.

    Outputs:
    - 'f1_data_<start_year>_<end_year>.csv': CSV file containing merged race and qualifying data for all races within the specified year range.
    """
    all_data = []
    for year in range(start_year, end_year + 1):
        schedule = fastf1.get_event_schedule(year)
        for race in schedule['EventName']:
            race_results = get_race_results(year, race)
            qual_results = get_qualifying_results(year, race)
            merged_results = merge_race_and_qualifying(race_results, qual_results)
            merged_results['Year'] = year
            merged_results['RaceName'] = race
            all_data.append(merged_results)
    
    final_data = pd.concat(all_data, ignore_index=True)
    output_filename = f'f1_data_{start_year}_{end_year}.csv'
    final_data.to_csv(output_filename, index=False)
    print(f"Data preparation complete. File saved as '{output_filename}'.")


In [30]:
temp_1 = get_qualifying_results(2023, 'British Grand Prix')

core           INFO 	Loading data for British Grand Prix - Qualifying [v3.4.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '55', '63', '44', '23', '14', '10', '27', '18', '31', '2', '77', '11', '22', '24', '21', '20']


In [34]:
merge_race_and_qualifying(temp_result, temp_1)

Unnamed: 0,DriverNumber,BroadcastName,Abbreviation,DriverId,TeamName,TeamColor,TeamId,FirstName,LastName,FullName,...,Q1_Race,Q2_Race,Q3_Race,Time,Status,Points,Position_Qual,Q1_Qual,Q2_Qual,Q3_Qual
0,1,M VERSTAPPEN,VER,max_verstappen,Red Bull Racing,3671C6,red_bull,Max,Verstappen,Max Verstappen,...,NaT,NaT,NaT,0 days 01:25:16.938000,Finished,26.0,1.0,0 days 00:01:29.428000,0 days 00:01:27.702000,0 days 00:01:26.720000
1,4,L NORRIS,NOR,norris,McLaren,F58020,mclaren,Lando,Norris,Lando Norris,...,NaT,NaT,NaT,0 days 00:00:03.798000,Finished,18.0,2.0,0 days 00:01:28.917000,0 days 00:01:28.042000,0 days 00:01:26.961000
2,44,L HAMILTON,HAM,hamilton,Mercedes,6CD3BF,mercedes,Lewis,Hamilton,Lewis Hamilton,...,NaT,NaT,NaT,0 days 00:00:06.783000,Finished,15.0,7.0,0 days 00:01:29.415000,0 days 00:01:28.545000,0 days 00:01:27.211000
3,81,O PIASTRI,PIA,piastri,McLaren,F58020,mclaren,Oscar,Piastri,Oscar Piastri,...,NaT,NaT,NaT,0 days 00:00:07.776000,Finished,12.0,3.0,0 days 00:01:29.874000,0 days 00:01:27.845000,0 days 00:01:27.092000
4,63,G RUSSELL,RUS,russell,Mercedes,6CD3BF,mercedes,George,Russell,George Russell,...,NaT,NaT,NaT,0 days 00:00:11.206000,Finished,10.0,6.0,0 days 00:01:29.412000,0 days 00:01:28.782000,0 days 00:01:27.155000
5,11,S PEREZ,PER,perez,Red Bull Racing,3671C6,red_bull,Sergio,Perez,Sergio Perez,...,NaT,NaT,NaT,0 days 00:00:12.882000,Finished,8.0,16.0,0 days 00:01:29.968000,NaT,NaT
6,14,F ALONSO,ALO,alonso,Aston Martin,358C75,aston_martin,Fernando,Alonso,Fernando Alonso,...,NaT,NaT,NaT,0 days 00:00:17.193000,Finished,6.0,9.0,0 days 00:01:29.949000,0 days 00:01:28.368000,0 days 00:01:27.659000
7,23,A ALBON,ALB,albon,Williams,37BEDD,williams,Alexander,Albon,Alexander Albon,...,NaT,NaT,NaT,0 days 00:00:17.878000,Finished,4.0,8.0,0 days 00:01:29.466000,0 days 00:01:28.067000,0 days 00:01:27.530000
8,16,C LECLERC,LEC,leclerc,Ferrari,F91536,ferrari,Charles,Leclerc,Charles Leclerc,...,NaT,NaT,NaT,0 days 00:00:18.689000,Finished,2.0,4.0,0 days 00:01:29.143000,0 days 00:01:28.361000,0 days 00:01:27.136000
9,55,C SAINZ,SAI,sainz,Ferrari,F91536,ferrari,Carlos,Sainz,Carlos Sainz,...,NaT,NaT,NaT,0 days 00:00:19.448000,Finished,1.0,5.0,0 days 00:01:29.865000,0 days 00:01:28.265000,0 days 00:01:27.148000


In [39]:
def get_weather_data(year, race_name):
    """
    Fetch weather data for a specific race.

    Parameters:
    year (int): The year of the race
    race_name (str): The name of the race (e.g., 'British Grand Prix')

    Returns:
    pd.DataFrame: A DataFrame containing weather data for the specified race, 
                  including columns like 'AirTemp', 'TrackTemp', 'Humidity', 
                  'WindSpeed', 'WindDirection', and 'Rainfall'.
    """
    session = fastf1.get_session(year, race_name, 'R')
    session.load()
    weather_data = session.weather_data
    avg_weather = weather_data.mean()  # Taking average weather conditions
    return avg_weather.to_frame().T  # Return as DataFrame for consistency

In [38]:
get_weather_data(2023, 'British Grand Prix')

core           INFO 	Loading data for British Grand Prix - Race [v3.4.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '81', '63', '11', '14', '23', '16', '55', '2', '77', '27', '18', '24', '22', '21', '10', '20', '31']


Unnamed: 0,Time,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,WindDirection,WindSpeed
0,0 days 01:15:12.524052980,21.47351,56.18543,1001.225828,0.0,30.94106,205.397351,3.023841
