### We want to load Historical data of all Belgian GPs, along with driver info, team info, tire compound, weather, sector and lap times

In [1]:
import fastf1
import pandas as pd

fastf1.Cache.enable_cache("../f1_cache")

In [52]:
years = [2022, 2023, 2024]
race = "Belgian"
all_laps = []

In [61]:
for year in years:
    try:
        session = fastf1.get_session(year, race, "Q")   # Get all details of the Qualifying sessions
        session.load()

        weather = session.weather_data
        avg_by_rain = weather.groupby(["Rainfall"]).mean()

        for drv in session.drivers:
            laps = session.laps.pick_driver(drv).pick_accurate()
            driver_best = laps[~laps['PitOutTime'].notna() & ~laps['PitInTime'].notna() & laps['IsPersonalBest'] == True]
            avg_sector_one = driver_best["Sector1Time"].dt.total_seconds().mean()
            avg_sector_two = driver_best["Sector2Time"].dt.total_seconds().mean()
            avg_sector_three = driver_best["Sector3Time"].dt.total_seconds().mean()
            avg_lap = driver_best['LapTime'].dt.total_seconds().mean()
            compound_counts = driver_best['Compound'].value_counts().to_dict()
            compound_used = max(compound_counts, key=compound_counts.get)

            team = session.get_driver(drv)['TeamName']

            all_laps.append({
                    'Year': year,
                    'Driver': drv,
                    'Team': team,
                    'AvgLapTime': avg_lap,
                    'Compound': compound_used,
                    'AvgSectorOneTime': avg_sector_one,
                    'AvgSectorTwoTime': avg_sector_two,
                    'AvgSectorThreeTime': avg_sector_three,
                    'AirTemp': avg_by_rain.loc[True]['AirTemp'],
                    'TrackTemp': avg_by_rain.loc[True]['TrackTemp'],
                    'Humidity': avg_by_rain.loc[True]['Humidity'],
                    'WindSpeed': avg_by_rain.loc[True]['WindSpeed']
                })
        
        
    except Exception as e:
        print(f"Failed for {year}: {e}")

core           INFO 	Loading data for Belgian Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '55', '11', '16', '31', '14', '44', '63', '23', '4', '3', '10', '24', '18', '47', '5', '6', '20', '22', '77']
core           INFO 	Loading data for Belgian Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cach

Failed for 2022: True


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '11', '44', '55', '81', '4', '63', '14', '18', '22', '10', '20', '77', '31', '23', '24', '2', '3', '27']
core           INFO 	Loading data for Belgian Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data


In [64]:
all_data = pd.DataFrame(all_laps)
all_data.dropna(inplace=True)
all_data.head()

Unnamed: 0,Year,Driver,Team,AvgLapTime,Compound,AvgSectorOneTime,AvgSectorTwoTime,AvgSectorThreeTime,AirTemp,TrackTemp,Humidity,WindSpeed
0,2023,1,Red Bull Racing,115.268125,INTERMEDIATE,32.8785,51.533,30.856625,18.0,21.6,93.0,0.6
1,2023,16,Ferrari,116.169375,INTERMEDIATE,32.846,52.033875,31.2895,18.0,21.6,93.0,0.6
2,2023,11,Red Bull Racing,115.084375,INTERMEDIATE,32.909625,51.42525,30.7495,18.0,21.6,93.0,0.6
3,2023,44,Mercedes,115.3945,INTERMEDIATE,32.688,51.8715,30.835,18.0,21.6,93.0,0.6
4,2023,55,Ferrari,115.801444,INTERMEDIATE,32.767333,52.079778,30.954333,18.0,21.6,93.0,0.6
