In [5]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import fastf1
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor

In [6]:
# Directory where fastf1 stores downloaded session data so that re-running
# the notebook reuses it instead of downloading again.
cache_folder = 'f1_data_cache'
# exist_ok=True makes this a no-op when the directory is already there and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(cache_folder, exist_ok=True)
fastf1.Cache.enable_cache(cache_folder)

In [7]:
# Car number (as a string) -> three-letter driver abbreviation.
# Insertion order is kept as originally written.
driver_numbers = {
    "81": "PIA",  # Piastri
    "63": "RUS",  # Russell
    "16": "LEC",  # Leclerc
    "12": "ANT",  # Antonelli
    "10": "GAS",  # Gasly
    "4": "NOR",   # Norris
    "1": "VER",   # Verstappen
    "55": "SAI",  # Sainz
    "44": "HAM",  # Hamilton
    "22": "TSU",  # Tsunoda
    "6": "HAD",   # Hadjar
    "14": "ALO",  # Alonso
    "31": "OCO",  # Ocon
    "23": "ALB",  # Albon
    "27": "HUL",  # Hulkenberg
    "30": "LAW",  # Lawson
    "18": "STR",  # Stroll
    "87": "BEA",  # Bearman
    "7": "DOO",   # Doohan
    "5": "BOR",   # Bortoleto
}

# Event names handed to fastf1.get_session() when building the training set.
# These are plain strings (not tuples), one per completed race.
races = [
    "Australia Prix",
    "China Prix",
    "Japan Prix",
    "Bahrain Prix",
]

In [11]:
# Build the training data: for every race in `races`, pair each driver's
# qualifying position (feature) with their race finishing position (target).
# The lists are re-created here so re-running the cell does not duplicate rows.
X_list = []
Y_list = []
for race_name in races:
    try:
        # Only the `.results` classification table is read below, so the
        # telemetry and weather downloads are skipped to keep loading fast.
        race = fastf1.get_session(2025, race_name, 'R')
        race.load(telemetry=False, weather=False)
        qualifying = fastf1.get_session(2025, race_name, 'Q')
        qualifying.load(telemetry=False, weather=False)

        race_results = race.results[['Abbreviation', 'Position']]
        qualifying_results = qualifying.results[['Abbreviation', 'Position']]

        # Inner join on the driver abbreviation: only drivers present in both
        # the qualifying and the race classification are kept.
        merged = pd.merge(
            qualifying_results.rename(columns={'Position': 'QualifyingPosition'}),
            race_results.rename(columns={'Position': 'RaceResult'}),
            on='Abbreviation'
        )
        # Drop drivers that are missing either position.
        merged = merged.dropna()

        X_list.append(merged[['QualifyingPosition']])
        Y_list.append(merged['RaceResult'])

    except Exception as e:
        # Best effort: a race that fails to load is reported and skipped so
        # the remaining races can still contribute training rows.
        print(f"Error in {race_name}: {e}")

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '63', '12', '23', '18', '27', '16', '81', '44', '10', '22', '31', '87', '30', '5', '14', '55', '7', '6']
core           INFO 	Loading data for Australian Grand Prix 

In [12]:
# Stack the per-race frames into one feature matrix / target vector.
# Every race may have been skipped by the loader's error handling, so fail
# with a clear message instead of an opaque concat error.
if not X_list:
    raise ValueError("No race data was loaded; cannot train the model.")
X = pd.concat(X_list, ignore_index=True)
y = pd.concat(Y_list, ignore_index=True)

# Hold out 20% of the rows for a sanity check of the fitted model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fixed random_state so a Restart & Run All reproduces the same model.
model = GradientBoostingRegressor(random_state=42)
model.fit(X_train, y_train)

# Use the hold-out split that was created above: mean absolute error between
# predicted and actual finishing position.
test_mae = np.mean(np.abs(model.predict(X_test) - y_test))
print(f"Hold-out MAE: {test_mae:.2f} positions")

In [None]:
# Predict the race result for the next event from its qualifying classification.
predict_quali = fastf1.get_session(2025, 'Saudi Arabia Prix', 'Q')
# Only `.results` is needed, so skip the telemetry and weather downloads.
predict_quali.load(telemetry=False, weather=False)

# Drop drivers without a qualifying position, mirroring the dropna() applied
# to the training data; the model cannot predict from a missing value.
predict_quali_results = (
    predict_quali.results[['Abbreviation', 'Position']]
    .dropna(subset=['Position'])
)
X_predict = predict_quali_results.rename(columns={'Position': 'QualifyingPosition'})

# Same single feature column the model was trained on.
predictions = model.predict(X_predict[['QualifyingPosition']])

# One row per driver, ordered from best to worst predicted finish.
prediction_df = pd.DataFrame({
    'Driver': X_predict['Abbreviation'],
    'PredictedRacePosition': predictions
}).sort_values('PredictedRacePosition')

print(prediction_df)

core           INFO 	Loading data for Saudi Arabian Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '81', '63', '16', '12', '55', '44', '22', '10', '4', '23', '30', '14', '6', '87', '18', '7', '27', '31', '5']


    Driver  PredictedRacePosition
1     20.0                    1.0
63    16.0                    2.0
81    15.0                    3.0
16    12.0                    4.0
22    19.0                    5.0
44     9.0                    6.0
55    17.0                    7.0
10     7.0                    8.0
12     3.0                    9.0
4     13.0                   10.0
87     4.0                   11.0
7      6.0                   12.0
23     1.0                   13.0
18    18.0                   14.0
5      5.0                   15.0
14     2.0                   16.0
6      8.0                   17.0
30    11.0                   18.0
31    14.0                   19.0
27    10.0                   20.0
