In [94]:
import numpy as np 
import pandas as pd 
import fastf1
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

In [95]:
# Get data from fastf1 API
session_2023_r = fastf1.get_session(2023, "Japan", 'R')
session_2023_r.load()
print(session_2023_r.results)
# session_2023_q = fastf1.get_session(2023, "Japan", 'Q')
# session_2023_q.load()
# session_2023_q.results
session_2024_r = fastf1.get_session(2024, "Japan", 'R')
session_2024_r.load()


core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '44', '55', '63', '14', '31', '10', '40', '22', '24', '27', '20', '23', '2', '18', '11', '77']
core           INFO 	Loading data for Japanese Grand Prix - 

   DriverNumber BroadcastName Abbreviation         DriverId         TeamName  \
1             1  M VERSTAPPEN          VER   max_verstappen  Red Bull Racing   
4             4      L NORRIS          NOR           norris          McLaren   
81           81     O PIASTRI          PIA          piastri          McLaren   
16           16     C LECLERC          LEC          leclerc          Ferrari   
44           44    L HAMILTON          HAM         hamilton         Mercedes   
55           55       C SAINZ          SAI            sainz          Ferrari   
63           63     G RUSSELL          RUS          russell         Mercedes   
14           14      F ALONSO          ALO           alonso     Aston Martin   
31           31        E OCON          OCO             ocon           Alpine   
10           10       P GASLY          GAS            gasly           Alpine   
40           40      L LAWSON          LAW           lawson       AlphaTauri   
22           22     Y TSUNODA          T

req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '4', '14', '63', '81', '44', '22', '27', '18', '20', '77', '31', '10', '2', '24', '3', '23']


In [96]:
# Get lap times
laps_2023 = session_2023_r.laps[["Driver","LapTime"]].copy()
# Get grid positions
grid_2023 = session_2023_r.results[["Abbreviation","GridPosition","Position"]].copy()
# Clean data and convert to seconds
laps_2023.dropna(subset=["LapTime"],inplace=True)
laps_2023["LapTime (s)"] = laps_2023["LapTime"].dt.total_seconds()

# Map full driver names to driver abbreviations
driver_mapping = {
    "Lando Norris": "NOR", "Oscar Piastri": "PIA", "Max Verstappen": "VER", "George Russell": "RUS",
    "Yuki Tsunoda": "TSU", "Alexander Albon": "ALB", "Charles Leclerc": "LEC", "Lewis Hamilton": "HAM",
    "Pierre Gasly": "GAS", "Carlos Sainz": "SAI", "Lance Stroll": "STR", "Fernando Alonso": "ALO",
    "Esteban Ocon": "OCO", "Liam Lawson": "LAW", "Guanyu Zhou": "ZHO", "Nico Hulkenberg": "HUL",
    "Kevin Magnussen": "MAG", "Logan Sargeant": "SAR", "Sergio Perez": "PER", "Valtteri Bottas": "BOT"
}

laps_2023["Driver"].map(driver_mapping)
# print(laps_2023)
# print(grid_2023)

# Merge lap times and grid positions according to driver abbreviation
merged_data = pd.merge(laps_2023,grid_2023,left_on="Driver",right_on="Abbreviation")
print(merged_data)

## WORK IN PROGRESS: Get quali times: Q3 if possible, otherwise Q2 time, otherwise Q1 time 
# quali_2023 = session_2023_q.laps
# print(quali_2023)



    Driver                LapTime  LapTime (s) Abbreviation  GridPosition  \
0      VER 0 days 00:02:00.179000      120.179          VER           1.0   
1      VER 0 days 00:01:36.748000       96.748          VER           1.0   
2      VER 0 days 00:01:37.837000       97.837          VER           1.0   
3      VER 0 days 00:01:38.033000       98.033          VER           1.0   
4      VER 0 days 00:01:38.148000       98.148          VER           1.0   
..     ...                    ...          ...          ...           ...   
814    PER 0 days 00:02:12.535000      132.535          PER           5.0   
815    BOT 0 days 00:02:54.634000      174.634          BOT          16.0   
816    BOT 0 days 00:02:20.615000      140.615          BOT          16.0   
817    BOT 0 days 00:02:02.755000      122.755          BOT          16.0   
818    BOT 0 days 00:01:46.852000      106.852          BOT          16.0   

     Position  
0         1.0  
1         1.0  
2         1.0  
3         1

In [97]:
# Set lap times and grid position as features, race position as outcome
X = merged_data[["LapTime (s)","GridPosition"]]
y = merged_data[["Position"]]

# Train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print(X_train)

# Choose and fit model
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1)
model.fit(X_train, y_train)


     LapTime (s)  GridPosition
171       98.120           4.0
583       98.013           9.0
802       99.112          17.0
310      118.599           8.0
275       98.537           6.0
..           ...           ...
257       99.677           6.0
206       99.952           7.0
353       99.138          10.0
601      101.337          19.0
359      104.729          10.0

[655 rows x 2 columns]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?


In [98]:
# Create and clean 2024 data for prediction
# Get lap times
laps_2024 = session_2024_r.laps[["Driver","LapTime"]].copy()
# Get grid positions
grid_2024 = session_2024_r.results[["Abbreviation","GridPosition","Position"]].copy()
# Clean data and convert to seconds
laps_2024.dropna(subset=["LapTime"],inplace=True)
laps_2024["LapTime (s)"] = laps_2024["LapTime"].dt.total_seconds()

laps_2024["Driver"].map(driver_mapping)

# Merge lap times and grid positions according to driver abbreviation
merged_data_2024 = pd.merge(laps_2024,grid_2024,left_on="Driver",right_on="Abbreviation")

In [99]:
X_2024 = merged_data_2024[["LapTime (s)","GridPosition"]]
y_2024 = merged_data_2024[["Position"]]

pred_pos = model.predict(X_2024)
laps_2024["Predicted Position"] = pred_pos.round()
print(laps_2024)

# Rank by predicted position
laps_2024 = laps_2024.sort_values(by="Predicted Position")
laps_2024 = laps_2024.drop_duplicates()

# Print final predictions
print("\n🏁 Predicted 2024 Japanese GP Winner 🏁\n")
print(laps_2024[["Driver", "Predicted Position"]])



    Driver                LapTime  LapTime (s)  Predicted Position
0      VER 0 days 00:02:10.735000      130.735                 2.0
3      VER 0 days 00:01:36.472000       96.472                 1.0
4      VER 0 days 00:01:36.437000       96.437                 1.0
5      VER 0 days 00:01:36.855000       96.855                 1.0
6      VER 0 days 00:01:36.970000       96.970                 1.0
..     ...                    ...          ...                 ...
900    ZHO 0 days 00:01:58.402000      118.402                17.0
901    ZHO 0 days 00:01:37.160000       97.160                17.0
902    ZHO 0 days 00:01:37.500000       97.500                17.0
903    ZHO 0 days 00:01:41.117000      101.117                17.0
904    ZHO 0 days 00:02:08.193000      128.193                17.0

[876 rows x 4 columns]

🏁 Predicted 2024 Japanese GP Winner 🏁

    Driver  Predicted Position
50     VER                 1.0
30     VER                 1.0
48     VER                 1.0
47     V