In [1]:
import fastf1
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
import os

In [2]:
os.makedirs("f1_cache", exist_ok=True)

fastf1.Cache.enable_cache('f1_cache')  
session_2024=fastf1.get_session(2024,17,"R")
session_2024.load()
laps_2024=session_2024.laps[["Driver","LapTime","Sector1Time","Sector2Time","Sector3Time"]].copy()
laps_2024.dropna(inplace=True)

core           INFO 	Loading data for Azerbaijan Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No

In [3]:
for col in ["LapTime","Sector1Time","Sector2Time","Sector3Time"]:
    laps_2024[f"{col} in seconds"]=laps_2024[col].dt.total_seconds()

In [4]:
sector_times=laps_2024.groupby("Driver").agg({
    "Sector1Time in seconds":"mean",
    "Sector2Time in seconds":"mean",
    "Sector3Time in seconds":"mean",
}).reset_index()

In [5]:
sector_times["TotalSectorTime"]=sector_times["Sector1Time in seconds"]+sector_times["Sector2Time in seconds"]+sector_times["Sector3Time in seconds"]

In [6]:
clean_air_pace = {
    "VER": 100.950061,  
    "SAI": 101.482406,
    "LAW": 101.707000,
    "ANT": 101.717000,
    "RUS": 100.847000, 
    "TSU": 102.143000, 
    "NOR": 99.588472,   
    "HAD": 102.372000,
    "PIA": 99.408667, 
    "LEC": 101.163578, 
    "ALO": 100.790656,
    "HAM": 100.236000,
    "BOR": 102.100000, 
    "STR": 100.845844, 
    "BEA": 102.300000, 
    "COL": 102.200000, 
    "OCO": 101.886118, 
    "GAS": 103.053000, 
    "ALB": 103.203000,  
    "HUL": 100.389938   
}


In [7]:
qualifying_times = pd.DataFrame({
    "Driver": [
        "VER", "SAI", "LAW", "ANT", "RUS", "TSU", "NOR", "HAD",
        "PIA", "LEC", "ALO", "HAM", "BOR", "STR", "BEA", "COL",
        "OCO", "GAS", "ALB", "HUL"
    ],
    "QualifyingTime": [
        101.117,  
        101.595,
        101.707,
        101.717, 
        102.070,
        102.143, 
        102.239, 
        102.372, 
        102.000,  
        101.717,  
        101.857, 
        102.183, 
        102.100, 
        102.653, 
        102.300,  
        102.200, 
        102.500, 
        103.053, 
        103.203, 
        101.353   
    ]
})

In [8]:
drive_wet_perf = {
    "VER": 0.975196,"HAM": 0.976484,
    "LEC": 0.975862,"NOR": 0.978179,"ALO": 0.972655,
    "RUS": 0.968678,"SAI": 0.978754,"TSU": 0.996388,
    "OCO": 0.981810,"GAS": 0.978832,"STR": 0.979857,
    "LAW": 0.977500,"ANT": 0.974800,"HAD": 0.980200,
    "PIA": 0.979000,"BOR": 0.975000,
    "BEA": 0.974200,"COL": 0.973800,"ALB": 0.977200,"HUL": 0.972900
}

In [9]:
qualifying_times["wetpreformance"]=qualifying_times["Driver"].map(drive_wet_perf)
print(qualifying_times)

   Driver  QualifyingTime  wetpreformance
0     VER         101.117        0.975196
1     SAI         101.595        0.978754
2     LAW         101.707        0.977500
3     ANT         101.717        0.974800
4     RUS         102.070        0.968678
5     TSU         102.143        0.996388
6     NOR         102.239        0.978179
7     HAD         102.372        0.980200
8     PIA         102.000        0.979000
9     LEC         101.717        0.975862
10    ALO         101.857        0.972655
11    HAM         102.183        0.976484
12    BOR         102.100        0.975000
13    STR         102.653        0.979857
14    BEA         102.300        0.974200
15    COL         102.200        0.973800
16    OCO         102.500        0.981810
17    GAS         103.053        0.978832
18    ALB         103.203        0.977200
19    HUL         101.353        0.972900


In [10]:
api_key = "88f5540bbc3c150c0287244283f16ba0"
lat = 40.37256
lon = 49.8533

url = f"https://api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid={api_key}&units=metric"

res = requests.get(url)
data = res.json()

# First, check what we got back
if "list" not in data:
    print("Error in API response:", data)
else:
    forecast_time = "2025-09-21 15:00:00"
    forecast_data = next((item for item in data["list"] if item["dt_txt"] == forecast_time), None)

    rain = forecast_data.get("pop", 0) if forecast_data else 0
    temperature = forecast_data["main"]["temp"] if forecast_data else 20

    print(f"Rain probability during race is : {rain*100}%, Temperature: {temperature}°C")

Rain probability during race is : 0%, Temperature: 20°C


In [11]:
qualifying_times["Quali Times"]=qualifying_times["QualifyingTime"]
if rain>0.75:
    qualifying_times["Quali Times"]=qualifying_times["QualifyingTime"]*qualifying_times["wetpreformance"]


In [12]:
team_points = {
    "McLaren": 617,
    "Mercedes": 260,
    "Ferrari": 280,
    "Red Bull": 239,
    "Williams": 86,
    "Racing Bulls": 61,
    "Aston Martin": 62,
    "Kick Sauber": 55,
    "Haas": 44,
    "Alpine": 20
}


In [13]:
max_points=max(team_points.values())
team_performance_score={team:points/max_points for team,points in team_points.items()}


In [14]:
drive_to_team = {
    "VER": "Red Bull",
    "TSU": "Red Bull",
    "NOR": "McLaren",
    "PIA": "McLaren",
    "LEC": "Ferrari",
    "HAM": "Ferrari",
    "RUS": "Mercedes",
    "ANT": "Mercedes",
    "SAI": "Williams",
    "ALB": "Williams",
    "ALO": "Aston Martin",
    "STR": "Aston Martin",
    "GAS": "Alpine",
    "OCO": "Alpine",
    "HUL": "Kick Sauber",
    "BOR": "Kick Sauber",
    "LAW": "Racing Bulls",
    "HAD": "Racing Bulls",
    "BEA": "Haas",
    "COL": "Haas"
}


qualifying_times["Team"]=qualifying_times["Driver"].map(drive_to_team)
qualifying_times["team_performance_score"]=qualifying_times["Team"].map(team_performance_score)

In [15]:
merged_data=qualifying_times.merge(sector_times[["Driver","TotalSectorTime"]],on="Driver",how="left")

In [16]:
merged_data=merged_data[merged_data["Driver"].isin(clean_air_pace.keys())]

In [17]:
merged_data["CleanAirPace"]=merged_data["Driver"].map(clean_air_pace)

In [18]:
print(merged_data)

   Driver  QualifyingTime  wetpreformance  Quali Times          Team  \
0     VER         101.117        0.975196      101.117      Red Bull   
1     SAI         101.595        0.978754      101.595      Williams   
2     LAW         101.707        0.977500      101.707  Racing Bulls   
3     ANT         101.717        0.974800      101.717      Mercedes   
4     RUS         102.070        0.968678      102.070      Mercedes   
5     TSU         102.143        0.996388      102.143      Red Bull   
6     NOR         102.239        0.978179      102.239       McLaren   
7     HAD         102.372        0.980200      102.372  Racing Bulls   
8     PIA         102.000        0.979000      102.000       McLaren   
9     LEC         101.717        0.975862      101.717       Ferrari   
10    ALO         101.857        0.972655      101.857  Aston Martin   
11    HAM         102.183        0.976484      102.183       Ferrari   
12    BOR         102.100        0.975000      102.100   Kick Sa

In [19]:
x=merged_data[["QualifyingTime","team_performance_score","CleanAirPace"]]

In [20]:
y=laps_2024.groupby("Driver")["LapTime"].mean().reset_index()
y["LapTime"] = y["LapTime"].dt.total_seconds() 

In [21]:
imputer=SimpleImputer(strategy="median")
x_imputed=imputer.fit_transform(x)
x_train,x_test,y_train,y_test=train_test_split(x_imputed,y["LapTime"],test_size=0.2,random_state=39)

In [22]:
model=XGBRegressor(n_estimators=100,learning_rate=0.01, random_state=39,max_depth=4,monotone_constraints=(1,-1,1),
                   colsample_bytree=0.6)
merged_data["PredictedRaceTime(XGB)"]=model.fit(x_train,y_train).predict(x_imputed)
final_results=merged_data[["Driver","PredictedRaceTime(XGB)"]].sort_values(by="PredictedRaceTime(XGB)")
print(final_results)

   Driver  PredictedRaceTime(XGB)
11    HAM              109.503426
8     PIA              109.503426
6     NOR              109.610985
9     LEC              109.659225
3     ANT              109.846283
0     VER              109.846283
2     LAW              109.846283
1     SAI              109.846283
12    BOR              109.846283
5     TSU              109.846283
4     RUS              109.846283
10    ALO              109.846283
15    COL              109.846283
19    HUL              109.846283
14    BEA              109.953835
13    STR              109.981651
7     HAD              110.000809
18    ALB              110.584648
16    OCO              110.708450
17    GAS              111.311455


In [23]:
y_pred=model.predict(x_test)
print(f"mean absolute error: {mean_absolute_error(y_test,y_pred)}")

mean absolute error: 0.44448415314257517


In [24]:
model2=RandomForestRegressor(n_estimators=300, random_state=39)
model2.fit(x_train,y_train)
merged_data["PredictedRaceTime(RFR)"]=model.fit(x_train,y_train).predict(x_imputed)
final_results_2=merged_data[["Driver","PredictedRaceTime(RFR)"]].sort_values(by="PredictedRaceTime(RFR)")
print(final_results_2)
y_pred2=model2.predict(x_test)
print(f"mean absolute error: {mean_absolute_error(y_test,y_pred2)}")

   Driver  PredictedRaceTime(RFR)
11    HAM              109.503426
8     PIA              109.503426
6     NOR              109.610985
9     LEC              109.659225
3     ANT              109.846283
0     VER              109.846283
2     LAW              109.846283
1     SAI              109.846283
12    BOR              109.846283
5     TSU              109.846283
4     RUS              109.846283
10    ALO              109.846283
15    COL              109.846283
19    HUL              109.846283
14    BEA              109.953835
13    STR              109.981651
7     HAD              110.000809
18    ALB              110.584648
16    OCO              110.708450
17    GAS              111.311455
mean absolute error: 0.5628767560157257


In [25]:
model3=GradientBoostingRegressor(n_estimators=300, learning_rate=0.1, random_state=39)
model3.fit(x_train,y_train)
merged_data["PredictedRaceTime(GBR)"]=model3.predict(x_imputed)
final_results_3=merged_data[["Driver","PredictedRaceTime(GBR)"]].sort_values(by="PredictedRaceTime(GBR)")
print(final_results_3)
y_pred3=model3.predict(x_test)
print(f"mean absolute error: {mean_absolute_error(y_test,y_pred3)}")

   Driver  PredictedRaceTime(GBR)
12    BOR              108.403470
5     TSU              108.425151
15    COL              108.478562
11    HAM              108.484896
8     PIA              108.607878
18    ALB              108.929208
9     LEC              108.943061
14    BEA              109.951726
6     NOR              110.018918
0     VER              110.075021
1     SAI              110.077531
13    STR              110.088672
7     HAD              110.118367
4     RUS              110.127429
19    HUL              110.355854
10    ALO              110.547312
2     LAW              110.677076
16    OCO              110.683318
3     ANT              110.970458
17    GAS              115.307846
mean absolute error: 0.874960032013476


In [26]:
from sklearn.model_selection import RandomizedSearchCV
param_grid = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_depth': [3, 4, 5, 6],
    'learning_rate': [0.01, 0.05, 0.1, 0.3],
    'colsample_bytree': [0.6, 0.8, 1.0],
}

xgb_model = XGBRegressor(random_state=39)

random_search = RandomizedSearchCV(
    estimator=xgb_model,
    param_distributions=param_grid,
    n_iter=50,        
    scoring='neg_mean_absolute_error',
    cv=3,
    verbose=2,
    random_state=39,
    n_jobs=-1
)

random_search.fit(x_imputed, y["LapTime"])

Fitting 3 folds for each of 50 candidates, totalling 150 fits


In [27]:
model4=XGBRegressor(**random_search.best_params_,monotone_constraints=(1,-1,1), random_state=39)
model4.fit(x_train,y_train)
merged_data["PredictedRaceTime(Tuned XGB)"]=model4.predict(x_imputed)
final_results_4=merged_data[["Driver","PredictedRaceTime(Tuned XGB)"]].sort_values(by="PredictedRaceTime(Tuned XGB)")
print(final_results_4)
print(f"mean_absolute_error: {mean_absolute_error(y_test,model4.predict(x_test))}")

   Driver  PredictedRaceTime(Tuned XGB)
11    HAM                    109.503426
8     PIA                    109.503426
6     NOR                    109.610985
9     LEC                    109.659225
3     ANT                    109.846283
0     VER                    109.846283
2     LAW                    109.846283
1     SAI                    109.846283
12    BOR                    109.846283
5     TSU                    109.846283
4     RUS                    109.846283
10    ALO                    109.846283
15    COL                    109.846283
19    HUL                    109.846283
14    BEA                    109.953835
13    STR                    109.981651
7     HAD                    110.000809
18    ALB                    110.584648
16    OCO                    110.708450
17    GAS                    111.311455
mean_absolute_error: 0.44448415314257517


In [28]:
podium=final_results.head(3)
print(f"""🥇P1:{podium.iloc[0]["Driver"]} 
🥈P2:{podium.iloc[1]["Driver"]} 
🥉P3:{podium.iloc[2]["Driver"]}""")

🥇P1:HAM 
🥈P2:PIA 
🥉P3:NOR
