In [14]:
from sqlalchemy import create_engine, func
from sqlalchemy.orm import sessionmaker
import sys
import os
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../")))
from DB.models import init_db, Circuit, Season, RacingWeekend, Driver, Session, SessionResult, Lap, Team, DriverTeamSession, TeamCircuitStats, PitStop
from utils import setup_race_data

import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from itertools import product

# Initialize database connection
global db_session
engine, db_session = init_db()

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option('display.max_colwidth', None) 


## 1) Query race into df and pre process

In [15]:
def get_race_df(year, circuit):
	race_session = (db_session.query(Session)
				.join(RacingWeekend, Session.weekend_id == RacingWeekend.racing_weekend_id)
				.join(Circuit, RacingWeekend.circuit_id == Circuit.circuit_id)
				.filter(
					RacingWeekend.year == year,
					Circuit.circuit_name == circuit,
					Session.session_type == "Race"
				)
				.first())
	
	laps = race_session.laps

	session_results = (
		db_session.query(SessionResult.position, Driver.driver_num)
		.join(Session, Session.session_id == SessionResult.session_id)
		.join(Driver, Driver.driver_id == SessionResult.driver_id)
		.filter(SessionResult.session_id == race_session.session_id)
		.all()
	)

	# Convert session results to a dictionary
	starting_positions = {driver_num: position for position, driver_num in session_results}

	# Convert to DataFrame
	laps_data = []
	for lap in laps:
		# Add a row for Sector 1
		laps_data.append({
			"lap_num": lap.lap_num,
			"sector": 1,
			"stint_num": lap.stint_num,
			"stint_lap": lap.stint_lap,
			"position": lap.position,
			"driver_name": lap.driver.driver_name,
			"driver_number": lap.driver.driver_num,
			"sector_time": lap.s1_time,
			"tyre_type": lap.tyre_type,
			"tyre_laps": lap.tyre_laps,
			"pit": lap.pit,
			"track_status": lap.track_status
		})

		# Add a row for Sector 2
		laps_data.append({
			"lap_num": lap.lap_num,
			"sector": 2,
			"stint_num": lap.stint_num,
			"stint_lap": lap.stint_lap,
			"position": lap.position,
			"driver_name": lap.driver.driver_name,
			"driver_number": lap.driver.driver_num,
			"sector_time": lap.s2_time,
			"tyre_type": lap.tyre_type,
			"tyre_laps": lap.tyre_laps,
			"pit": lap.pit,
			"track_status": lap.track_status
		})

		# Add a row for Sector 3
		laps_data.append({
			"lap_num": lap.lap_num,
			"sector": 3,
			"stint_num": lap.stint_num,
			"stint_lap": lap.stint_lap,
			"position": lap.position,
			"driver_name": lap.driver.driver_name,
			"driver_number": lap.driver.driver_num,
			"sector_time": lap.s3_time,
			"tyre_type": lap.tyre_type,
			"tyre_laps": lap.tyre_laps,
			"pit": lap.pit,
			"track_status": lap.track_status
		})

	# Create a DataFrame from the list of dictionaries
	df = pd.DataFrame(laps_data)
	
	df["starting_position"] = None  # Initialize column with None
	for driver_num, grid_pos in starting_positions.items():
		# Find the first occurrence of the driver
		first_row_index = df[df["driver_number"] == driver_num].index[0]
		df.at[first_row_index, "starting_position"] = grid_pos

	df = df.sort_values(["lap_num", "sector", "position"]).reset_index(drop=True)

	return df


def add_race_data(df):
	# Calculate cumulative race time for each driver
	df["cumulative_time"] = df.groupby("driver_name")["sector_time"].cumsum()
 
	# Calculate rolling pace (average lap time over the last 5 laps)
	df["pace"] = (
		df.groupby(["driver_name", "sector"])["sector_time"]
		.rolling(window=5, min_periods=1)
		.mean()
		.reset_index(level=[0, 1], drop=True)
	)

	# Get car ahead"s cumulative time (car immediately ahead in position for each lap)
	df["front_cumulative_time"] = df.groupby(["lap_num", "sector"])["cumulative_time"].shift(1)
	# This gap is calculated only for drivers who are not in the lead position (position > 1)
	df["gap"] = df["cumulative_time"] - df["front_cumulative_time"]
	df["gap"] = df["gap"].fillna(0)  # Leader has no car ahead, so gap is 0

	# Calculate tyre difference (compared to car immediately ahead in THIS Sector)
	df["front_tyre"] = df.groupby(["lap_num", "sector"])["tyre_type"].shift(1)
	df["tyre_diff"] = df["front_tyre"] - df["tyre_type"]
	df["tyre_diff"] = df["tyre_diff"].fillna(0)  # Leader has no car ahead

	# Calculate tyre age difference (compared to car immediately ahead in THIS Sector)
	df["front_laps"] = df.groupby(["lap_num", "sector"])["stint_lap"].shift(1)
	df["stint_laps_diff"] = df["front_laps"] - df["stint_lap"]
	df["stint_laps_diff"] = df["stint_laps_diff"].fillna(0)  # Leader has no car ahead

	# Calculate DRS availability (within 1s of car ahead IN THIS Sector)
	df["drs_available"] = (
		(df["gap"] <= 1) &
		(df["position"] > 1) &
		(df["lap_num"] > 1)
	)

	# Create target variable for overtaking model (done where its 1 if the driver got overtaken (improved accuracy))
	df["next_position"] = df.groupby("driver_name")["position"].shift(1) 
	df["overtaken"] = ((df["next_position"] < df["position"]) | 
					  (df["next_position"].isna()))
	

	# Cleanup and final sorting
	df = df.drop(columns=["front_cumulative_time", "front_tyre", "next_position"])
	# df = df.sort_values(["lap_num", "sector", "position"]).reset_index(drop=True)

	try:
		new_order = [
			"lap_num", "sector", "stint_num", "stint_lap", "position", "driver_name",
			"driver_number", "sector_time", "gap", "cumulative_time", "tyre_type", "tyre_laps", 
			"pit", "drs_available", "overtaken", "tyre_diff", "front_laps", "stint_laps_diff", "track_status", "pace", "starting_position"
		]

		df = df[new_order]
	except:
		print("dont care")
	return df


df = get_race_df(2023, "Sakhir")
df = add_race_data(df)

# df[df["sector_time"].isna()]
# df[df["driver_name"]=="Logan Sargeant"]
df.head(5)

Unnamed: 0,lap_num,sector,stint_num,stint_lap,position,driver_name,driver_number,sector_time,gap,cumulative_time,tyre_type,tyre_laps,pit,drs_available,overtaken,tyre_diff,front_laps,stint_laps_diff,track_status,pace,starting_position
0,1,1,1,1,1,Max Verstappen,33,,0.0,,1,4,False,False,True,0.0,,0.0,12,,1
1,1,1,1,1,2,Charles Leclerc,16,,0.0,,1,1,False,False,True,0.0,1.0,0.0,12,,19
2,1,1,1,1,3,Sergio Perez,11,,0.0,,1,4,False,False,True,0.0,1.0,0.0,12,,2
3,1,1,1,1,4,Carlos Sainz,55,,0.0,,1,4,False,False,True,0.0,1.0,0.0,12,,4
4,1,1,1,1,5,Lewis Hamilton,44,,0.0,,1,4,False,False,True,0.0,1.0,0.0,12,,5


### Now create overtaking model

In [16]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.utils.class_weight import compute_sample_weight

# Define features and target
features = [
	"gap",
	"pace",
	"tyre_diff",
	"stint_laps_diff",
	"drs_available",
	"cumulative_time",
	"sector_time",
	"pit"
]

X = df[features]
y = df["overtaken"]

from imblearn.over_sampling import SMOTE



# Train the model on the resampled data

# Handle missing values
X = X.fillna(X.mean())

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)


# Base model
gbc = GradientBoostingClassifier(
	n_estimators=200,
	learning_rate=0.05,
	max_depth=3,
	
	subsample=0.8,
	random_state=42
)



# # Calibrate for better probabilities
model = CalibratedClassifierCV(gbc, method="sigmoid", cv=3)

# # Train on all data
model.fit(X_resampled, y_resampled)

# from lightgbm import LGBMClassifier

# lgbm = LGBMClassifier(
# 	n_estimators=100,
# 	learning_rate=0.1,
# 	max_depth=3,
# 	subsample=0.8,
# 	random_state=42
# )
# model = CalibratedClassifierCV(lgbm, method="isotonic", cv=3)
# model.fit(X, y, sample_weight=sample_weights)


feature_means = X.mean()

def predict_overtake(new_race_df):
	# Select the same features used during training
	X_new = new_race_df[features]
	
	# Handle missing values (if any)
	X_new = X_new.fillna(feature_means)
	
	# Make predictions using the trained model
	new_race_df["predicted_overtake"] = model.predict(X_new)
	
	return new_race_df

In [17]:
from sklearn.metrics import accuracy_score, classification_report

# Load the 2023 race data


# Predict overtakes using the rule-based function
new_race_df = predict_overtake(df)

# Calculate accuracy
actual_overtakes = new_race_df["overtaken"]
predicted_overtakes = new_race_df["predicted_overtake"]
accuracy = accuracy_score(actual_overtakes, predicted_overtakes)
print(f"Accuracy: {accuracy:.3f}")

# Generate classification report
print("\nClassification Report:")
print(classification_report(
	actual_overtakes,
	predicted_overtakes,
	target_names=["No Overtake", "Overtaken"]
))

Accuracy: 0.997

Classification Report:
              precision    recall  f1-score   support

 No Overtake       1.00      1.00      1.00      3068
   Overtaken       0.96      0.93      0.94        97

    accuracy                           1.00      3165
   macro avg       0.98      0.96      0.97      3165
weighted avg       1.00      1.00      1.00      3165



Accuracy: 0.997

Classification Report:
			  precision    recall  f1-score   support

 No Overtake       1.00      1.00      1.00      3068
	Overtaken       0.96      0.93      0.94        97

	accuracy                           1.00      3165
	macro avg       0.98      0.96      0.97      3165
weighted avg       1.00      1.00      1.00      3165

In [18]:
race_data = setup_race_data(df)




In [19]:
import pandas as pd

def race_sim(precomputed_data, given_driver=None, simulated_strategy=None):
	"""
	simulated_strategy
	"""
	# Extract precomputed data
	driver_tyre_coefficients = precomputed_data["driver_tyre_coefficients"]
	driver_strategies = precomputed_data["driver_strategies"]
	max_laps = precomputed_data["max_laps"]
	drivers = precomputed_data["drivers"]
	driver_names = precomputed_data["driver_names"]
	initial_positions = precomputed_data["initial_positions"]
	base_sector_times = precomputed_data["base_sector_times"]
	fuel_corrections = precomputed_data["fuel_corrections"]

	drivers_data = []

	if given_driver and simulated_strategy:
		driver_strategies[given_driver] = simulated_strategy

	for driver in drivers:
		drivers_data.append({
			"driver_number": driver,
			"driver_name": driver_names[driver],
			"pit_schedule": driver_strategies[driver],
			"tyre_type": driver_strategies[driver][1],
			"lap_num": 1,
			"sector": 0,
			"sector_time": 0,
			"stint_lap": 1,
			"cumulative_time": 0.0,
			"gap": 0,
			"pit": False,
			"position": initial_positions[driver],
			"consecutive_laps_within_2s": 0,
			"base_sector_times": base_sector_times[driver],
			"pace": 0,  # Initialize pace as 0
			"tyre_diff": 0,  # Initialize tyre difference as 0
			"stint_laps_diff": 0,  # Initialize stint laps difference as 0
			"drs_available": False,  # Initialize DRS availability as False
		})
		
	drivers_df = pd.DataFrame(drivers_data)

	simulated_data = []
	for lap in range(2, max_laps + 1):

		drivers_df["lap_num"] += 1
		drivers_df["stint_lap"] += 1

		for sector in range(1, 4):
			drivers_df["sector"] = sector
			for index, row in drivers_df.iterrows():

				# Handle pit stops at the start of a lap (sector 1)
				if sector == 1 and lap in row["pit_schedule"]:
					drivers_df.at[index, "pit"] = True  # Mark pit stop
					drivers_df.at[index, "cumulative_time"] += 20  # Add pit stop penalty
					drivers_df.at[index, "stint_lap"] = 1  # Reset stint lap
					drivers_df.at[index, "tyre_type"] = row["pit_schedule"][lap]  # Change tyre

				else:
					drivers_df.at[index, "pit"] = False

				# Calculate tyre degradation coefficients
				a, b, c = driver_tyre_coefficients[row["driver_number"]][sector][drivers_df.at[index, "tyre_type"]]
				sector_time = (
					row["base_sector_times"][sector]  # Base sector time
					+ (a * drivers_df.at[index, "stint_lap"]**2 + b * drivers_df.at[index, "stint_lap"] + c)  # Tyre degradation
					+ fuel_corrections[lap]  # Fuel effect
				)

				# Update sector time and cumulative time
				drivers_df.at[index, "sector_time"] = sector_time
				drivers_df.at[index, "cumulative_time"] += sector_time

				# drivers_df = drivers_df.sort_values(by="cumulative_time", ascending=True)

				# Reassign positions based on the sorted order
				# drivers_df["position"] = range(1, len(drivers_df) + 1)

				ahead_pos = row["position"] - 1

				if ahead_pos > 0:
					ahead_time = drivers_df.loc[drivers_df["position"] == ahead_pos, "cumulative_time"].values[0]

					gap = ahead_time - row["cumulative_time"]
					drivers_df.at[index, "gap"] = gap
					
				else:
					gap = 0
					drivers_df.at[index, "gap"] = 0

				# calc overtakes
				if gap < 0:
					if drivers_df.at[ahead_index, "cumulative_time"] > drivers_df.at[index, "cumulative_time"]:
						drivers_df.at[ahead_index, "cumulative_time"], drivers_df.at[index, "cumulative_time"] = (
							drivers_df.at[index, "cumulative_time"],
							drivers_df.at[ahead_index, "cumulative_time"],
						)

				elif gap < 0.8 and ahead_pos > 0:
					if ahead_pos > 0:
						ahead_row = drivers_df.loc[drivers_df["position"] == ahead_pos].iloc[0]
						   
						df["pace"] = (
							df.groupby(["driver_name", "sector"])["sector_time"]
							.rolling(window=5, min_periods=1)
							.mean()
							.reset_index(level=[0, 1], drop=True)
						)
						
						# Tyre difference
						drivers_df.at[index, "tyre_diff"] = ahead_row["tyre_type"] - row["tyre_type"]

						# Stint laps difference
						drivers_df.at[index, "stint_laps_diff"] = ahead_row["stint_lap"] - row["stint_lap"]

						# DRS availability
						drivers_df.at[index, "drs_available"] = gap <= 1.0

					else:
						drivers_df.at[index, "pace"] = (
							drivers_df.loc[
								(drivers_df["driver_number"] == row["driver_number"]) &
								(drivers_df["lap_num"] >= lap - 5),
								"sector_time"
							].mean()
						)

						drivers_df.at[index, "tyre_diff"] = 0

						# Stint laps difference
						drivers_df.at[index, "stint_laps_diff"] = 0

						# DRS availability
						drivers_df.at[index, "drs_available"] = 0

					
					drivers_df = predict_overtake(drivers_df)

					ahead_index = drivers_df[drivers_df["position"] == ahead_pos].index[0]
					# Check if an overtake is predicted
					if drivers_df.at[index, "predicted_overtake"]:
						# Swap positions between the current driver and the driver ahead
						current_position = drivers_df.at[index, "position"]

						# Swap positions
						drivers_df.at[index, "position"], drivers_df.at[ahead_index, "position"] = (
							drivers_df.at[ahead_index, "position"],
							drivers_df.at[index, "position"],
						)

						drivers_df.at[index, "cumulative_time"], drivers_df.at[ahead_index, "cumulative_time"] = (
							drivers_df.at[ahead_index, "cumulative_time"],
							drivers_df.at[index, "cumulative_time"],
						)

					else:
						# if the driver behind is faster overall, then keep them behind
						if drivers_df.at[ahead_index, "cumulative_time"] > drivers_df.at[index, "cumulative_time"]:
							drivers_df.at[ahead_index, "cumulative_time"], drivers_df.at[index, "cumulative_time"] = (
								drivers_df.at[index, "cumulative_time"],
								drivers_df.at[ahead_index, "cumulative_time"],
							)


	return drivers_df

# print(race_data)
sim_df = race_sim(race_data, 16, {1:1, 15:1, 35:3})
sim_df

drivers_df = sim_df.sort_values(by="position", ascending=True)


# Reset the index (optional, for cleaner output)
drivers_df = drivers_df.reset_index(drop=True)
drivers_df

  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


Unnamed: 0,driver_number,driver_name,pit_schedule,tyre_type,lap_num,sector,sector_time,stint_lap,cumulative_time,gap,pit,position,consecutive_laps_within_2s,base_sector_times,pace,tyre_diff,stint_laps_diff,drs_available,predicted_overtake
0,33,Max Verstappen,"{15: 1, 37: 3, 1: 1}",3,57,3,23.818352,21,5713.607516,0.0,False,1,0,"{1: 30.22119298245614, 2: 41.314, 3: 22.741999999999997}",0,0,0,False,False
1,11,Sergio Perez,"{18: 1, 35: 3, 1: 1}",3,57,3,23.875931,23,5712.180731,25.302716,False,2,0,"{1: 30.208192982456143, 2: 41.508736842105264, 3: 22.968999999999998}",0,0,0,False,False
2,14,Fernando Alonso,"{15: 3, 35: 3, 1: 1}",3,57,3,23.842212,23,5688.892229,47.130714,False,3,0,"{1: 30.154771929824562, 2: 41.03743859649123, 3: 23.05417543859649}",0,0,0,False,False
3,55,Carlos Sainz,"{14: 3, 32: 3, 1: 1}",3,57,3,24.176708,26,5752.380474,-60.532527,False,4,0,"{1: 30.02519298245614, 2: 41.93887719298245, 3: 23.09}",0,0,0,True,False
4,44,Lewis Hamilton,"{13: 3, 31: 3, 1: 1}",3,57,3,24.060567,27,5711.770085,64.670956,False,5,0,"{1: 29.933877192982457, 2: 41.58843859649123, 3: 22.93257894736842}",0,0,0,False,False
5,18,Lance Stroll,"{16: 3, 31: 3, 1: 1}",3,57,3,24.135214,27,5700.902104,35.003195,False,6,0,"{1: 30.112192982456143, 2: 41.051947368421054, 3: 23.033947368421053}",0,0,0,False,False
6,63,George Russell,"{14: 3, 32: 3, 1: 1}",3,57,3,23.987645,26,5749.759252,-47.882317,False,7,0,"{1: 29.982175438596492, 2: 41.70487719298246, 3: 23.003877192982458}",0,0,-17,True,True
7,77,Valtteri Bottas,"{12: 3, 30: 3, 1: 1}",3,57,3,24.229132,28,5740.167765,33.820618,False,8,0,"{1: 30.272877192982456, 2: 41.596649122807015, 3: 23.151280701754388}",0,0,0,False,False
8,10,Pierre Gasly,"{10: 3, 26: 3, 41: 1, 1: 1}",1,57,3,24.568446,17,5731.160264,33.575947,False,9,0,"{1: 29.760228070175437, 2: 41.08422807017544, 3: 23.101684210526315}",0,0,0,False,False
9,23,Alexander Albon,"{12: 1, 27: 3, 41: 1, 1: 1}",1,57,3,24.240605,17,5780.178799,-49.346375,False,10,0,"{1: 30.183894736842106, 2: 41.988052631578945, 3: 23.158280701754386}",0,0,0,True,False


In [20]:
def get_accuracy(year, circuit, sim_df):
	# Fetch the actual race session from the database
	race_session = (
		db_session.query(Session)
		.join(RacingWeekend, Session.weekend_id == RacingWeekend.racing_weekend_id)
		.join(Circuit, RacingWeekend.circuit_id == Circuit.circuit_id)
		.filter(
			RacingWeekend.year == year,
			Circuit.circuit_name == circuit,
			Session.session_type == "Race"
		)
		.first()
	)

	if not race_session:
		raise ValueError(f"No race session found for {year} at {circuit}")

	# Fetch the actual race laps and results
	laps = race_session.laps
	session_results = (
		db_session.query(SessionResult.position, Driver.driver_num)
		.join(Session, Session.session_id == SessionResult.session_id)
		.join(Driver, Driver.driver_id == SessionResult.driver_id)
		.filter(SessionResult.session_id == race_session.session_id)
		.all()
	)

	# Convert session_results to a dictionary for easy lookup
	actual_results = {driver_num: position for position, driver_num in session_results}

	# Extract simulated results from sim_df
	sim_results = sim_df.groupby("driver_number").last()["position"].to_dict()

	# Ensure both results have the same drivers
	common_drivers = set(actual_results.keys()).intersection(sim_results.keys())
	if not common_drivers:
		raise ValueError("No common drivers found between actual and simulated results")

	# Filter results to only include common drivers
	actual_positions = [actual_results[driver] for driver in common_drivers]
	sim_positions = [sim_results[driver] for driver in common_drivers]

	# Calculate accuracy metrics
	position_accuracy = sum(1 for a, s in zip(actual_positions, sim_positions) if a == s) / len(common_drivers)
	top_3_accuracy = sum(1 for a, s in zip(actual_positions, sim_positions) if (a <= 3 and s <= 3)) / len(common_drivers)
	mean_error = sum(abs(a - s) for a, s in zip(actual_positions, sim_positions)) / len(common_drivers)
	total_error = sum(abs(a - s) for a, s in zip(actual_positions, sim_positions))

	# Return accuracy metrics
	return {
		"position_accuracy": position_accuracy,
		"top_3_accuracy": top_3_accuracy,
		"mean_error": mean_error,
		"total_error": total_error,


	}

get_accuracy(2023, "Sakhir", drivers_df)

{'position_accuracy': 1.0,
 'top_3_accuracy': 0.15,
 'mean_error': 0.0,
 'total_error': 0}

In [21]:
import pandas as pd
from skopt import gp_minimize
from skopt.space import Integer, Categorical

def optimize_strategy(precomputed_data, n_calls=30):
    max_laps = precomputed_data["max_laps"]
    driver_number = 14  # Target driver to optimize for

    # Define the search space: start_tyre, pit1_lap, pit1_tyre, pit2_lap, pit2_tyre
    space = [
        Categorical([1, 2, 3], name='start_tyre'),
        Integer(2, max_laps - 1, name='pit1_lap'),
        Categorical([1, 2, 3], name='pit1_tyre'),
        Integer(2, max_laps - 1, name='pit2_lap'),
        Categorical([1, 2, 3], name='pit2_tyre'),
    ]

    def objective(params):
        start_tyre, pit1_lap, pit1_tyre, pit2_lap, pit2_tyre = params

        # Check if pit stops are in a valid order and laps are within bounds
        if pit1_lap >= pit2_lap or pit1_lap < 2 or pit2_lap >= max_laps:
            return 20.0  # Penalize invalid strategies with worst position

        # Construct the strategy dictionary
        strategy = {
            1: start_tyre,
            int(pit1_lap): pit1_tyre,
            int(pit2_lap): pit2_tyre
        }

        # Run the simulation with the current strategy
        try:
            drivers_df = race_sim(precomputed_data, given_driver=driver_number, simulated_strategy=strategy)
            driver_row = drivers_df[drivers_df['driver_number'] == driver_number].iloc[0]
            position = driver_row['position']
            return position
        except Exception as e:
            print(f"Error during simulation: {e}")
            return 20.0  # Return worst position in case of errors

    # Perform Bayesian optimization
    result = gp_minimize(
        objective,
        space,
        n_calls=n_calls,
        random_state=42,
        verbose=True
    )

    # Extract best parameters and construct the optimal strategy
    best_params = result.x
    best_strategy = {
        1: best_params[0],
        int(best_params[1]): best_params[2],
        int(best_params[3]): best_params[4]
    }

    return best_strategy, result.fun

# Example usage (assuming precomputed_data is available):
best_strategy, best_position = optimize_strategy(race_data, n_calls=100)
print("Best Strategy:", best_strategy)
print("Best Position:", best_position)

Iteration No: 1 started. Evaluating function at random point.


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


KeyboardInterrupt: 

In [None]:
from bayes_opt import BayesianOptimization
import numpy as np

def bayesian_strategy_optimization(race_data, given_driver, initial_strategy, max_iterations=50):
    """
    Optimize the pit strategy for a given driver using Bayesian Optimization.
    
    Args:
        race_data (dict): Precomputed race data.
        given_driver (int): The driver number to optimize the strategy for.
        initial_strategy (dict): Initial pit strategy (e.g., {1: 1, 15: 1, 35: 3}).
                                 Key is the lap number, value is the tyre type.
        max_iterations (int): Maximum number of iterations for optimization.
        
    Returns:
        tuple: Best strategy (dict) and best finishing position (int).
    """
    # Extract precomputed data
    max_laps = race_data["max_laps"]
    num_pits = len(initial_strategy) - 1  # Exclude the starting tyre
    
    # Define the objective function for Bayesian Optimization
    def objective_function(**kwargs):
        # Parse the arguments into a strategy
        pit_laps = sorted([int(kwargs[f"pit_lap_{i}"]) for i in range(num_pits)])
        tyres = [int(kwargs[f"tyre_{i}"]) for i in range(num_pits)]
        
        # Ensure pit laps are unique and within valid range
        pit_laps = sorted(set([max(1, min(max_laps, lap)) for lap in pit_laps]))
        
        # Ensure tyre choices are valid (1=Hard, 2=Medium, 3=Soft)
        tyres = [max(1, min(3, tyre)) for tyre in tyres]
        
        # Construct the strategy dictionary
        strategy = {1: int(kwargs["starting_tyre"])}  # Starting tyre
        for lap, tyre in zip(pit_laps, tyres):
            strategy[lap] = tyre
        
        # Evaluate the strategy using the race simulation
        sim_df = race_sim(race_data, given_driver=given_driver, simulated_strategy=strategy)
        
        # Get the final position of the given driver
        final_position = sim_df[sim_df["driver_number"] == given_driver]["position"].iloc[-1]
        
        # Minimize finishing position (lower is better)
        return -final_position  # Negative because BayesianOptimization maximizes by default
    
    # Set up the parameter bounds for Bayesian Optimization
    pbounds = {
        "starting_tyre": (1, 3),  # Starting tyre (1=Hard, 2=Medium, 3=Soft)
    }
    for i in range(num_pits):
        pbounds[f"pit_lap_{i}"] = (1, max_laps)  # Pit laps must be within the race
    for i in range(num_pits):
        pbounds[f"tyre_{i}"] = (1, 3)  # Tyre choices must be 1, 2, or 3
    
    # Initialize the Bayesian Optimizer
    optimizer = BayesianOptimization(
        f=objective_function,
        pbounds=pbounds,
        verbose=2,
        random_state=42
    )
    
    # Perform the optimization
    optimizer.maximize(init_points=5, n_iter=max_iterations)
    
    # Extract the best strategy from the optimizer
    best_params = optimizer.max["params"]
    best_starting_tyre = int(best_params["starting_tyre"])
    best_pit_laps = sorted(set([int(best_params[f"pit_lap_{i}"]) for i in range(num_pits)]))
    best_tyres = [int(best_params[f"tyre_{i}"]) for i in range(num_pits)]
    
    # Construct the best strategy dictionary
    best_strategy = {1: best_starting_tyre}
    for lap, tyre in zip(best_pit_laps, best_tyres):
        best_strategy[lap] = tyre
    
    # Evaluate the best strategy to get the finishing position
    sim_df = race_sim(race_data, given_driver=given_driver, simulated_strategy=best_strategy)
    best_position = sim_df[sim_df["driver_number"] == given_driver]["position"].iloc[-1]
    
    print(f"Best Strategy: {best_strategy}")
    print(f"Best Finishing Position: {best_position}")
    
    return best_strategy, best_position

# Example Usage
initial_strategy = {1: 1, 15: 1, 35: 3}  # Initial pit strategy for driver 16
best_strategy, best_position = bayesian_strategy_optimization(
    race_data=race_data,
    given_driver=14,
    initial_strategy=initial_strategy,
    max_iterations=50
)
print(f"Best Strategy: {best_strategy}")
print(f"Best Finishing Position: {best_position}")

|   iter    |  target   | pit_lap_0 | pit_lap_1 | starti... |  tyre_0   |  tyre_1   |
-------------------------------------------------------------------------------------


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m1        [39m | [39m-3.0     [39m | [39m21.97    [39m | [39m54.24    [39m | [39m2.464    [39m | [39m2.197    [39m | [39m1.312    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m2        [39m | [39m-3.0     [39m | [39m9.736    [39m | [39m4.253    [39m | [39m2.732    [39m | [39m2.202    [39m | [39m2.416    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m3        [39m | [39m-3.0     [39m | [39m2.153    [39m | [39m55.31    [39m | [39m2.665    [39m | [39m1.425    [39m | [39m1.364    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m4        [39m | [39m-3.0     [39m | [39m11.27    [39m | [39m18.04    [39m | [39m2.05     [39m | [39m1.864    [39m | [39m1.582    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m5        [39m | [39m-3.0     [39m | [39m35.26    [39m | [39m8.812    [39m | [39m1.584    [39m | [39m1.733    [39m | [39m1.912    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m6        [39m | [39m-3.0     [39m | [39m56.73    [39m | [39m56.53    [39m | [39m1.374    [39m | [39m1.426    [39m | [39m1.984    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m7        [39m | [39m-3.0     [39m | [39m56.68    [39m | [39m1.244    [39m | [39m1.898    [39m | [39m1.325    [39m | [39m2.362    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m8        [39m | [39m-3.0     [39m | [39m56.51    [39m | [39m56.78    [39m | [39m1.709    [39m | [39m2.697    [39m | [39m1.521    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m9        [39m | [39m-3.0     [39m | [39m1.017    [39m | [39m2.157    [39m | [39m2.478    [39m | [39m1.118    [39m | [39m2.499    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m10       [39m | [39m-3.0     [39m | [39m56.54    [39m | [39m1.498    [39m | [39m1.874    [39m | [39m2.287    [39m | [39m1.975    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m11       [39m | [39m-3.0     [39m | [39m1.022    [39m | [39m56.99    [39m | [39m1.448    [39m | [39m1.142    [39m | [39m2.475    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m12       [39m | [39m-3.0     [39m | [39m56.67    [39m | [39m56.98    [39m | [39m1.882    [39m | [39m2.748    [39m | [39m1.963    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m13       [39m | [39m-3.0     [39m | [39m1.111    [39m | [39m1.273    [39m | [39m1.048    [39m | [39m2.346    [39m | [39m1.998    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m14       [39m | [39m-3.0     [39m | [39m56.76    [39m | [39m1.652    [39m | [39m1.519    [39m | [39m1.543    [39m | [39m1.378    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m15       [39m | [39m-3.0     [39m | [39m1.034    [39m | [39m56.67    [39m | [39m2.026    [39m | [39m1.239    [39m | [39m1.713    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m16       [39m | [39m-3.0     [39m | [39m56.66    [39m | [39m56.95    [39m | [39m2.544    [39m | [39m2.694    [39m | [39m1.094    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m17       [39m | [39m-3.0     [39m | [39m1.128    [39m | [39m1.155    [39m | [39m1.412    [39m | [39m2.711    [39m | [39m2.517    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m18       [39m | [39m-3.0     [39m | [39m56.22    [39m | [39m1.54     [39m | [39m1.038    [39m | [39m2.62     [39m | [39m2.669    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m19       [39m | [39m-3.0     [39m | [39m1.455    [39m | [39m56.85    [39m | [39m1.438    [39m | [39m2.144    [39m | [39m1.923    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m20       [39m | [39m-3.0     [39m | [39m56.72    [39m | [39m56.66    [39m | [39m2.399    [39m | [39m2.22     [39m | [39m1.032    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m21       [39m | [39m-3.0     [39m | [39m1.006    [39m | [39m1.921    [39m | [39m1.877    [39m | [39m1.777    [39m | [39m1.957    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m22       [39m | [39m-3.0     [39m | [39m56.55    [39m | [39m1.316    [39m | [39m1.792    [39m | [39m1.519    [39m | [39m2.021    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m23       [39m | [39m-3.0     [39m | [39m1.167    [39m | [39m56.31    [39m | [39m2.386    [39m | [39m1.726    [39m | [39m2.836    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m24       [39m | [39m-3.0     [39m | [39m56.8     [39m | [39m56.66    [39m | [39m1.295    [39m | [39m1.345    [39m | [39m1.836    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m25       [39m | [39m-3.0     [39m | [39m56.7     [39m | [39m1.224    [39m | [39m1.115    [39m | [39m1.916    [39m | [39m2.454    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m26       [39m | [39m-3.0     [39m | [39m1.009    [39m | [39m1.706    [39m | [39m1.654    [39m | [39m1.259    [39m | [39m1.585    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m27       [39m | [39m-3.0     [39m | [39m1.222    [39m | [39m56.9     [39m | [39m1.101    [39m | [39m2.329    [39m | [39m2.672    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m28       [39m | [39m-3.0     [39m | [39m56.6     [39m | [39m56.99    [39m | [39m2.694    [39m | [39m2.552    [39m | [39m1.936    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m29       [39m | [39m-3.0     [39m | [39m56.88    [39m | [39m1.293    [39m | [39m1.923    [39m | [39m1.648    [39m | [39m2.891    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m30       [39m | [39m-3.0     [39m | [39m1.148    [39m | [39m1.832    [39m | [39m1.48     [39m | [39m1.911    [39m | [39m1.628    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m31       [39m | [39m-3.0     [39m | [39m1.446    [39m | [39m56.73    [39m | [39m2.566    [39m | [39m1.199    [39m | [39m2.993    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m32       [39m | [39m-3.0     [39m | [39m56.96    [39m | [39m56.93    [39m | [39m1.644    [39m | [39m2.763    [39m | [39m1.736    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m33       [39m | [39m-3.0     [39m | [39m1.123    [39m | [39m1.327    [39m | [39m2.57     [39m | [39m2.724    [39m | [39m2.863    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m34       [39m | [39m-3.0     [39m | [39m56.89    [39m | [39m1.172    [39m | [39m2.816    [39m | [39m2.732    [39m | [39m1.972    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m35       [39m | [39m-3.0     [39m | [39m1.009    [39m | [39m56.05    [39m | [39m2.402    [39m | [39m1.366    [39m | [39m2.372    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m36       [39m | [39m-3.0     [39m | [39m56.83    [39m | [39m56.53    [39m | [39m1.526    [39m | [39m1.777    [39m | [39m1.662    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m37       [39m | [39m-3.0     [39m | [39m1.201    [39m | [39m56.92    [39m | [39m1.056    [39m | [39m2.714    [39m | [39m1.172    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m38       [39m | [39m-3.0     [39m | [39m56.9     [39m | [39m1.06     [39m | [39m2.856    [39m | [39m1.449    [39m | [39m2.835    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m39       [39m | [39m-3.0     [39m | [39m2.123    [39m | [39m1.251    [39m | [39m1.527    [39m | [39m1.464    [39m | [39m2.895    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m40       [39m | [39m-3.0     [39m | [39m56.6     [39m | [39m56.74    [39m | [39m2.855    [39m | [39m1.4      [39m | [39m2.96     [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m41       [39m | [39m-3.0     [39m | [39m1.06     [39m | [39m1.451    [39m | [39m2.138    [39m | [39m1.672    [39m | [39m1.337    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m42       [39m | [39m-3.0     [39m | [39m56.86    [39m | [39m1.367    [39m | [39m1.221    [39m | [39m2.751    [39m | [39m2.691    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m43       [39m | [39m-3.0     [39m | [39m1.047    [39m | [39m56.76    [39m | [39m2.083    [39m | [39m2.542    [39m | [39m2.126    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m44       [39m | [39m-3.0     [39m | [39m56.77    [39m | [39m56.96    [39m | [39m2.992    [39m | [39m2.557    [39m | [39m2.34     [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m45       [39m | [39m-3.0     [39m | [39m56.82    [39m | [39m1.221    [39m | [39m2.792    [39m | [39m1.155    [39m | [39m1.416    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m46       [39m | [39m-3.0     [39m | [39m1.07     [39m | [39m1.889    [39m | [39m2.63     [39m | [39m2.92     [39m | [39m2.8      [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m47       [39m | [39m-3.0     [39m | [39m1.181    [39m | [39m56.27    [39m | [39m1.033    [39m | [39m1.154    [39m | [39m2.482    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m48       [39m | [39m-3.0     [39m | [39m1.289    [39m | [39m1.002    [39m | [39m2.192    [39m | [39m2.902    [39m | [39m2.827    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m49       [39m | [39m-3.0     [39m | [39m56.25    [39m | [39m56.79    [39m | [39m1.901    [39m | [39m1.034    [39m | [39m2.19     [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m50       [39m | [39m-3.0     [39m | [39m56.89    [39m | [39m1.063    [39m | [39m1.435    [39m | [39m1.376    [39m | [39m2.712    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m51       [39m | [39m-3.0     [39m | [39m1.029    [39m | [39m56.82    [39m | [39m2.656    [39m | [39m2.581    [39m | [39m1.956    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m52       [39m | [39m-3.0     [39m | [39m56.98    [39m | [39m56.67    [39m | [39m2.266    [39m | [39m2.283    [39m | [39m2.899    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m53       [39m | [39m-3.0     [39m | [39m1.118    [39m | [39m1.199    [39m | [39m1.566    [39m | [39m2.982    [39m | [39m2.703    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m54       [39m | [39m-3.0     [39m | [39m56.44    [39m | [39m1.741    [39m | [39m2.381    [39m | [39m2.333    [39m | [39m2.95     [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


| [39m55       [39m | [39m-3.0     [39m | [39m1.025    [39m | [39m56.35    [39m | [39m2.444    [39m | [39m1.518    [39m | [39m1.281    [39m |


  drivers_df.at[index, "sector_time"] = sector_time
  drivers_df.at[index, "gap"] = gap


Best Strategy: {1: 2, 21: 2, 54: 1}
Best Finishing Position: 3
Best Strategy: {1: 2, 21: 2, 54: 1}
Best Finishing Position: 3
