In [1]:
from sqlalchemy import create_engine, func
from sqlalchemy.orm import sessionmaker
import sys
import os
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../")))
from DB.models import init_db, Circuit, Season, RacingWeekend, Driver, Session, SessionResult, Lap, Team, DriverTeamSession, TeamCircuitStats, PitStop
from utils import setup_race_data

import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from itertools import product

# Initialize database connection
global db_session
engine, db_session = init_db()

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option('display.max_colwidth', None) 


## 1) Query race into df and pre process

In [2]:
def get_race_df(year, circuit):
	race_session = (db_session.query(Session)
				.join(RacingWeekend, Session.weekend_id == RacingWeekend.racing_weekend_id)
				.join(Circuit, RacingWeekend.circuit_id == Circuit.circuit_id)
				.filter(
					RacingWeekend.year == year,
					Circuit.circuit_name == circuit,
					Session.session_type == "Race"
				)
				.first())
	
	laps = race_session.laps

	session_results = (
		db_session.query(SessionResult.position, Driver.driver_num)
		.join(Session, Session.session_id == SessionResult.session_id)
		.join(Driver, Driver.driver_id == SessionResult.driver_id)
		.filter(SessionResult.session_id == race_session.session_id)
		.all()
	)

	# Convert session results to a dictionary
	starting_positions = {driver_num: position for position, driver_num in session_results}

	# Convert to DataFrame
	laps_data = []
	for lap in laps:
		# Add a row for Sector 1
		laps_data.append({
			"lap_num": lap.lap_num,
			"sector": 1,
			"stint_num": lap.stint_num,
			"stint_lap": lap.stint_lap,
			"position": lap.position,
			"driver_name": lap.driver.driver_name,
			"driver_number": lap.driver.driver_num,
			"sector_time": lap.s1_time,
			"tyre_type": lap.tyre_type,
			"tyre_laps": lap.tyre_laps,
			"pit": lap.pit,
			"track_status": lap.track_status
		})

		# Add a row for Sector 2
		laps_data.append({
			"lap_num": lap.lap_num,
			"sector": 2,
			"stint_num": lap.stint_num,
			"stint_lap": lap.stint_lap,
			"position": lap.position,
			"driver_name": lap.driver.driver_name,
			"driver_number": lap.driver.driver_num,
			"sector_time": lap.s2_time,
			"tyre_type": lap.tyre_type,
			"tyre_laps": lap.tyre_laps,
			"pit": lap.pit,
			"track_status": lap.track_status
		})

		# Add a row for Sector 3
		laps_data.append({
			"lap_num": lap.lap_num,
			"sector": 3,
			"stint_num": lap.stint_num,
			"stint_lap": lap.stint_lap,
			"position": lap.position,
			"driver_name": lap.driver.driver_name,
			"driver_number": lap.driver.driver_num,
			"sector_time": lap.s3_time,
			"tyre_type": lap.tyre_type,
			"tyre_laps": lap.tyre_laps,
			"pit": lap.pit,
			"track_status": lap.track_status
		})

	# Create a DataFrame from the list of dictionaries
	df = pd.DataFrame(laps_data)
	
	df["starting_position"] = None  # Initialize column with None
	for driver_num, grid_pos in starting_positions.items():
		# Find the first occurrence of the driver
		first_row_index = df[df["driver_number"] == driver_num].index[0]
		df.at[first_row_index, "starting_position"] = grid_pos

	df = df.sort_values(["lap_num", "sector", "position"]).reset_index(drop=True)

	return df


def add_race_data(df):
	# Calculate cumulative race time for each driver
	df["cumulative_time"] = df.groupby("driver_name")["sector_time"].cumsum()
 
	# Calculate rolling pace (average lap time over the last 5 laps)
	df["pace"] = (
		df.groupby(["driver_name", "sector"])["sector_time"]
		.rolling(window=5, min_periods=1)
		.mean()
		.reset_index(level=[0, 1], drop=True)
	)

	# Get car ahead"s cumulative time (car immediately ahead in position for each lap)
	df["front_cumulative_time"] = df.groupby(["lap_num", "sector"])["cumulative_time"].shift(1)
	# This gap is calculated only for drivers who are not in the lead position (position > 1)
	df["gap"] = df["cumulative_time"] - df["front_cumulative_time"]
	df["gap"] = df["gap"].fillna(0)  # Leader has no car ahead, so gap is 0

	# Calculate tyre difference (compared to car immediately ahead in THIS Sector)
	df["front_tyre"] = df.groupby(["lap_num", "sector"])["tyre_type"].shift(1)
	df["tyre_diff"] = df["front_tyre"] - df["tyre_type"]
	df["tyre_diff"] = df["tyre_diff"].fillna(0)  # Leader has no car ahead

	# Calculate tyre age difference (compared to car immediately ahead in THIS Sector)
	df["front_laps"] = df.groupby(["lap_num", "sector"])["stint_lap"].shift(1)
	df["stint_laps_diff"] = df["front_laps"] - df["stint_lap"]
	df["stint_laps_diff"] = df["stint_laps_diff"].fillna(0)  # Leader has no car ahead

	# Calculate DRS availability (within 1s of car ahead IN THIS Sector)
	df["drs_available"] = (
		(df["gap"] <= 1) &
		(df["position"] > 1) &
		(df["lap_num"] > 1)
	)

	# Create target variable for overtaking model (done where its 1 if the driver got overtaken (improved accuracy))
	df["next_position"] = df.groupby("driver_name")["position"].shift(1) 
	df["overtaken"] = ((df["next_position"] < df["position"]) | 
					  (df["next_position"].isna()))
	

	# Cleanup and final sorting
	df = df.drop(columns=["front_cumulative_time", "front_tyre", "next_position"])
	# df = df.sort_values(["lap_num", "sector", "position"]).reset_index(drop=True)

	try:
		new_order = [
			"lap_num", "sector", "stint_num", "stint_lap", "position", "driver_name",
			"driver_number", "sector_time", "gap", "cumulative_time", "tyre_type", "tyre_laps", 
			"pit", "drs_available", "overtaken", "tyre_diff", "front_laps", "stint_laps_diff", "track_status", "pace", "starting_position"
		]

		df = df[new_order]
	except:
		print("dont care")
	return df


df = get_race_df(2023, "Sakhir")
df = add_race_data(df)

# df[df["sector_time"].isna()]
# df[df["driver_name"]=="Logan Sargeant"]
df.head(5)

Unnamed: 0,lap_num,sector,stint_num,stint_lap,position,driver_name,driver_number,sector_time,gap,cumulative_time,tyre_type,tyre_laps,pit,drs_available,overtaken,tyre_diff,front_laps,stint_laps_diff,track_status,pace,starting_position
0,1,1,1,1,1,Max Verstappen,33,,0.0,,1,4,False,False,True,0.0,,0.0,12,,1
1,1,1,1,1,2,Charles Leclerc,16,,0.0,,1,1,False,False,True,0.0,1.0,0.0,12,,19
2,1,1,1,1,3,Sergio Perez,11,,0.0,,1,4,False,False,True,0.0,1.0,0.0,12,,2
3,1,1,1,1,4,Carlos Sainz,55,,0.0,,1,4,False,False,True,0.0,1.0,0.0,12,,4
4,1,1,1,1,5,Lewis Hamilton,44,,0.0,,1,4,False,False,True,0.0,1.0,0.0,12,,5


### Now create overtaking model

In [3]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.utils.class_weight import compute_sample_weight

# Define features and target
features = [
	"gap",
	"pace",
	"tyre_diff",
	"stint_laps_diff",
	"drs_available",
	"cumulative_time",
	"sector_time",
	"pit"
]

X = df[features]
y = df["overtaken"]

from imblearn.over_sampling import SMOTE



# Train the model on the resampled data

# Handle missing values
X = X.fillna(X.mean())

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)


# Base model
gbc = GradientBoostingClassifier(
	n_estimators=200,
	learning_rate=0.05,
	max_depth=3,
	
	subsample=0.8,
	random_state=42
)



# # Calibrate for better probabilities
model = CalibratedClassifierCV(gbc, method="sigmoid", cv=3)

# # Train on all data
model.fit(X_resampled, y_resampled)

# from lightgbm import LGBMClassifier

# lgbm = LGBMClassifier(
# 	n_estimators=100,
# 	learning_rate=0.1,
# 	max_depth=3,
# 	subsample=0.8,
# 	random_state=42
# )
# model = CalibratedClassifierCV(lgbm, method="isotonic", cv=3)
# model.fit(X, y, sample_weight=sample_weights)


feature_means = X.mean()

def predict_overtake(new_race_df):
	# Select the same features used during training
	X_new = new_race_df[features]
	
	# Handle missing values (if any)
	X_new = X_new.fillna(feature_means)
	
	# Make predictions using the trained model
	new_race_df["predicted_overtake"] = model.predict(X_new)
	
	return new_race_df

In [4]:
from sklearn.metrics import accuracy_score, classification_report

# Load the 2023 race data


# Predict overtakes using the rule-based function
new_race_df = predict_overtake(df)

# Calculate accuracy
actual_overtakes = new_race_df["overtaken"]
predicted_overtakes = new_race_df["predicted_overtake"]
accuracy = accuracy_score(actual_overtakes, predicted_overtakes)
print(f"Accuracy: {accuracy:.3f}")

# Generate classification report
print("\nClassification Report:")
print(classification_report(
	actual_overtakes,
	predicted_overtakes,
	target_names=["No Overtake", "Overtaken"]
))

Accuracy: 0.997

Classification Report:
              precision    recall  f1-score   support

 No Overtake       1.00      1.00      1.00      3068
   Overtaken       0.96      0.93      0.94        97

    accuracy                           1.00      3165
   macro avg       0.98      0.96      0.97      3165
weighted avg       1.00      1.00      1.00      3165



Accuracy: 0.997

Classification Report:
			  precision    recall  f1-score   support

 No Overtake       1.00      1.00      1.00      3068
	Overtaken       0.96      0.93      0.94        97

	accuracy                           1.00      3165
	macro avg       0.98      0.96      0.97      3165
weighted avg       1.00      1.00      1.00      3165

In [5]:
race_data = setup_race_data(df)

race_data["base_sector_times"]

{np.int64(33): {np.int64(1): {1: np.float64(30.22119298245614),
   2: np.float64(30.516543859649122),
   3: np.float64(30.451333333333334)},
  np.int64(2): {1: np.float64(41.314),
   2: np.float64(41.66635087719298),
   3: np.float64(41.3860350877193)},
  np.int64(3): {1: np.float64(22.741999999999997),
   2: np.float64(23.330350877192984),
   3: np.float64(23.105035087719298)}},
 np.int64(16): {np.int64(1): {1: np.float64(30.176596491228068),
   2: np.float64(30.516543859649122),
   3: np.float64(30.048473684210528)},
  np.int64(2): {1: np.float64(41.449),
   2: np.float64(41.66635087719298),
   3: np.float64(41.827666666666666)},
  np.int64(3): {1: np.float64(23.022),
   2: np.float64(23.330350877192984),
   3: np.float64(23.111877192982455)}},
 np.int64(11): {np.int64(1): {1: np.float64(30.208192982456143),
   2: np.float64(30.516543859649122),
   3: np.float64(30.4610350877193)},
  np.int64(2): {1: np.float64(41.58685964912281),
   2: np.float64(41.66635087719298),
   3: np.float64

In [9]:
import pandas as pd

def race_sim(precomputed_data, given_driver=None, simulated_strategy=None):
	"""
	simulated_strategy
	"""
	# Extract precomputed data
	driver_tyre_coefficients = precomputed_data["driver_tyre_coefficients"]
	driver_strategies = precomputed_data["driver_strategies"]
	max_laps = precomputed_data["max_laps"]
	drivers = precomputed_data["drivers"]
	driver_names = precomputed_data["driver_names"]
	initial_positions = precomputed_data["initial_positions"]
	base_sector_times = precomputed_data["base_sector_times"]
	fuel_corrections = precomputed_data["fuel_corrections"]

	drivers_data = []

	if given_driver and simulated_strategy:
		driver_strategies[given_driver] = simulated_strategy

	for driver in drivers:
		drivers_data.append({
			"driver_number": driver,
			"driver_name": driver_names[driver],
			"pit_schedule": driver_strategies[driver],
			"tyre_type": driver_strategies[driver][1],
			"lap_num": 1,
			"sector": 0,
			"sector_time": 0.0,
			"stint_lap": 1,
			"cumulative_time": 0.0,
			"gap": 0.0,
			"pit": False,
			"position": initial_positions[driver],
			"base_sector_times": base_sector_times[driver],
			"pace": 0,  # Initialize pace as 0
			"tyre_diff": 0,  # Initialize tyre difference as 0
			"stint_laps_diff": 0,  # Initialize stint laps difference as 0
			"drs_available": False,  # Initialize DRS availability as False
		})
		
	drivers_df = pd.DataFrame(drivers_data)

	simulated_data = []
	for lap in range(2, max_laps + 1):

		drivers_df["lap_num"] += 1
		drivers_df["stint_lap"] += 1

		for sector in range(1, 4):
			drivers_df["sector"] = sector
			for index, row in drivers_df.iterrows():

				# Handle pit stops at the start of a lap (sector 1)
				if sector == 1 and lap in row["pit_schedule"]:
					drivers_df.at[index, "pit"] = True  # Mark pit stop
					drivers_df.at[index, "cumulative_time"] += 20  # Add pit stop penalty
					drivers_df.at[index, "stint_lap"] = 1  # Reset stint lap
					drivers_df.at[index, "tyre_type"] = row["pit_schedule"][lap]  # Change tyre

				else:
					drivers_df.at[index, "pit"] = False

				# Calculate tyre degradation coefficients
				a, b, c = driver_tyre_coefficients[row["driver_number"]][sector][row["tyre_type"]]
				sector_time = (
					row["base_sector_times"][sector][row["tyre_type"]]  # Base sector time for specific driver
					+ (a * drivers_df.at[index, "stint_lap"]**2 + b * drivers_df.at[index, "stint_lap"] + c)  # Tyre degradation
					+ fuel_corrections[lap]  # Fuel effect
				)

				# Update sector time and cumulative time
				drivers_df.at[index, "sector_time"] = sector_time
				drivers_df.at[index, "cumulative_time"] += sector_time

				# drivers_df = drivers_df.sort_values(by="cumulative_time", ascending=True)

				# Reassign positions based on the sorted order
				# drivers_df["position"] = range(1, len(drivers_df) + 1)

				ahead_pos = row["position"] - 1

				if ahead_pos > 0:
					ahead_time = drivers_df.loc[drivers_df["position"] == ahead_pos, "cumulative_time"].values[0]

					gap = row["cumulative_time"] - ahead_time
					
					# if gap < 0:
					# 	print(gap)
					drivers_df.at[index, "gap"] = gap

					
				else:
					gap = 0
					drivers_df.at[index, "gap"] = 0

				# calc overtakes
				if gap < 0:
					ahead_index = drivers_df[drivers_df["position"] == ahead_pos].index[0]

					if drivers_df.at[ahead_index, "cumulative_time"] > drivers_df.at[index, "cumulative_time"]:
						drivers_df.at[index, "cumulative_time"] = drivers_df.at[ahead_index, "cumulative_time"] + 1

				elif gap < 0.8 and ahead_pos > 0:
					# if ahead_pos > 0:
					ahead_row = drivers_df.loc[drivers_df["position"] == ahead_pos].iloc[0]
						
					df["pace"] = (
						df.groupby(["driver_name", "sector"])["sector_time"]
						.rolling(window=5, min_periods=1)
						.mean()
						.reset_index(level=[0, 1], drop=True)
					)
					
					# Tyre difference
					drivers_df.at[index, "tyre_diff"] = ahead_row["tyre_type"] - row["tyre_type"]

					# Stint laps difference
					drivers_df.at[index, "stint_laps_diff"] = ahead_row["stint_lap"] - row["stint_lap"]

					# DRS availability
					drivers_df.at[index, "drs_available"] = gap <= 1.0

					# else:
					# 	drivers_df.at[index, "pace"] = (
					# 		drivers_df.loc[
					# 			(drivers_df["driver_number"] == row["driver_number"]) &
					# 			(drivers_df["lap_num"] >= lap - 5),
					# 			"sector_time"
					# 		].mean()
					# 	)

					# 	drivers_df.at[index, "tyre_diff"] = 0

					# 	# Stint laps difference
					# 	drivers_df.at[index, "stint_laps_diff"] = 0

					# 	# DRS availability
					# 	drivers_df.at[index, "drs_available"] = 0

					
					drivers_df = predict_overtake(drivers_df)

					ahead_index = drivers_df[drivers_df["position"] == ahead_pos].index[0]
					# Check if an overtake is predicted
					if drivers_df.at[index, "predicted_overtake"]:
						# Swap positions between the current driver and the driver ahead
						current_position = drivers_df.at[index, "position"]

						# Swap positions
						drivers_df.at[index, "position"], drivers_df.at[ahead_index, "position"] = (
							drivers_df.at[ahead_index, "position"],
							drivers_df.at[index, "position"],
						)

						drivers_df.at[index, "cumulative_time"], drivers_df.at[ahead_index, "cumulative_time"] = (
							drivers_df.at[ahead_index, "cumulative_time"],
							drivers_df.at[index, "cumulative_time"] + 0.5,
						)



					else:
						# if the driver behind is faster overall, then keep them behind
						if drivers_df.at[ahead_index, "cumulative_time"] > drivers_df.at[index, "cumulative_time"]:
							drivers_df.at[ahead_index, "cumulative_time"], drivers_df.at[index, "cumulative_time"] = (
								drivers_df.at[index, "cumulative_time"],
								drivers_df.at[ahead_index, "cumulative_time"],
							)

	drivers_df.drop(columns=["base_sector_times"], inplace=True)

	return drivers_df

# print(race_data)
sim_df = race_sim(race_data)
sim_df

drivers_df = sim_df.sort_values(by="position", ascending=True)


# Reset the index (optional, for cleaner output)
drivers_df = drivers_df.reset_index(drop=True)
drivers_df

Unnamed: 0,driver_number,driver_name,pit_schedule,tyre_type,lap_num,sector,sector_time,stint_lap,cumulative_time,gap,pit,position,pace,tyre_diff,stint_laps_diff,drs_available,predicted_overtake
0,33,Max Verstappen,"{15: 1, 37: 3, 1: 1}",3,57,3,23.902567,21,5680.861228,0.0,False,1,0,0,0,False,False
1,11,Sergio Perez,"{18: 1, 35: 3, 1: 1}",3,57,3,23.705806,23,5706.355612,1.788578,False,2,0,0,-2,True,False
2,14,Fernando Alonso,"{15: 3, 35: 3, 1: 1}",3,57,3,23.745855,23,5707.395661,-22.705806,False,3,0,0,0,False,False
3,55,Carlos Sainz,"{14: 3, 32: 3, 1: 1}",3,57,3,24.26158,26,5728.630627,20.719241,False,4,0,0,-3,True,False
4,44,Lewis Hamilton,"{13: 3, 31: 3, 1: 1}",3,57,3,23.947662,27,5732.09495,-20.483339,False,5,0,0,0,False,False
5,18,Lance Stroll,"{16: 3, 31: 3, 1: 1}",3,57,3,24.075991,27,5732.686788,-23.484154,False,6,0,0,3,True,False
6,63,George Russell,"{14: 3, 32: 3, 1: 1}",3,57,3,23.953245,26,5727.778246,-4.785796,False,7,0,0,1,True,False
7,77,Valtteri Bottas,"{12: 3, 30: 3, 1: 1}",3,57,3,24.141116,28,5729.532314,-22.387049,False,8,0,0,0,False,False
8,10,Pierre Gasly,"{10: 3, 26: 3, 41: 1, 1: 1}",1,57,3,23.813948,17,5740.477116,-12.869146,False,9,0,0,14,True,False
9,23,Alexander Albon,"{12: 1, 27: 3, 41: 1, 1: 1}",1,57,3,24.035447,17,5753.697432,12.998817,False,10,0,0,0,True,False


In [None]:
def get_accuracy(year, circuit, sim_df):
	# Fetch the actual race session from the database
	race_session = (
		db_session.query(Session)
		.join(RacingWeekend, Session.weekend_id == RacingWeekend.racing_weekend_id)
		.join(Circuit, RacingWeekend.circuit_id == Circuit.circuit_id)
		.filter(
			RacingWeekend.year == year,
			Circuit.circuit_name == circuit,
			Session.session_type == "Race"
		)
		.first()
	)

	if not race_session:
		raise ValueError(f"No race session found for {year} at {circuit}")

	# Fetch the actual race laps and results
	laps = race_session.laps
	session_results = (
		db_session.query(SessionResult.position, Driver.driver_num)
		.join(Session, Session.session_id == SessionResult.session_id)
		.join(Driver, Driver.driver_id == SessionResult.driver_id)
		.filter(SessionResult.session_id == race_session.session_id)
		.all()
	)

	# Convert session_results to a dictionary for easy lookup
	actual_results = {driver_num: position for position, driver_num in session_results}

	# Extract simulated results from sim_df
	sim_results = sim_df.groupby("driver_number").last()["position"].to_dict()

	# Ensure both results have the same drivers
	common_drivers = set(actual_results.keys()).intersection(sim_results.keys())
	if not common_drivers:
		raise ValueError("No common drivers found between actual and simulated results")

	# Filter results to only include common drivers
	actual_positions = [actual_results[driver] for driver in common_drivers]
	sim_positions = [sim_results[driver] for driver in common_drivers]

	# Calculate accuracy metrics
	position_accuracy = sum(1 for a, s in zip(actual_positions, sim_positions) if a == s) / len(common_drivers)
	top_3_accuracy = sum(1 for a, s in zip(actual_positions, sim_positions) if (a <= 3 and s <= 3)) / 3
	mean_error = sum(abs(a - s) for a, s in zip(actual_positions, sim_positions)) / len(common_drivers)
	total_error = sum(abs(a - s) for a, s in zip(actual_positions, sim_positions))

	# Return accuracy metrics
	return {
		"position_accuracy": position_accuracy,
		"top_3_accuracy": top_3_accuracy,
		"mean_error": mean_error,
		"total_error": total_error,


	}

get_accuracy(2023, "Sakhir", drivers_df)

{'position_accuracy': 1.0,
 'top_3_accuracy': 1.0,
 'mean_error': 0.0,
 'total_error': 0}

In [None]:
from skopt import gp_minimize
from skopt.space import Integer, Categorical

def optimize_strategy_up_to_3_stops(precomputed_data, driver_number=14, n_calls=100):

	max_laps = precomputed_data["max_laps"]

	# Define the search space
	space = [
		Categorical([1, 2, 3], name='start_tyre'),       # Starting tyre (1=Hard, 2=Medium, 3=Soft)
		Integer(2, max_laps - 1, name='pit1_lap'),      # First pit stop lap (optional)
		Categorical([1, 2, 3], name='pit1_tyre'),        # First pit stop tyre (optional)
		Integer(2, max_laps - 1, name='pit2_lap'),      # Second pit stop lap (optional)
		Categorical([1, 2, 3], name='pit2_tyre'),        # Second pit stop tyre (optional)
		Integer(2, max_laps - 1, name='pit3_lap'),      # Third pit stop lap (optional)
		Categorical([1, 2, 3], name='pit3_tyre'),        # Third pit stop tyre (optional)
	]

	def objective(params):
		start_tyre, pit1_lap, pit1_tyre, pit2_lap, pit2_tyre, pit3_lap, pit3_tyre = params

		# Construct the strategy dictionary
		strategy = {1: start_tyre}  # Always include the starting tyre

		# Add pit stops only if their laps are valid and in ascending order
		pit_laps = sorted(set([pit1_lap, pit2_lap, pit3_lap]))
		tyres = [pit1_tyre, pit2_tyre, pit3_tyre]

		# Ensure pit laps are within bounds and in ascending order
		valid_pit_laps = []
		valid_tyres = []
		for lap, tyre in zip(pit_laps, tyres):
			if 2 <= lap < max_laps:  # Only include valid pit laps
				valid_pit_laps.append(lap)
				valid_tyres.append(tyre)

		# Add valid pit stops to the strategy
		for lap, tyre in zip(valid_pit_laps, valid_tyres):
			strategy[lap] = tyre

		# Penalize strategies with fewer than 2 distinct tyre types
		tyre_types_used = set(strategy.values())
		if len(tyre_types_used) < 2:
			return 20.0  # Penalize with worst position

		# Run the simulation
		try:
			drivers_df = race_sim(precomputed_data, given_driver=driver_number, simulated_strategy=strategy)
			final_position = drivers_df[drivers_df['driver_number'] == driver_number]['position'].iloc[-1]
			return final_position  # Minimize finishing position
		except Exception as e:
			print(f"Error during simulation: {e}")
			return 20.0  # Return worst position in case of errors

	# Perform Bayesian optimization
	result = gp_minimize(
		objective,
		space,
		n_calls=n_calls,
		random_state=42,
		verbose=True
	)

	# Extract best parameters and construct the optimal strategy
	best_params = result.x
	best_strategy = {1: best_params[0]}  # Always include the starting tyre

	# Add valid pit stops to the strategy
	pit_laps = sorted(set([best_params[1], best_params[3], best_params[5]]))
	tyres = [best_params[2], best_params[4], best_params[6]]

	for lap, tyre in zip(pit_laps, tyres):
		if 2 <= lap < precomputed_data["max_laps"]:
			best_strategy[lap] = tyre

	best_position = result.fun

	print(f"Best Strategy: {best_strategy}")
	print(f"Best Finishing Position: {best_position}")

	return best_strategy, best_position

# Example usage
# best_strategy, best_position = optimize_strategy_up_to_3_stops(race_data, driver_number=10, n_calls=100)
# print(f"Best Strategy: {best_strategy}")
# print(f"Best Finishing Position: {best_position}")

ModuleNotFoundError: No module named 'skopt'

In [None]:
%pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 KB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting pyaml>=16.9
  Downloading pyaml-25.1.0-py3-none-any.whl (26 kB)
Collecting PyYAML
  Downloading PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (751 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m751.2/751.2 KB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: PyYAML, pyaml, scikit-optimize
Successfully installed PyYAML-6.0.2 pyaml-25.1.0 scikit-optimize-0.10.2
Note: you may need to restart the kernel to use updated packages.


In [None]:
from bayes_opt import BayesianOptimization
import numpy as np

def bayesian_strategy_optimization(race_data, given_driver, max_iterations=50):
	"""
	Optimize the pit strategy for a given driver using Bayesian Optimization.
	
	Args:
		race_data (dict): Precomputed race data.
		given_driver (int): The driver number to optimize the strategy for.
		max_iterations (int): Maximum number of iterations for optimization.
		
	Returns:
		tuple: Best strategy (dict) and best finishing position (int).
	"""
	# Extract precomputed data
	max_laps = race_data["max_laps"]
	initial_strategy = race_data["driver_strategies"][given_driver]  # Use the driver's actual race strategy

	# Define the objective function for Bayesian Optimization
	def objective_function(starting_tyre, pit1_lap, pit1_tyre, pit2_lap, pit2_tyre, pit3_lap, pit3_tyre):
		# Ensure pit laps are unique and within valid range
		pit_laps = sorted(set([int(pit1_lap), int(pit2_lap), int(pit3_lap)]))
		tyres = [int(pit1_tyre), int(pit2_tyre), int(pit3_tyre)]
		
		# Filter out invalid pit laps (e.g., out of bounds or overlapping)
		valid_pit_laps = []
		valid_tyres = []
		for lap, tyre in zip(pit_laps, tyres):
			if 2 <= lap < max_laps:  # Only include valid pit laps
				valid_pit_laps.append(lap)
				valid_tyres.append(tyre)

		# Construct the strategy dictionary
		strategy = {1: int(starting_tyre)}  # Starting tyre
		for lap, tyre in zip(valid_pit_laps, valid_tyres):
			strategy[lap] = tyre

		# Penalize strategies with fewer than 2 distinct tyre types
		tyre_types_used = set(strategy.values())
		if len(tyre_types_used) < 2:
			return -20.0  # Penalize with worst position (negative because we're maximizing)

		# Evaluate the strategy using the race simulation
		try:
			sim_df = race_sim(race_data, given_driver=given_driver, simulated_strategy=strategy)
			final_position = sim_df[sim_df["driver_number"] == given_driver]["position"].iloc[-1]
			return -final_position  # Negative because BayesianOptimization maximizes by default
		except Exception as e:
			print(f"Error during simulation: {e}")
			return -20.0  # Return worst position in case of errors

	# Set up the parameter bounds for Bayesian Optimization
	pbounds = {
		"starting_tyre": (1, 3),  # Starting tyre (1=Hard, 2=Medium, 3=Soft)
		"pit1_lap": (2, max_laps - 1),  # First pit stop lap (optional)
		"pit1_tyre": (1, 3),  # First pit stop tyre (optional)
		"pit2_lap": (2, max_laps - 1),  # Second pit stop lap (optional)
		"pit2_tyre": (1, 3),  # Second pit stop tyre (optional)
		"pit3_lap": (2, max_laps - 1),  # Third pit stop lap (optional)
		"pit3_tyre": (1, 3),  # Third pit stop tyre (optional)
	}

	# Initialize the Bayesian Optimizer
	optimizer = BayesianOptimization(
		f=objective_function,
		pbounds=pbounds,
		verbose=2,
		random_state=42
	)

	# Perform the optimization
	optimizer.maximize(init_points=5, n_iter=max_iterations)

	# Extract the best strategy from the optimizer
	best_params = optimizer.max["params"]
	best_starting_tyre = int(best_params["starting_tyre"])
	pit_laps = sorted(set([int(best_params["pit1_lap"]), int(best_params["pit2_lap"]), int(best_params["pit3_lap"])]))
	tyres = [int(best_params["pit1_tyre"]), int(best_params["pit2_tyre"]), int(best_params["pit3_tyre"])]

	# Construct the best strategy dictionary
	best_strategy = {1: best_starting_tyre}
	for lap, tyre in zip(pit_laps, tyres):
		if 2 <= lap < max_laps:  # Only include valid pit laps
			best_strategy[lap] = tyre

	# Evaluate the best strategy to get the finishing position
	sim_df = race_sim(race_data, given_driver=given_driver, simulated_strategy=best_strategy)
	best_position = sim_df[sim_df["driver_number"] == given_driver]["position"].iloc[-1]

	print(f"Best Strategy: {best_strategy}")
	print(f"Best Finishing Position: {best_position}")

	return best_strategy, best_position

# Example Usage
best_strategy, best_position = bayesian_strategy_optimization(
	race_data=race_data,
	given_driver=10,
	max_iterations=20
)


|   iter    |  target   | pit1_lap  | pit1_tyre | pit2_lap  | pit2_tyre | pit3_lap  | pit3_tyre | starti... |
-------------------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m-9.0     [39m | [39m22.23    [39m | [39m2.901    [39m | [39m41.53    [39m | [39m2.197    [39m | [39m10.43    [39m | [39m1.312    [39m | [39m1.116    [39m |
| [39m2        [39m | [39m-9.0     [39m | [39m48.77    [39m | [39m2.202    [39m | [39m40.24    [39m | [39m1.041    [39m | [39m54.38    [39m | [39m2.665    [39m | [39m1.425    [39m |
| [39m3        [39m | [39m-9.0     [39m | [39m11.82    [39m | [39m1.367    [39m | [39m18.43    [39m | [39m2.05     [39m | [39m25.33    [39m | [39m1.582    [39m | [39m2.224    [39m |
| [39m4        [39m | [39m-9.0     [39m | [39m9.533    [39m | [39m1.584    [39m | [39m21.78    [39m | [39m1.912    [39m | [39m44.4     [39m | [39m1.399    [39m | [