In [5]:
from sqlalchemy import create_engine, func
from sqlalchemy.orm import sessionmaker
import sys
import os
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../")))
from DB.models import init_db, Circuit, Season, RacingWeekend, Driver, Session, SessionResult, Lap, TyreRaceData, Team, DriverTeamSession, TeamCircuitStats

import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from itertools import product

# Initialize database connection
global db_session
engine, db_session = init_db()


def all_drivers_tyre(year, round):
	# Query to get the last 30 race sessions globally prior to the specified year/round
	global_past_races = (
		db_session.query(Session.session_id)
		.join(RacingWeekend, RacingWeekend.racing_weekend_id == Session.weekend_id)
		.filter(
			Session.session_type == "Race",
			# Filter races strictly before the specified year/round
			(RacingWeekend.year < year) |
			((RacingWeekend.year == year) & (RacingWeekend.round < round))
		)
		.order_by(RacingWeekend.year.desc(), RacingWeekend.round.desc())  # Order by most recent first
		.limit(30)  # Limit to the last 30 races
		.all()
	)

	# Extract session IDs from the query result
	global_session_ids = [race.session_id for race in global_past_races]

	# Query tyre data for ALL drivers across the last 30 races globally
	global_tyre_data = (
		db_session.query(TyreRaceData.tyre_type, TyreRaceData.a, TyreRaceData.b, TyreRaceData.c)
		.filter(TyreRaceData.race_id.in_(global_session_ids))
		.all()
	)

	# Group and calculate global averages
	global_tyre_stats = {}
	for tyre_type, a, b, c in global_tyre_data:
		if tyre_type not in global_tyre_stats:
			global_tyre_stats[tyre_type] = {"a": [], "b": [], "c": []}

		global_tyre_stats[tyre_type]["a"].append(a)
		global_tyre_stats[tyre_type]["b"].append(b)
		global_tyre_stats[tyre_type]["c"].append(c)

	# Calculate the global averages for each tyre type
	global_averaged_tyre_stats = {}
	for tyre_type, stats in global_tyre_stats.items():
		avg_a = sum(stats["a"]) / len(stats["a"]) if stats["a"] else 0
		avg_b = sum(stats["b"]) / len(stats["b"]) if stats["b"] else 0
		avg_c = sum(stats["c"]) / len(stats["c"]) if stats["c"] else 0

		global_averaged_tyre_stats[tyre_type] = {
			"avg_a": avg_a,
			"avg_b": avg_b,
			"avg_c": avg_c,
		}
	
	return global_averaged_tyre_stats

def get_starting_grid(session_id):
	session_results = db_session.query(SessionResult).filter_by(session_id=session_id).all()

	starting_grid = {}
	for pos in session_results:
		starting_grid[pos.position] = pos.driver_id

	return starting_grid

def get_laps(session_id):
	max_lap = db_session.query(func.max(Lap.lap_num)).filter(Lap.session_id == session_id).scalar()

	# If no laps are found, return 0
	return max_lap if max_lap is not None else 0

def get_all_data(year, round):
	# get all drivers that competed
	racing_weekend = db_session.query(RacingWeekend).filter_by(year=year, round=round).first()

	quali_session = db_session.query(Session).filter_by(weekend_id=racing_weekend.racing_weekend_id, session_type="Qualifying").first()
	race_session = db_session.query(Session).filter_by(weekend_id=racing_weekend.racing_weekend_id, session_type="Race").first()

	# find drivers
	drivers = db_session.query(DriverTeamSession).filter_by(session_id=race_session.session_id).all()

	all_driver_tyre_deg = all_drivers_tyre(year, round)
	driver_tyre_deg = {}
	for driver_entry in drivers:
		driver = driver_entry.driver

		# Query to get the last 30 race sessions the driver participated in
		past_races = (
			db_session.query(Session.session_id)
			.join(DriverTeamSession, DriverTeamSession.session_id == Session.session_id)
			.join(RacingWeekend, RacingWeekend.racing_weekend_id == Session.weekend_id)
			.filter(
				DriverTeamSession.driver_id == driver.driver_id,
				Session.session_type == "Race",
				# Filter races strictly before the specified year/round
				(RacingWeekend.year < year) |
				((RacingWeekend.year == year) & (RacingWeekend.round < round))
			)
			.order_by(RacingWeekend.year.desc(), RacingWeekend.round.desc())  # Order by most recent first
			.limit(30)  # Limit to the last 30 races
			.all()
		)

		# for race in past_races:
		# 	print(race.round)
		# print("\n\n\n")

		if len(past_races) < 20:
			driver_tyre_deg[driver.driver_id] = all_driver_tyre_deg
			continue

		session_ids = [race.session_id for race in past_races]

		# Get all tyre data for those sessions and driver
		tyre_data = db_session.query(TyreRaceData.tyre_type, TyreRaceData.a, TyreRaceData.b, TyreRaceData.c)\
			.filter(TyreRaceData.driver_id == driver.driver_id,
					TyreRaceData.race_id.in_(session_ids))\
			.all()
		
		# print(len(tyre_data))
		tyre_stats = {}

		# Loop over the queried tyre_data
		for tyre_type, a, b, c in tyre_data:
			if tyre_type == 4:
				continue
			if tyre_type not in tyre_stats:
				tyre_stats[tyre_type] = {"a": [], "b": [], "c": []}
			
			# Append the values of a, b, and c for this tyre type
			tyre_stats[tyre_type]["a"].append(a)
			tyre_stats[tyre_type]["b"].append(b)
			tyre_stats[tyre_type]["c"].append(c)

		# Calculate the averages for each tyre type
		averaged_tyre_stats = {}
		for tyre_type, stats in tyre_stats.items():
			avg_a = sum(stats["a"]) / len(stats["a"]) if stats["a"] else 0
			avg_b = sum(stats["b"]) / len(stats["b"]) if stats["b"] else 0
			avg_c = sum(stats["c"]) / len(stats["c"]) if stats["c"] else 0
			
			averaged_tyre_stats[tyre_type] = {
				"avg_a": avg_a,
				"avg_b": avg_b,
				"avg_c": avg_c,
			}

		driver_tyre_deg[driver.driver_id] = averaged_tyre_stats

	starting_grid = get_starting_grid(quali_session.session_id)
	
	num_laps = get_laps(race_session.session_id)

	return starting_grid, driver_tyre_deg, num_laps


starting_grid, driver_tyre_deg, num_laps = get_all_data(2024,1)

Exhaustive search

In [6]:
def simulate_pit_stops(driver_tyre_deg, num_laps, pit_time=20):
    optimal_pit_stops = {}
    for driver_id, tyre_data in driver_tyre_deg.items():
        possible_tyres = list(tyre_data.keys())
        if len(possible_tyres) < 2:
            # Not enough tyre types to change; skip or handle accordingly
            optimal_pit_stops[driver_id] = {
                'pit_lap': None,
                'start_tyre': None,
                'new_tyre': None,
                'total_time': float('inf')
            }
            continue
        
        # Precompute cumulative lap times for each tyre
        cumulative = {}
        for tyre in possible_tyres:
            a = tyre_data[tyre]['avg_a']
            b = tyre_data[tyre]['avg_b']
            c = tyre_data[tyre]['avg_c']
            cumulative[tyre] = [0.0]  # index 0 is 0 laps
            total_time = 0.0
            for x in range(1, num_laps + 1):
                lap_time = a * (x ** 2) + b * x + c
                total_time += lap_time
                cumulative[tyre].append(total_time)
        
        # Find the optimal pit strategy
        min_total = float('inf')
        best_pit_lap = None
        best_start_tyre = None
        best_new_tyre = None
        
        for start_tyre in possible_tyres:
            for new_tyre in possible_tyres:
                if start_tyre == new_tyre:
                    continue
                for pit_lap in range(1, num_laps):
                    stint1_length = pit_lap
                    stint2_length = num_laps - pit_lap
                    if stint2_length < 1:
                        continue
                    # Ensure we don't exceed precomputed values
                    if stint1_length > len(cumulative[start_tyre]) - 1:
                        continue
                    if stint2_length > len(cumulative[new_tyre]) - 1:
                        continue
                    total = (cumulative[start_tyre][stint1_length] +
                             cumulative[new_tyre][stint2_length] +
                             pit_time)
                    if total < min_total:
                        min_total = total
                        best_pit_lap = pit_lap
                        best_start_tyre = start_tyre
                        best_new_tyre = new_tyre
        
        optimal_pit_stops[driver_id] = {
            'pit_lap': best_pit_lap,
            'start_tyre': best_start_tyre,
            'new_tyre': best_new_tyre,
            'total_time': min_total
        }
    
    return optimal_pit_stops

# Example usage:
starting_grid, driver_tyre_deg, num_laps = get_all_data(2024, 1)
optimal_pits = simulate_pit_stops(driver_tyre_deg, num_laps)

# Convert the optimal_pits dictionary to a DataFrame
df_optimal_pits = pd.DataFrame.from_dict(
    optimal_pits, 
    orient='index', 
    columns=['pit_lap', 'start_tyre', 'new_tyre', 'total_time']
).reset_index().rename(columns={'index': 'driver_id'})

(df_optimal_pits)

Unnamed: 0,driver_id,pit_lap,start_tyre,new_tyre,total_time
0,25,38,3,2,92.914044
1,1,18,2,3,96.747515
2,32,33,3,2,90.481664
3,4,20,1,3,90.630709
4,14,20,1,3,94.081075
5,3,21,2,3,98.332537
6,5,26,2,3,103.763483
7,8,33,3,2,104.579841
8,35,20,2,3,113.998458
9,17,23,2,3,112.012373


In [7]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
from sqlalchemy import and_

def create_dataset(db_session):
    data = []
    # Query all race sessions from 2019 to 2024
    races = db_session.query(Session).join(RacingWeekend).filter(
        Session.session_type == "Race",
        RacingWeekend.year.between(2020, 2024)
    ).all()

    for race in races:
        year = race.racing_weekend.year
        round_num = race.racing_weekend.round
        circuit_id = race.racing_weekend.circuit.circuit_id
        wet = race.wet
        num_laps = db_session.query(func.max(Lap.lap_num)).filter(Lap.session_id == race.session_id).scalar() or 0

        # Get all drivers in the race
        drivers = db_session.query(DriverTeamSession).filter_by(session_id=race.session_id).all()
        
        for dts in drivers:
            driver_id = dts.driver_id
            team_id = dts.team_id
            
            # Get starting position
            start_pos = db_session.query(SessionResult.position).filter_by(
                session_id=race.session_id,
                driver_id=driver_id
            ).scalar()

            # Get team-circuit stats
            team_circuit_stats = db_session.query(TeamCircuitStats).filter_by(
                circuit_id=circuit_id,
                team_id=team_id
            ).first()
            pit_time = team_circuit_stats.pit_time if team_circuit_stats else 2.5  # Default
            quali_diff = team_circuit_stats.quali_to_race_percent_diff if team_circuit_stats else 0

            # Get historical tyre data (up to before current race)
            tyre_data = db_session.query(TyreRaceData).join(Session).join(RacingWeekend).filter(
                TyreRaceData.driver_id == driver_id,
                Session.session_type == "Race",
                (RacingWeekend.year < year) | 
                ((RacingWeekend.year == year) & (RacingWeekend.round < round_num))
            ).all()

            # Aggregate tyre parameters
            tyre_params = {}
            for td in tyre_data:
                if td.tyre_type not in tyre_params:
                    tyre_params[td.tyre_type] = {'a': [], 'b': [], 'c': []}
                tyre_params[td.tyre_type]['a'].append(td.a)
                tyre_params[td.tyre_type]['b'].append(td.b)
                tyre_params[td.tyre_type]['c'].append(td.c)

            # Calculate average tyre parameters
            avg_tyre_params = {}
            for tyre, params in tyre_params.items():
                avg_a = sum(params['a'])/len(params['a']) if params['a'] else 0
                avg_b = sum(params['b'])/len(params['b']) if params['b'] else 0
                avg_c = sum(params['c'])/len(params['c']) if params['c'] else 0
                avg_tyre_params[tyre] = (avg_a, avg_b, avg_c)

            # Simulate optimal strategy
            optimal_pit_lap = None
            start_tyre = None
            new_tyre = None
            
            if avg_tyre_params and num_laps > 0:
                cumulative = {}
                for tyre, (a, b, c) in avg_tyre_params.items():
                    cumulative[tyre] = [0.0]
                    total = 0.0
                    for x in range(1, num_laps+1):
                        total += a*x**2 + b*x + c
                        cumulative[tyre].append(total)

                min_time = float('inf')
                for t1 in avg_tyre_params:
                    for t2 in avg_tyre_params:
                        if t1 == t2: continue
                        for pit_lap in range(1, num_laps):
                            t = cumulative[t1][pit_lap] + cumulative[t2][num_laps - pit_lap] + pit_time
                            if t < min_time:
                                min_time = t
                                optimal_pit_lap = pit_lap
                                start_tyre = t1
                                new_tyre = t2

            # Get actual pit data
            laps = db_session.query(Lap).filter_by(
                session_id=race.session_id,
                driver_id=driver_id
            ).order_by(Lap.lap_num).all()

            actual_pits = [lap.lap_num for lap in laps if lap.pit]
            actual_pit_lap = actual_pits[0] if actual_pits else None

            # Get actual tyre changes
            stints = {}
            for lap in laps:
                if lap.stint_num not in stints:
                    stints[lap.stint_num] = lap.tyre
            actual_tyres = list(stints.values())
            start_tyre_actual = actual_tyres[0] if actual_tyres else None
            new_tyre_actual = actual_tyres[1] if len(actual_tyres) > 1 else None

            # Add to dataset
            data.append({
                'circuit_id': circuit_id,
                'team_id': team_id,
                'starting_pos': start_pos,
                'wet': wet,
                'pit_time': pit_time,
                'quali_diff': quali_diff,
                'optimal_pit_lap': optimal_pit_lap,
                'sim_start_tyre': start_tyre,
                'sim_new_tyre': new_tyre,
                'actual_pit_lap': actual_pit_lap,
                'actual_start_tyre': start_tyre_actual,
                'actual_new_tyre': new_tyre_actual
            })

    return pd.DataFrame(data)

# Create dataset
df = create_dataset(db_session)

# Preprocessing
df = df.dropna(subset=['actual_pit_lap', 'actual_start_tyre'])

# Features and targets
X = df[['circuit_id', 'team_id', 'starting_pos', 'wet', 'pit_time', 
        'quali_diff', 'optimal_pit_lap', 'sim_start_tyre', 'sim_new_tyre']]
y_pit = df['actual_pit_lap'].apply(lambda x: 1 if x is not None else 0)  # Binary classification
y_tyre = df[['actual_start_tyre', 'actual_new_tyre']]

from sklearn.impute import SimpleImputer

# Preprocessing pipeline with imputation
numerical_features = ['starting_pos', 'wet', 'pit_time', 'quali_diff', 'optimal_pit_lap']
categorical_features = ['circuit_id', 'team_id', 'sim_start_tyre', 'sim_new_tyre']

# Imputers for numerical and categorical features
numerical_imputer = SimpleImputer(strategy='median')
categorical_imputer = SimpleImputer(strategy='most_frequent')

# Preprocessor with imputation
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[
            ('imputer', numerical_imputer),
            ('scaler', StandardScaler())
        ]), numerical_features),
        ('cat', Pipeline(steps=[
            ('imputer', categorical_imputer),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ]), categorical_features)
    ])

# Model for pit stop prediction
X_train, X_test, y_train, y_test = train_test_split(X, y_pit, test_size=0.2, random_state=42)

model_pit = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, class_weight='balanced'))
])

model_pit.fit(X_train, y_train)
y_pred = model_pit.predict(X_test)
print("Pit Stop Prediction Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Model for tyre prediction (example for start tyre)
X_tyre = df[df['actual_start_tyre'].notna()]
y_tyre_start = X_tyre['actual_start_tyre']

X_train_t, X_test_t, y_train_t, y_test_t = train_test_split(
    X_tyre[categorical_features + numerical_features], 
    y_tyre_start, 
    test_size=0.2, 
    random_state=42
)

model_tyre = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100))
])

model_tyre.fit(X_train_t, y_train_t)
y_pred_t = model_tyre.predict(X_test_t)
print("\nTyre Prediction Accuracy:", accuracy_score(y_test_t, y_pred_t))
print(classification_report(y_test_t, y_pred_t))

Pit Stop Prediction Accuracy: 1.0
              precision    recall  f1-score   support

           1       1.00      1.00      1.00       374

    accuracy                           1.00       374
   macro avg       1.00      1.00      1.00       374
weighted avg       1.00      1.00      1.00       374


Tyre Prediction Accuracy: 0.6684491978609626
              precision    recall  f1-score   support

        -1.0       0.33      0.50      0.40         2
         1.0       0.64      0.60      0.62        84
         2.0       0.71      0.80      0.75       211
         3.0       0.26      0.12      0.17        40
         4.0       0.73      0.76      0.75        29
         5.0       0.57      0.50      0.53         8

    accuracy                           0.67       374
   macro avg       0.54      0.55      0.54       374
weighted avg       0.64      0.67      0.65       374



In [8]:
df

Unnamed: 0,circuit_id,team_id,starting_pos,wet,pit_time,quali_diff,optimal_pit_lap,sim_start_tyre,sim_new_tyre,actual_pit_lap,actual_start_tyre,actual_new_tyre
0,9,1,9.0,False,21.892827,3.453284,66.0,1.0,3.0,21.0,2.0,3.0
1,9,1,14.0,False,21.892827,3.453284,27.0,2.0,3.0,20.0,2.0,3.0
2,9,11,7.0,False,22.298379,8.066211,30.0,2.0,3.0,22.0,2.0,3.0
3,9,11,12.0,False,22.298379,8.066211,27.0,2.0,3.0,22.0,2.0,3.0
4,9,2,2.0,False,22.802583,6.721642,1.0,3.0,4.0,22.0,1.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2133,21,17,,False,25.002750,6.627080,1.0,-1.0,4.0,23.0,2.0,3.0
2134,21,17,,False,25.002750,6.627080,57.0,1.0,3.0,24.0,2.0,3.0
2135,21,7,,False,27.125453,5.330593,1.0,3.0,5.0,30.0,2.0,3.0
2137,21,10,,False,22.798089,5.745819,21.0,2.0,3.0,13.0,2.0,3.0
