In [3]:
from sqlalchemy import create_engine, func
from sqlalchemy.orm import sessionmaker
import sys
import os
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../")))
from DB.models import init_db, Circuit, Season, RacingWeekend, Driver, Session, SessionResult, Lap, TyreRaceData, Team, DriverTeamSession, TeamCircuitStats

import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from itertools import product

# Initialize database connection
global db_session
engine, db_session = init_db()


def all_drivers_tyre(year, round):
	# Query to get the last 30 race sessions globally prior to the specified year/round
	global_past_races = (
		db_session.query(Session.session_id)
		.join(RacingWeekend, RacingWeekend.racing_weekend_id == Session.weekend_id)
		.filter(
			Session.session_type == "Race",
			# Filter races strictly before the specified year/round
			(RacingWeekend.year < year) |
			((RacingWeekend.year == year) & (RacingWeekend.round < round))
		)
		.order_by(RacingWeekend.year.desc(), RacingWeekend.round.desc())  # Order by most recent first
		.limit(30)  # Limit to the last 30 races
		.all()
	)

	# Extract session IDs from the query result
	global_session_ids = [race.session_id for race in global_past_races]

	# Query tyre data for ALL drivers across the last 30 races globally
	global_tyre_data = (
		db_session.query(TyreRaceData.tyre_type, TyreRaceData.a, TyreRaceData.b, TyreRaceData.c)
		.filter(TyreRaceData.race_id.in_(global_session_ids))
		.all()
	)

	# Group and calculate global averages
	global_tyre_stats = {}
	for tyre_type, a, b, c in global_tyre_data:
		if tyre_type not in global_tyre_stats:
			global_tyre_stats[tyre_type] = {"a": [], "b": [], "c": []}

		global_tyre_stats[tyre_type]["a"].append(a)
		global_tyre_stats[tyre_type]["b"].append(b)
		global_tyre_stats[tyre_type]["c"].append(c)

	# Calculate the global averages for each tyre type
	global_averaged_tyre_stats = {}
	for tyre_type, stats in global_tyre_stats.items():
		avg_a = sum(stats["a"]) / len(stats["a"]) if stats["a"] else 0
		avg_b = sum(stats["b"]) / len(stats["b"]) if stats["b"] else 0
		avg_c = sum(stats["c"]) / len(stats["c"]) if stats["c"] else 0

		global_averaged_tyre_stats[tyre_type] = {
			"avg_a": avg_a,
			"avg_b": avg_b,
			"avg_c": avg_c,
		}
	
	return global_averaged_tyre_stats

def get_starting_grid(session_id):
	session_results = db_session.query(SessionResult).filter_by(session_id=session_id).all()

	starting_grid = {}
	for pos in session_results:
		starting_grid[pos.position] = pos.driver_id

	return starting_grid

def get_laps(session_id):
	max_lap = db_session.query(func.max(Lap.lap_num)).filter(Lap.session_id == session_id).scalar()

	# If no laps are found, return 0
	return max_lap if max_lap is not None else 0

def get_all_data(year, round):
	# get all drivers that competed
	racing_weekend = db_session.query(RacingWeekend).filter_by(year=year, round=round).first()

	quali_session = db_session.query(Session).filter_by(weekend_id=racing_weekend.racing_weekend_id, session_type="Qualifying").first()
	race_session = db_session.query(Session).filter_by(weekend_id=racing_weekend.racing_weekend_id, session_type="Race").first()

	print(race_session.session_id)
	# find drivers
	drivers = db_session.query(DriverTeamSession).filter_by(session_id=race_session.session_id).all()

	all_driver_tyre_deg = all_drivers_tyre(year, round)
	driver_tyre_deg = {}
	for driver_entry in drivers:
		driver = driver_entry.driver

		# Query to get the last 30 race sessions the driver participated in
		past_races = (
			db_session.query(Session.session_id)
			.join(DriverTeamSession, DriverTeamSession.session_id == Session.session_id)
			.join(RacingWeekend, RacingWeekend.racing_weekend_id == Session.weekend_id)
			.filter(
				DriverTeamSession.driver_id == driver.driver_id,
				Session.session_type == "Race",
				# Filter races strictly before the specified year/round
				(RacingWeekend.year < year) |
				((RacingWeekend.year == year) & (RacingWeekend.round < round))
			)
			.order_by(RacingWeekend.year.desc(), RacingWeekend.round.desc())  # Order by most recent first
			.limit(30)  # Limit to the last 30 races
			.all()
		)

		# for race in past_races:
		# 	print(race.round)
		# print("\n\n\n")

		if len(past_races) < 20:
			driver_tyre_deg[driver.driver_id] = all_driver_tyre_deg
			continue

		session_ids = [race.session_id for race in past_races]

		# Get all tyre data for those sessions and driver
		tyre_data = db_session.query(TyreRaceData.tyre_type, TyreRaceData.a, TyreRaceData.b, TyreRaceData.c)\
			.filter(TyreRaceData.driver_id == driver.driver_id,
					TyreRaceData.race_id.in_(session_ids))\
			.all()
		
		# print(len(tyre_data))
		tyre_stats = {}

		# Loop over the queried tyre_data
		for tyre_type, a, b, c in tyre_data:
			if tyre_type in [4, 5]:
				continue
			if tyre_type not in tyre_stats:
				tyre_stats[tyre_type] = {"a": [], "b": [], "c": []}
			
			# Append the values of a, b, and c for this tyre type
			tyre_stats[tyre_type]["a"].append(a)
			tyre_stats[tyre_type]["b"].append(b)
			tyre_stats[tyre_type]["c"].append(c)

		# Calculate the averages for each tyre type
		averaged_tyre_stats = {}
		for tyre_type, stats in tyre_stats.items():
			avg_a = sum(stats["a"]) / len(stats["a"]) if stats["a"] else 0
			avg_b = sum(stats["b"]) / len(stats["b"]) if stats["b"] else 0
			avg_c = sum(stats["c"]) / len(stats["c"]) if stats["c"] else 0
			
			averaged_tyre_stats[tyre_type] = {
				"avg_a": avg_a,
				"avg_b": avg_b,
				"avg_c": avg_c,
			}

		driver_tyre_deg[driver.driver_id] = averaged_tyre_stats

	starting_grid = get_starting_grid(quali_session.session_id)
	
	num_laps = get_laps(race_session.session_id)

	return starting_grid, driver_tyre_deg, num_laps


starting_grid, driver_tyre_deg, num_laps = get_all_data(2024,1)
print(driver_tyre_deg)

505
{25: {1: {'avg_a': 0.013959121313157947, 'avg_b': -0.19267459781836774, 'avg_c': 1.5955269230154245}, 3: {'avg_a': 0.0012235357921884635, 'avg_b': -0.00847019105287892, 'avg_c': 0.8724541658007837}, 2: {'avg_a': 0.005937339654564435, 'avg_b': -0.03138335319381013, 'avg_c': 0.7402304206675017}}, 1: {1: {'avg_a': 0.016392714560850417, 'avg_b': -0.23682892861431495, 'avg_c': 1.9488591722309105}, 2: {'avg_a': 0.00550034567149313, 'avg_b': -0.07327124387719856, 'avg_c': 1.4118415892566305}, 3: {'avg_a': 0.0004848420237898069, 'avg_b': 0.007881688059935879, 'avg_c': 0.9271063691921159}}, 32: {1: {'avg_a': 0.006440127342656401, 'avg_b': -0.10399869100419472, 'avg_c': 1.8748416101882757}, 3: {'avg_a': 0.0020283460675474467, 'avg_b': -0.03916682819256352, 'avg_c': 1.1154032029967325}, 2: {'avg_a': 0.002577678897648065, 'avg_b': -0.023458988520653066, 'avg_c': 1.0266613027115332}}, 4: {1: {'avg_a': 0.004530539695706685, 'avg_b': -0.005770012371732021, 'avg_c': 0.5638280792471472}, 2: {'avg_a

In [None]:
from sqlalchemy.orm import sessionmaker
import pandas as pd

# Initialize the database connection
engine, db_session = init_db()

def get_laps_for_session(session_id):
    # Query the database
    query = (
        db_session.query(Driver.driver_id, Driver.driver_name, Lap.lap_num, Lap.pit)
        .join(Lap, Driver.driver_id == Lap.driver_id)
        .filter(Lap.session_id == session_id)
        .order_by(Driver.driver_name, Lap.lap_num)
    )
    
    # Execute the query and fetch results
    results = query.all()
    
    # Convert results to a DataFrame
    df = pd.DataFrame(results, columns=["driver_id", "driver_name", "lap_num", "pit"])
    
    return df

session_id = 505
laps_df = get_laps_for_session(session_id)

# Display the DataFrame
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
laps_df

Unnamed: 0,driver_id,driver_name,lap_num,pit
0,6,Alexander Albon,1,False
1,6,Alexander Albon,2,False
2,6,Alexander Albon,3,False
3,6,Alexander Albon,4,False
4,6,Alexander Albon,5,False
...,...,...,...,...
1122,33,Yuki Tsunoda,52,False
1123,33,Yuki Tsunoda,53,False
1124,33,Yuki Tsunoda,54,False
1125,33,Yuki Tsunoda,55,False


In [5]:
# Assuming laps_df already exists
for driver_id, driver_df in laps_df.groupby("driver_id"):
    
    # Loop over each unique lap_num in the driver-specific DataFrame
    for lap_num, lap_df in driver_df.groupby("lap_num"):

        for tyre, tyre_data in driver_tyre_deg[driver_id].items():
            a = tyre_data['avg_a']
            b = tyre_data['avg_b']
            c = tyre_data['avg_c']
            
            # For each lap, calculate the lap time and assign it to a new column
            for x in range(lap_num, num_laps):
                lap_time = a * (x ** 2) + b * x + c

                # Create a unique column name for the current tire and lap number
                col_name = f"{tyre}_{lap_num}"

                # Add the calculated lap time to the appropriate column in the dataframe
                # Ensure that the column exists or create it if necessary
                if col_name not in laps_df.columns:
                    laps_df[col_name] = None 

                # Assign the lap time to the column for the current row
                laps_df.loc[(laps_df['driver_id'] == driver_id) & (laps_df['lap_num'] == x), col_name] = lap_time



  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn't exist
  laps_df[col_name] = None  # Initialize the column if it doesn'

In [6]:
laps_df.head(10)

Unnamed: 0,driver_id,driver_name,lap_num,pit,1_1,2_1,3_1,1_2,2_2,3_2,1_3,2_3,3_3,1_4,2_4,3_4,1_5,2_5,3_5,1_6,2_6,3_6,1_7,2_7,3_7,1_8,2_8,3_8,1_9,2_9,3_9,1_10,2_10,3_10,1_11,2_11,3_11,1_12,2_12,3_12,1_13,2_13,3_13,1_14,2_14,3_14,1_15,2_15,3_15,1_16,...,2_40,3_40,1_41,2_41,3_41,1_42,2_42,3_42,1_43,2_43,3_43,1_44,2_44,3_44,1_45,2_45,3_45,1_46,2_46,3_46,1_47,2_47,3_47,1_48,2_48,3_48,1_49,2_49,3_49,1_50,2_50,3_50,1_51,2_51,3_51,1_52,2_52,3_52,1_53,2_53,3_53,1_54,2_54,3_54,1_55,2_55,3_55,1_56,2_56,3_56
0,6,Alexander Albon,1,False,1.146535,1.266747,1.02469,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,6,Alexander Albon,2,False,1.234206,1.196206,1.044917,1.234206,1.196206,1.044917,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,6,Alexander Albon,3,False,1.306437,1.141347,1.066375,1.306437,1.141347,1.066375,1.306437,1.141347,1.066375,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,6,Alexander Albon,4,False,1.363231,1.10217,1.089065,1.363231,1.10217,1.089065,1.363231,1.10217,1.089065,1.363231,1.10217,1.089065,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,6,Alexander Albon,5,False,1.404586,1.078676,1.112985,1.404586,1.078676,1.112985,1.404586,1.078676,1.112985,1.404586,1.078676,1.112985,1.404586,1.078676,1.112985,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,6,Alexander Albon,6,False,1.430502,1.070864,1.138137,1.430502,1.070864,1.138137,1.430502,1.070864,1.138137,1.430502,1.070864,1.138137,1.430502,1.070864,1.138137,1.430502,1.070864,1.138137,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,6,Alexander Albon,7,False,1.440981,1.078734,1.16452,1.440981,1.078734,1.16452,1.440981,1.078734,1.16452,1.440981,1.078734,1.16452,1.440981,1.078734,1.16452,1.440981,1.078734,1.16452,1.440981,1.078734,1.16452,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,6,Alexander Albon,8,False,1.43602,1.102287,1.192133,1.43602,1.102287,1.192133,1.43602,1.102287,1.192133,1.43602,1.102287,1.192133,1.43602,1.102287,1.192133,1.43602,1.102287,1.192133,1.43602,1.102287,1.192133,1.43602,1.102287,1.192133,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,6,Alexander Albon,9,False,1.415622,1.141521,1.220978,1.415622,1.141521,1.220978,1.415622,1.141521,1.220978,1.415622,1.141521,1.220978,1.415622,1.141521,1.220978,1.415622,1.141521,1.220978,1.415622,1.141521,1.220978,1.415622,1.141521,1.220978,1.415622,1.141521,1.220978,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,6,Alexander Albon,10,False,1.379785,1.196438,1.251054,1.379785,1.196438,1.251054,1.379785,1.196438,1.251054,1.379785,1.196438,1.251054,1.379785,1.196438,1.251054,1.379785,1.196438,1.251054,1.379785,1.196438,1.251054,1.379785,1.196438,1.251054,1.379785,1.196438,1.251054,1.379785,1.196438,1.251054,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [9]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score


y = laps_df['pit']

X = laps_df.drop('pit', axis=1)

X = pd.get_dummies(X)

# Split the data into training and testing sets (e.g., 80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

# Initialize the Random Forest classifier
rf = RandomForestClassifier(n_estimators=100, random_state=45)

# Train the model on the training data
rf.fit(X_train, y_train)

# Predict on the test set
y_pred = rf.predict(X_test)

print(classification_report(y_test, y_pred))



              precision    recall  f1-score   support

       False       0.95      1.00      0.98       215
        True       0.00      0.00      0.00        11

    accuracy                           0.95       226
   macro avg       0.48      0.50      0.49       226
weighted avg       0.91      0.95      0.93       226



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
