## Imports & Paths

In [10]:
#Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder
import joblib
import matplotlib.pyplot as plt

# Race to predict: the season / Grand Prix pair is interpolated into the
# qualifying CSV file name when the data is loaded.
season, gp = 2023, 'Bahrain'

# Absolute Windows directories holding the saved model artifacts and the
# processed (cleaned) data files.
models_path = r"C:\UNI\Code\F1_Race_Predictor\models"
processed_path = r"C:\UNI\Code\F1_Race_Predictor\data\processed"


## Load the Model

In [12]:
# Locations of the persisted artifacts inside the models directory.
model_file = fr"{models_path}\f1_race_position_predictor(2).pkl"
team_encoder_file = fr"{models_path}\label_encoder_TeamName.pkl"
gp_encoder_file = fr"{models_path}\label_encoder_GrandPrix.pkl"

# Restore the fitted estimator and the two encoders used for the categorical
# columns. NOTE(review): joblib.load deserializes pickled objects, so these
# files should only ever come from a trusted source.
rf = joblib.load(model_file)
team_encoder = joblib.load(team_encoder_file)
gp_encoder = joblib.load(gp_encoder_file)

# Read the cleaned qualifying results for the configured season and Grand Prix.
qualifying_csv = fr"{processed_path}\{season}_{gp}_qualifying_clean.csv"
new_qual = pd.read_csv(qualifying_csv)


In [6]:
# List the column names available in the loaded qualifying data.
qual_columns = new_qual.columns.tolist()
print(qual_columns)


['DriverNumber', 'BroadcastName', 'Abbreviation', 'DriverId', 'TeamName', 'TeamColor', 'TeamId', 'FirstName', 'LastName', 'FullName', 'HeadshotUrl', 'CountryCode', 'Position', 'ClassifiedPosition', 'GridPosition', 'Q1', 'Q2', 'Q3', 'Time', 'Status', 'Points', 'Laps', 'GrandPrix']


## Feature Selection

In [21]:
# --- Prepare features (same as training) ---
# Columns taken from the qualifying data as model inputs.
features = ['Position', 'TeamName', 'GrandPrix']

# Fail early, naming every absent column, rather than hitting a KeyError on
# the selection below.
missing_cols = [f for f in features if f not in new_qual.columns]
if missing_cols:
    raise ValueError(f"Missing columns in qualifying data: {missing_cols}")

# Work on a copy so that preparing the features never mutates new_qual.
X_new = new_qual[features].copy()

# No Q2/Q3 imputation is performed here: those columns are not part of
# `features`, so they never reach X_new and there is nothing to fill.
        

## Apply Encoders

In [22]:
# Encode from the original string columns in new_qual instead of from X_new
# itself, so re-running this cell does not pass already-encoded integers back
# into the encoders. Rows are selected by X_new's index so the encoded values
# line up with X_new even if new_qual has been re-sorted in the meantime.
# transform() is expected to raise if a team or Grand Prix was not seen when
# the encoder was fitted -- TODO confirm the encoders are LabelEncoder objects.
X_new['TeamName'] = team_encoder.transform(new_qual.loc[X_new.index, 'TeamName'])
X_new['GrandPrix'] = gp_encoder.transform(new_qual.loc[X_new.index, 'GrandPrix'])


In [23]:
# Inspect the feature names stored on the loaded model, to compare against
# the columns of X_new before predicting.
model_features = rf.feature_names_in_
print(model_features)


['Position_qual' 'TeamName' 'GrandPrix']


In [24]:
# Rename to match the feature names the model reports in feature_names_in_.
# rename() ignores keys that are absent, so re-running this cell is harmless.
X_new = X_new.rename(columns={
    'Position': 'Position_qual',
    })

# --- Make predictions ---
# The model returns a continuous value per driver, not an integer rank.
predictions = rf.predict(X_new)

# --- Attach predictions to dataframe ---
# Attach by index label rather than by position: new_qual is re-sorted below,
# so on a second run of this cell its row order no longer matches X_new and a
# positional assignment would give predictions to the wrong drivers.
new_qual['PredictedRacePosition'] = pd.Series(predictions, index=X_new.index)

# --- Sort by predicted position (1 = best) ---
new_qual = new_qual.sort_values(by='PredictedRacePosition', ascending=True)

# --- Display top predicted finishers ---
print(new_qual[['FullName', 'TeamName', 'Position', 'PredictedRacePosition']])

           FullName         TeamName  Position  PredictedRacePosition
0    Max Verstappen  Red Bull Racing       1.0                  1.150
1      Sergio Perez  Red Bull Racing       2.0                  2.180
4   Fernando Alonso     Aston Martin       5.0                  4.575
3      Carlos Sainz          Ferrari       4.0                  6.020
5    George Russell         Mercedes       6.0                  6.350
6    Lewis Hamilton         Mercedes       7.0                  7.075
7      Lance Stroll     Aston Martin       8.0                  7.705
8      Esteban Ocon           Alpine       9.0                  9.940
11  Valtteri Bottas       Alfa Romeo      12.0                 10.295
14  Alexander Albon         Williams      15.0                 11.710
19     Pierre Gasly           Alpine      20.0                 11.815
9   Nico Hulkenberg     Haas F1 Team      10.0                 12.505
13     Yuki Tsunoda       AlphaTauri      14.0                 12.670
2   Charles Leclerc 