## Imports & Paths

In [64]:
#Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder
import joblib
import matplotlib.pyplot as plt

# Which race to predict: the season year and the Grand Prix name are
# interpolated into the qualifying file name further down.
season, gp = 2023, 'Mexico'

# Input/output folders (absolute Windows paths on the author's machine;
# edit these when running the notebook elsewhere).
models_path = r"C:\UNI\Code\F1_Race_Predictor\models"
processed_path = r"C:\UNI\Code\F1_Race_Predictor\data\processed"


## Load the Model, Encoders & Qualifying Data

In [65]:
# Resolve every input file up front so the paths are easy to inspect.
model_file = fr"{models_path}\f1_race_position_predictor(2).pkl"
team_encoder_file = fr"{models_path}\label_encoder_TeamName.pkl"
gp_encoder_file = fr"{models_path}\label_encoder_GrandPrix.pkl"
qualifying_file = fr"{processed_path}\{season}_{gp}_qualifying_clean.csv"

# Saved regressor plus the two encoders stored alongside it.
# NOTE: joblib.load unpickles the file contents, so only load files you trust.
rf = joblib.load(model_file)
team_encoder = joblib.load(team_encoder_file)
gp_encoder = joblib.load(gp_encoder_file)

# Cleaned qualifying results for the selected season / Grand Prix.
new_qual = pd.read_csv(qualifying_file)


In [66]:
# List the columns available in the qualifying file before picking features.
print(new_qual.columns.to_list())


['DriverNumber', 'BroadcastName', 'Abbreviation', 'DriverId', 'TeamName', 'TeamColor', 'TeamId', 'FirstName', 'LastName', 'FullName', 'HeadshotUrl', 'CountryCode', 'Position', 'ClassifiedPosition', 'GridPosition', 'Q1', 'Q2', 'Q3', 'Time', 'Status', 'Points', 'Laps', 'GrandPrix']


## Feature Selection

In [67]:
# --- Prepare features (same as training) ---
# Only these three columns are fed to the model. Q1/Q2/Q3 lap times are not
# model inputs, so no lap-time imputation is needed here.
features = ['Position', 'TeamName', 'GrandPrix']

# Fail early, with a clear message, if the qualifying file lacks a model input.
missing_cols = [f for f in features if f not in new_qual.columns]
if missing_cols:
    raise ValueError(f"Missing columns in qualifying data: {missing_cols}")

# Work on a copy so later changes to X_new never modify new_qual itself.
X_new = new_qual[features].copy()

## Encode Categorical Features

In [69]:
def safe_transform(encoder, series):
    """Encode labels with a fitted encoder, mapping unseen labels to -1.

    Parameters
    ----------
    encoder : object with a ``classes_`` attribute
        Assumed to be a fitted scikit-learn ``LabelEncoder``, whose integer
        code for a label is that label's position in ``classes_``.
    series : iterable
        The raw labels to encode (e.g. a pandas Series of team names).

    Returns
    -------
    list of int
        One code per input label, in input order. Labels that are not in
        ``encoder.classes_`` are encoded as -1 instead of raising.
    """
    # Build the label -> code lookup once, rather than calling
    # encoder.transform() separately for every row.
    code_by_label = {label: code for code, label in enumerate(encoder.classes_)}

    # Unseen category -> -1
    return [code_by_label.get(val, -1) for val in series]

# Replace each categorical column with integer codes from its own encoder;
# labels the encoder has never seen come back as -1 (see safe_transform).
for column, encoder in (('TeamName', team_encoder), ('GrandPrix', gp_encoder)):
    X_new[column] = safe_transform(encoder, X_new[column])


In [70]:
# Show the feature names reported by the loaded model, so they can be
# compared against the columns of X_new before predicting.
print(rf.feature_names_in_)


['Position_qual' 'TeamName' 'GrandPrix']


In [71]:
# The model reports 'Position_qual' as its first feature, so give the
# qualifying position column that name before predicting.
X_new = X_new.rename(columns={'Position': 'Position_qual'})

# Predict a finishing position for every driver.
predictions = rf.predict(X_new)

# Store the predictions next to the qualifying data they belong to.
new_qual['PredictedRacePosition'] = predictions

# Order drivers from best (lowest value) to worst predicted finish.
new_qual = new_qual.sort_values('PredictedRacePosition')

# Show who is predicted where, alongside the qualifying position.
summary_columns = ['FullName', 'TeamName', 'Position', 'PredictedRacePosition']
print(new_qual[summary_columns])

            FullName         TeamName  Position  PredictedRacePosition
3   Daniel Ricciardo       AlphaTauri       4.0                  5.110
2     Max Verstappen  Red Bull Racing       3.0                  5.885
0    Charles Leclerc          Ferrari       1.0                  6.450
4       Sergio Perez  Red Bull Racing       5.0                  6.510
5     Lewis Hamilton         Mercedes       6.0                  6.685
1       Carlos Sainz          Ferrari       2.0                  7.310
11   Nico Hulkenberg     Haas F1 Team      12.0                 10.745
10      Pierre Gasly           Alpine      11.0                 11.765
9        Guanyu Zhou       Alfa Romeo      10.0                 11.855
6      Oscar Piastri          McLaren       7.0                 11.930
19    Logan Sargeant         Williams      20.0                 12.180
12   Fernando Alonso     Aston Martin      13.0                 12.440
8    Valtteri Bottas       Alfa Romeo       9.0                 12.605
16   K