In [12]:
import joblib
import pandas as pd

### Load the model & scaler

In [14]:
# Restore the pickled model and scaler from files in the current working directory.
# NOTE: joblib.load unpickles arbitrary Python objects, so only load artifacts
# that come from a trusted source.
# `scaler` is read as a notebook-level global inside make_predictions.
trained_model = joblib.load('trained_model.pkl')
scaler = joblib.load('scaler.pkl')

### Load the 2024 season DataFrame

In [15]:
# Read the full feature table and keep an independent copy of the 2024 rows only.
df = (
    pd.read_csv('../Data-collection/final_df.csv')
    .loc[lambda frame: frame['year'] == 2024]
    .copy()
)
df.head()

Unnamed: 0,grid,position,year,round,driver_age,driver_experience,driver_constructor_experience,driver_wins,constructor_wins,driver_points,...,constructor_manor,constructor_marussia,constructor_mclaren,constructor_mercedes,constructor_racing_point,constructor_red_bull,constructor_renault,constructor_sauber,constructor_toro_rosso,constructor_williams
4671,9,7.0,2024,1,39,332,222,103.0,1.0,0.0,...,False,False,False,True,False,False,False,False,False,False
4672,6,9.0,2024,1,42,380,22,32.0,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False
4673,10,16.0,2024,1,36,206,22,0.0,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False
4674,5,2.0,2024,1,34,259,66,6.0,53.0,0.0,...,False,False,False,False,False,True,False,False,False,False
4675,14,13.0,2024,1,34,239,0,8.0,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False


### Define Functions

In [16]:
# Columns that make_predictions passes through `scaler.transform` before predicting.
# NOTE(review): the order presumably has to match the order used when the scaler
# was fitted -- confirm against the training code before reordering.
numerical_columns = [
    'grid',
    'driver_age',
    'driver_experience',
    'driver_constructor_experience',
    'driver_points',
    'driver_standing',
    'constructor_points',
    'constructor_standing',
    'driver_wins',
    'constructor_wins',
    'circuit_danger',
    'year',
    'round',
]

In [84]:
def _driver_name_from_row(row, driver_columns):
    """Return the display surname of the driver whose one-hot flag is set in ``row``.

    The first column of ``driver_columns`` whose value equals 1 is converted to a
    name by removing ``"driver_"``, replacing underscores with spaces,
    title-casing, and keeping the last word. Returns ``"Unknown"`` when no
    column is flagged.
    """
    flagged = next((col for col in driver_columns if row[col] == 1), None)
    if flagged is None:
        return "Unknown"
    return flagged.replace("driver_", "").replace("_", " ").title().split()[-1]


def make_predictions(df, trained_model, feature_scaler=None, scaled_columns=None):
    """Predict positions round by round and compare predicted and actual winners.

    For every distinct value of ``df['round']`` the matching rows are copied,
    ``scaled_columns`` are transformed with ``feature_scaler``, boolean columns
    are cast to int, and ``trained_model.predict`` is called on every column
    except ``'position'``. The row with the lowest prediction is reported as the
    predicted winner; the row whose ``position`` equals 1.0 is the actual winner.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'round'``, ``'position'``, every column in
        ``scaled_columns`` and boolean one-hot columns. It is not modified.
    trained_model : object
        Anything exposing ``predict(X)`` that returns one value per row.
    feature_scaler : object, optional
        Anything exposing ``transform(X)``. Defaults to the notebook-level
        ``scaler``.
    scaled_columns : list of str, optional
        Columns passed to the scaler. Defaults to the notebook-level
        ``numerical_columns``.

    Returns
    -------
    pandas.DataFrame
        One row per round with the columns ``round``, ``predicted_winner``,
        ``predicted_position`` (model output for the predicted winner),
        ``actual_position`` (real finishing position of the predicted winner),
        ``actual_winner`` (``"NaN"`` when no row has position 1.0) and
        ``acutal_pred_position`` (model output for the actual winner, or None).
    """
    # Fall back to the notebook-level objects so existing two-argument calls keep working.
    if feature_scaler is None:
        feature_scaler = scaler
    if scaled_columns is None:
        scaled_columns = numerical_columns

    # `rounds` rather than `round`, so the builtin is not shadowed.
    rounds, pred_winner, pred_position = [], [], []
    actual_position, actual_winner, actual_winner_pred_position = [], [], []

    for round_number in df['round'].unique():
        df_current_round = df[df['round'] == round_number].copy()

        df_current_round[scaled_columns] = feature_scaler.transform(df_current_round[scaled_columns])

        # Convert one-hot encoding columns from boolean to int
        one_hot_columns = [col for col in df_current_round.columns if df_current_round[col].dtype == 'bool']
        df_current_round[one_hot_columns] = df_current_round[one_hot_columns].astype(int)

        # Only driver one-hots may name a driver; scanning every boolean column can
        # return a constructor flag instead. If the frame has no "driver_" prefixed
        # one-hots, keep the previous behaviour of scanning all of them.
        driver_columns = [col for col in one_hot_columns if str(col).startswith("driver_")] or one_hot_columns

        X_test_current_round = df_current_round.drop(columns=['position'])

        # Attach predictions positionally, then relabel them with the round's index.
        predicted = pd.Series(trained_model.predict(X_test_current_round))
        predicted.index = df_current_round.index
        df_current_round['predicted_position'] = predicted

        # Predicted winner: the row with the lowest predicted position.
        predicted_winner_row = df_current_round.loc[df_current_round['predicted_position'].idxmin()]
        predicted_winner_name = _driver_name_from_row(predicted_winner_row, driver_columns)
        predicted_position = predicted_winner_row['predicted_position']

        # Actual position of predicted winner
        predicted_winner_actual_position = predicted_winner_row['position']

        # Actual winner: the first row that finished in position 1.0, if any.
        actual_winner_rows = df_current_round[df_current_round['position'] == 1.0]
        if not actual_winner_rows.empty:
            actual_winner_name = _driver_name_from_row(actual_winner_rows.iloc[0], driver_columns)
            actual_winner_predicted_position = actual_winner_rows['predicted_position'].iloc[0]
        else:
            actual_winner_name = "NaN"
            actual_winner_predicted_position = None

        rounds.append(round_number)
        pred_winner.append(predicted_winner_name)
        pred_position.append(predicted_position)
        actual_position.append(predicted_winner_actual_position)
        actual_winner.append(actual_winner_name)
        actual_winner_pred_position.append(actual_winner_predicted_position)

    predictions = pd.DataFrame({
        "round": rounds,
        "predicted_winner": pred_winner,
        "predicted_position": pred_position,
        "actual_position": actual_position,
        "actual_winner": actual_winner,
        # NOTE: the key is misspelled ("acutal") but kept unchanged because it is
        # the column name that existing outputs and callers already see.
        "acutal_pred_position": actual_winner_pred_position
        })

    return predictions


In [66]:
def model_winner_accuracy(df):
    """Return the fraction of rows whose ``predicted_winner`` equals ``actual_winner``."""
    return df['predicted_winner'].eq(df['actual_winner']).mean()

In [41]:
def model_podium_accuracy(df):
    """Return the fraction of rows whose ``actual_position`` is 3.0 or lower.

    Missing positions compare as False, so they count against the score.
    """
    return df['actual_position'].le(3.0).mean()

### Make predictions

In [85]:
# One summary row per round: predicted winner vs. actual winner.
results_df = make_predictions(df=df, trained_model=trained_model)

# Share of rounds where the predicted winner matches the actual winner.
winner_accuracy = model_winner_accuracy(df=results_df)
# Share of rounds where the predicted winner finished in position 3.0 or better.
podium_accuracy = model_podium_accuracy(df=results_df)

In [87]:
# Emit the per-round table, a blank separator line, then both headline scores.
print(
    results_df.to_string(index=False),
    "",
    f"Model Winner Accuracy: {winner_accuracy:.2%}",
    f"Model Podium Accuracy: {podium_accuracy:.2%}",
    sep="\n",
)

 round predicted_winner  predicted_position  actual_position actual_winner  acutal_pred_position
     1       Verstappen                 1.4              1.0    Verstappen                   1.4
     2       Verstappen                 1.2              1.0    Verstappen                   1.2
     3            Sainz                 3.7              1.0         Sainz                   3.7
     4       Verstappen                 2.0              1.0    Verstappen                   2.0
     5       Verstappen                 1.9              1.0    Verstappen                   1.9
     6       Verstappen                 1.2              2.0        Norris                   5.0
     7       Verstappen                 1.3              1.0    Verstappen                   1.3
     8          Leclerc                 1.8              1.0       Leclerc                   1.8
     9       Verstappen                 1.6              1.0    Verstappen                   1.6
    10       Verstappen       