In [43]:
from sklearn.preprocessing import StandardScaler
import joblib
import pandas as pd

### Load the model

In [44]:
# Load the previously trained model from disk.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load a 'trained_model.pkl' that comes from a trusted source.
trained_model = joblib.load('trained_model.pkl')

### Load the 2024 DataFrame

In [45]:
# Read the 2024 data (path is relative to the notebook's working directory)
# and preview the first rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, presumably an index
# that was written to the CSV; it is later passed to the model as a feature —
# confirm the model was trained with that column present.
df = pd.read_csv('../Data-collection/df_2024.csv')
df.head()

Unnamed: 0,grid,position,year,round,driver_age,driver_experience,driver_constructor_experience,driver_points,driver_standing,constructor_points,...,constructor_mclaren,constructor_mercedes,constructor_racing_point,constructor_rb,constructor_red_bull,constructor_renault,constructor_sauber,constructor_toro_rosso,constructor_virgin,constructor_williams
0,1,1.0,2024,1,26,185,162,0.0,0.0,0.0,...,False,False,False,False,True,False,False,False,False,False
1,5,2.0,2024,1,34,259,66,0.0,0.0,0.0,...,False,False,False,False,True,False,False,False,False,False
2,4,3.0,2024,1,29,185,66,0.0,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False
3,2,4.0,2024,1,26,125,104,0.0,0.0,0.0,...,False,False,False,False,False,False,False,False,False,False
4,3,5.0,2024,1,26,104,45,0.0,0.0,0.0,...,False,True,False,False,False,False,False,False,False,False


### Define Functions

In [46]:
def _driver_surname(row, candidate_columns):
    """Turn the one-hot column flagged in ``row`` into a display surname.

    The first column in ``candidate_columns`` whose value in ``row`` equals 1
    is converted to a name: the text "driver_" is removed, underscores become
    spaces, the result is title-cased and only the last word is kept.

    Returns the string "Null" when no candidate column is flagged (or the
    flagged column yields no words), matching the sentinel used for a missing
    actual winner.
    """
    flagged = next((col for col in candidate_columns if row[col] == 1), None)
    if flagged is None:
        return "Null"
    words = flagged.replace("driver_", "").replace("_", " ").title().split()
    return words[-1] if words else "Null"


def process_round(df, scaler, trained_model):
    """Predict finishing positions for one round and name the winners.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single round. Must contain the numerical feature columns
        listed below, a 'position' column, and boolean one-hot columns.
        The frame passed in is NOT modified; a copy is processed.
    scaler : object
        Anything exposing ``fit_transform``; it is re-fitted on this round.
    trained_model : object
        Anything exposing ``predict``; it receives every column of the
        processed frame except 'position'.

    Returns
    -------
    dict
        'predicted_winner'   – surname for the row with the lowest predicted
                               position ("Null" if no driver column is flagged),
        'predicted_position' – that lowest predicted value,
        'actual_winner'      – surname for the row whose 'position' is 1.0,
                               or "Null" if there is no such row.
    """
    numerical_columns = [
        'grid', 'driver_age', 'driver_experience', 'driver_constructor_experience',
        'driver_points', 'driver_standing', 'constructor_points',
        'constructor_standing', 'driver_wins', 'constructor_wins', 'circuit_danger',
        'year', 'round'
    ]

    # Work on a private copy so the caller's frame keeps its raw values/dtypes.
    df = df.copy()

    # NOTE(review): fit_transform re-fits the scaler on this round's rows only.
    # With the StandardScaler that process_all_rounds passes in, columns that
    # are constant within a round ('year', 'round') come out as all zeros.
    # Confirm this matches how the model's training features were scaled.
    df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

    # Convert one-hot encoding columns from boolean to int
    one_hot_columns = [col for col in df.columns if df[col].dtype == 'bool']
    df[one_hot_columns] = df[one_hot_columns].astype(int)

    # Only driver one-hot columns may name a winner; otherwise the result would
    # depend on column order (e.g. a constructor column listed first would win).
    # Fall back to every one-hot column if none carries the 'driver_' prefix.
    driver_columns = [col for col in one_hot_columns if col.startswith('driver_')] or one_hot_columns

    X_current_round = df.drop(columns=['position'])

    predictions_df = pd.DataFrame({'predicted_position': trained_model.predict(X_current_round)})
    predictions_df.index = df.index  # align positionally with the round's rows
    df = pd.concat([df, predictions_df], axis=1)

    # Identify the predicted winner: the row with the smallest predicted position.
    predicted_winner_row = df.loc[df['predicted_position'].idxmin()]
    predicted_winner_name = _driver_surname(predicted_winner_row, driver_columns)
    predicted_position = predicted_winner_row['predicted_position']

    # Identify the actual winner, if the round has a recorded first place.
    actual_winner_rows = df[df['position'] == 1.0]
    if actual_winner_rows.empty:
        actual_winner_name = "Null"
    else:
        actual_winner_name = _driver_surname(actual_winner_rows.iloc[0], driver_columns)

    return {
        'predicted_winner': predicted_winner_name,
        'predicted_position': predicted_position,
        'actual_winner': actual_winner_name
    }

In [47]:
def process_all_rounds(df, trained_model):
    """Run ``process_round`` once per distinct 'round' value and tabulate the results.

    Rounds are visited in their order of first appearance in ``df``. Each round
    is handed to ``process_round`` as an independent copy of its rows, together
    with a single shared ``StandardScaler`` instance and ``trained_model``.

    Returns a DataFrame with one row per round: the keys returned by
    ``process_round`` plus a 'round' column.
    """
    round_scaler = StandardScaler()

    def _summarise(round_id):
        # Slice out this round's rows as a copy so the full frame is not altered.
        round_rows = df[df['round'] == round_id].copy()
        summary = process_round(round_rows, round_scaler, trained_model)
        summary['round'] = round_id
        return summary

    return pd.DataFrame([_summarise(round_id) for round_id in df['round'].unique()])

### Make predictions

In [48]:
# Run the per-round prediction over the loaded data and print one summary
# row per round (predicted winner, its predicted position, actual winner).
results_df = process_all_rounds(df, trained_model)
print(results_df)

   predicted_winner  predicted_position actual_winner  round
0             Sainz                4.93    Verstappen      1
1        Verstappen                1.13    Verstappen      2
2           Leclerc                2.05          Null      3
3        Verstappen                1.19    Verstappen      4
4        Verstappen                1.16    Verstappen      5
5        Verstappen                1.08        Norris      6
6        Verstappen                1.13    Verstappen      7
7           Leclerc                2.54       Leclerc      8
8        Verstappen                3.72    Verstappen      9
9        Verstappen                1.58    Verstappen     10
10           Norris                3.46       Russell     11
11       Verstappen                3.32      Hamilton     12
12       Verstappen                2.45       Piastri     13
13            Perez                2.88      Hamilton     14
14       Verstappen                1.35        Norris     15
15          Piastri     