In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, precision_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn import svm
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
import joblib

# Print NumPy floating-point values with 4 digits of precision.
np.set_printoptions(precision=4)

In [None]:
# Load the dataset from a path relative to the working directory.
# Later cells read the `season`, `round`, `driver` and `podium` columns from it.
data = pd.read_csv('final_df.csv')

In [None]:
# Work on a copy so the frame read from disk is never modified.
df = data.copy()

# Training split: every row from a season before 2023.
train = df.loc[df['season'] < 2023]
y_train = train['podium']
X_train = train.drop(columns=['driver', 'podium'])

# Fit the scaler on the training features only, then rebuild a DataFrame so
# the column names survive the transform (the result gets a fresh RangeIndex).
scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)

In [None]:
# For Regression
#
# Rank the drivers of one round by the value predicted by the model loaded
# from the SVM pickle below. Requires `df` and the fitted `scaler` created in
# the training-split cell.

# SECURITY: joblib.load unpickles arbitrary Python objects -- only load model
# files that come from a trusted source.
# NOTE(review): absolute, machine-specific path; consider a configurable model
# directory so the notebook can run on other machines.
model_path = 'C:/ChuyenNganh_8/DSP_F1/f1-predictor/best_svm_model_gamma_0.004641588833612777_C_2.154434690031882_kernel_rbf.pkl'
model = joblib.load(model_path)

# Specify the season and the circuit (round) you want to predict
season = 2023
circuit = 22
test = df[(df.season == season) & (df['round'] == circuit)]

# Fail early with a clear message: an empty selection would otherwise only
# surface as an opaque error raised from scaler.transform.
if test.empty:
    raise ValueError(f"No rows found for season {season}, round {circuit}")

X_test = test.drop(['driver', 'podium'], axis=1)
y_test = test.podium
drivers = test['driver'].reset_index(drop=True)  # Store driver names

# Reuse the scaler fitted on the training features (transform only, no refit).
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

# All three columns share a fresh 0..n-1 index, so they line up row by row.
prediction_df = pd.DataFrame(model.predict(X_test), columns=['results'])
prediction_df['driver'] = drivers  # Include driver names
# Actual value of the `podium` column. NOTE(review): the stored output shows
# values up to 20, so this looks like the finishing position rather than a
# top-3 flag -- confirm against the dataset.
prediction_df['podium'] = y_test.reset_index(drop=True)

# Smallest predicted value first; reassign instead of mutating in place so the
# cell stays safe to re-run.
prediction_df = (
    prediction_df
    .sort_values('results', ascending=True)
    .reset_index(drop=True)
)

print(f"Predicted results for Circuit {circuit} in {season}:")
print(prediction_df[['driver', 'results', 'podium']])


Predicted results for Circuit 22 in 2023:
             driver    results  podium
0    max_verstappen   3.365365       1
1           leclerc   3.644557       2
2             perez   4.388833       4
3           russell   4.926000       3
4            norris   6.105546       5
5          hamilton   6.615695       9
6           piastri   7.182066       6
7            alonso   7.435966       7
8             sainz   8.523422      18
9           tsunoda  10.876460       8
10            gasly  10.952102      13
11           stroll  11.691363      10
12             ocon  11.879811      12
13        ricciardo  12.099676      11
14       hulkenberg  12.852573      15
15            albon  13.764871      14
16           bottas  14.914381      19
17  kevin_magnussen  15.135083      20
18             zhou  16.490173      17


In [34]:
# For Classification 
#
# Rank the drivers of one round by the probability the saved classifier
# assigns to the class in column 1 of predict_proba. Requires `df` and the
# fitted `scaler` created in the training-split cell; `joblib` and `pd` are
# imported in the notebook's first cell.

# Load the model only -- the scaler is NOT loaded from disk here; the one
# fitted on the training features earlier in the notebook is reused.
# SECURITY: joblib.load unpickles arbitrary Python objects -- only load model
# files that come from a trusted source.
# NOTE(review): absolute, machine-specific path; consider a configurable model
# directory so the notebook can run on other machines.
model_path = 'C:/ChuyenNganh_8/DSP_F1/f1-predictor/nn_classifier_weight.pkl'
model = joblib.load(model_path)

# Specify the season and the circuit (round) you want to predict
season = 2023
circuit = 22
test = df[(df.season == season) & (df['round'] == circuit)]

# Fail early with a clear message: an empty selection would otherwise only
# surface as an opaque error raised from scaler.transform.
if test.empty:
    raise ValueError(f"No rows found for season {season}, round {circuit}")

X_test = test.drop(['driver', 'podium'], axis=1)
y_test = test.podium
drivers = test['driver'].reset_index(drop=True)  # Store driver names

# Reuse the scaler fitted on the training features (transform only, no refit).
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

# Predict probabilities for each class (assuming binary classification)
prediction_probs = model.predict_proba(X_test)
# Column 1 is the second entry of model.classes_.
# NOTE(review): confirm via model.classes_ that this is the intended target
# class; adjust the index if the model was trained on different labels.
prediction_scores = prediction_probs[:, 1]

# Prepare prediction DataFrame; all columns share a fresh 0..n-1 index.
prediction_df = pd.DataFrame(prediction_scores, columns=['Probability'])
prediction_df['driver'] = drivers  # Include driver names
# Actual value of the `podium` column. NOTE(review): the stored output shows
# values up to 20, so this looks like the finishing position rather than a
# top-3 flag -- confirm against the dataset.
prediction_df['podium'] = y_test.reset_index(drop=True)

# Highest probability first; reassign instead of mutating in place so the cell
# stays safe to re-run.
prediction_df = (
    prediction_df
    .sort_values('Probability', ascending=False)
    .reset_index(drop=True)
)

print(f"Predicted probability for Circuit {circuit} in {season}:")
print(prediction_df[['driver', 'Probability', 'podium']])


Predicted probability for Circuit 22 in 2023:
             driver  Probability  podium
0    max_verstappen     0.795223       1
1           leclerc     0.067574       2
2            norris     0.017945       5
3           russell     0.017884       3
4             perez     0.014479       4
5           piastri     0.010068       6
6            alonso     0.008451       7
7           tsunoda     0.005586       8
8          hamilton     0.004410       9
9        hulkenberg     0.002973      15
10            gasly     0.002598      13
11            sainz     0.002495      18
12        ricciardo     0.002495      11
13           stroll     0.002468      10
14             ocon     0.002252      12
15            albon     0.002055      14
16  kevin_magnussen     0.001876      20
17           bottas     0.001771      19
18             zhou     0.001719      17


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
