In [3]:
import pandas as pd
import warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Load Dataset
data1 = pd.read_csv("/content/drive/MyDrive/f1_data.csv/dataframe.csv")

# Encode categorical features
le = LabelEncoder()
for col in ['GP_name', 'driver', 'constructor']:
    data1[col] = le.fit_transform(data1[col])

# Select relevant features
X = data1[['GP_name', 'quali_pos', 'driver', 'age_at_gp_in_days', 'driver_confidence', 'constructor_reliability']]
y = data1['position'].apply(lambda x: 1 if x < 4 else (3 if x > 10 else 2))  # Classify position

# Define models
models = {
    'RandomForest': RandomForestClassifier(),
    'SVC': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'KNN': KNeighborsClassifier()
}

# Define hyperparameter grids
param_grids = {
    'RandomForest': {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5]
    },
    'SVC': {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale', 'auto']
    },
    'DecisionTree': {
        'max_depth': [None, 5, 10, 20],
        'min_samples_split': [2, 5, 10]
    },
    'KNN': {
        'n_neighbors': [3, 5, 7, 10],
        'weights': ['uniform', 'distance']
    }
}

# Perform GridSearchCV for each model
best_params = {}
best_scores = {}

for model_name, model in models.items():
    print(f"Tuning {model_name}...")
    grid_search = GridSearchCV(model, param_grids[model_name], cv=StratifiedKFold(n_splits=5), scoring='accuracy', n_jobs=-1)
    grid_search.fit(X, y)

    best_params[model_name] = grid_search.best_params_
    best_scores[model_name] = grid_search.best_score_

    print(f"Best Params for {model_name}: {grid_search.best_params_}")
    print(f"Best Accuracy: {grid_search.best_score_:.4f}\n")

# Print final results
print("Hyperparameter Tuning Completed!")
for model, params in best_params.items():
    print(f"{model} Best Parameters: {params}, Accuracy: {best_scores[model]:.4f}")


Tuning RandomForest...
Best Params for RandomForest: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}
Best Accuracy: 0.9565

Tuning SVC...
Best Params for SVC: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
Best Accuracy: 0.9507

Tuning DecisionTree...
Best Params for DecisionTree: {'max_depth': 5, 'min_samples_split': 5}
Best Accuracy: 0.9553

Tuning KNN...
Best Params for KNN: {'n_neighbors': 7, 'weights': 'distance'}
Best Accuracy: 0.5522

Hyperparameter Tuning Completed!
RandomForest Best Parameters: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}, Accuracy: 0.9565
SVC Best Parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}, Accuracy: 0.9507
DecisionTree Best Parameters: {'max_depth': 5, 'min_samples_split': 5}, Accuracy: 0.9553
KNN Best Parameters: {'n_neighbors': 7, 'weights': 'distance'}, Accuracy: 0.5522
