# GridSearchCV - finding best hyperparameters

+ Comparison of 3 different models to predict a categorical variable (CO2 emission)
+ Logistic Regression (LogisticRegression)
+ Random Forest (RandomForestClassifier)
+ Support Vector Machine (SVC)

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.metrics import accuracy_score

# --- Candidate classifiers ---------------------------------------------------
# Every estimator is seeded with random_state=22 so repeated runs are comparable.
clf_lr = LogisticRegression(max_iter=2000, random_state=22)
clf_rf = RandomForestClassifier(random_state=22)
clf_svc = SVC(random_state=22)

# --- Hyperparameter search spaces --------------------------------------------
# Logistic regression: two solvers, nine C values log-spaced from 1e-4 to 1e2.
param_grid_lr = {
    'solver': ['liblinear', 'lbfgs'],
    'C': np.logspace(-4, 2, 9),
}

# Random forest: forest size, minimum leaf size and per-split feature rule.
param_grid_rf = [
    {
        'n_estimators': [10, 50, 100, 250, 500, 1000],
        'min_samples_leaf': [1, 3, 5],
        'max_features': ['sqrt', 'log2'],
    },
]

# SVC: gamma is only searched for the RBF kernel, so each kernel gets its own
# sub-grid; both sub-grids share the same nine C values (1e-4 .. 1e4).
svc_c_values = np.logspace(-4, 4, 9)
param_grid_svc = [
    {'kernel': ['rbf'], 'C': svc_c_values, 'gamma': np.logspace(-4, 0, 4)},
    {'kernel': ['linear'], 'C': svc_c_values},
]

# --- One GridSearchCV per model, keyed by a display name ---------------------
gridcvs = {}
search_inputs = zip(
    (param_grid_lr, param_grid_rf, param_grid_svc),
    (clf_lr, clf_rf, clf_svc),
    ('LogisticRegression', 'RandomForest', 'SVM'),
)
for pgrid, clf, name in search_inputs:
    # 3-fold inner CV; refit=True retrains the best configuration on all the
    # data passed to fit(), which makes the search object usable for predict().
    gcv = GridSearchCV(estimator=clf, param_grid=pgrid, cv=3, refit=True)
    gridcvs[name] = gcv

# Splitter for the outer loop of the nested cross-validation:
# three stratified, shuffled folds with a fixed seed.
outer_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=22)

# Maps model name -> array of per-fold outer-CV scores.
outer_scores = {}

# NOTE: X_train, y_train, X_test and y_test are not created here; they are
# expected to exist already (defined elsewhere in the code).
for name, gs in gridcvs.items():
    # Run the hyperparameter search on the full training set.
    gs.fit(X_train, y_train)

    # Report the winning hyperparameter combination.
    best_params = gs.best_params_
    print(f'{name} Best Parameters: {best_params}')

    # Accuracy of the fitted search object on the data it was fitted on ...
    train_pred = gs.predict(X_train)
    train_acc = accuracy_score(y_train, train_pred)
    # ... and on the held-out test data.
    test_pred = gs.predict(X_test)
    test_acc = accuracy_score(y_test, test_pred)

    print(f'{name} Training Accuracy: {100*train_acc:.2f}%')
    print(f'{name} Test Accuracy: {100*test_acc:.2f}%')

    # Nested CV: the whole grid search is cross-validated on the outer folds,
    # and the per-fold scores are kept for later comparison between models.
    nested_score = cross_val_score(estimator=gs, X=X_train, y=y_train, cv=outer_cv)
    outer_scores[name] = nested_score
    print(f'{name}: Outer Accuracy {100*nested_score.mean():.2f}% +/- {100*nested_score.std():.2f}%\n')

