In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [2]:
# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [3]:
# Define a pipeline with scaling and classifier
def create_pipeline(classifier):
    return Pipeline([
        ('scaler', StandardScaler()),
        ('classifier', classifier)
    ])

In [4]:
classifiers = {
    'SVM': (SVC(), {
        'classifier__C': [0.1, 1, 10, 100],
        'classifier__gamma': [1, 0.1, 0.01, 0.001],
        'classifier__kernel': ['rbf', 'linear']
    }),
    'RandomForest': (RandomForestClassifier(), {
        'classifier__n_estimators': [10, 50, 100, 200],
        'classifier__max_features': ['auto', 'sqrt', 'log2'],
        'classifier__max_depth': [None, 10, 20, 30]
    }),
    'KNN': (KNeighborsClassifier(), {
        'classifier__n_neighbors': [3, 5, 7, 9],
        'classifier__weights': ['uniform', 'distance'],
        'classifier__metric': ['euclidean', 'manhattan']
    })
}

In [5]:
best_model = None
best_score = 0
best_params = None

In [6]:
for name, (classifier, params) in classifiers.items():
    pipeline = create_pipeline(classifier)
    grid_search = GridSearchCV(pipeline, params, cv=5, n_jobs=-1, scoring='accuracy')
    grid_search.fit(X_train, y_train)

    print(f"Best parameters for {name}: {grid_search.best_params_}")
    print(f"Best cross-validation accuracy for {name}: {grid_search.best_score_}")

    if grid_search.best_score_ > best_score:
        best_score = grid_search.best_score_
        best_model = grid_search.best_estimator_
        best_params = grid_search.best_params_

Best parameters for SVM: {'classifier__C': 1, 'classifier__gamma': 0.1, 'classifier__kernel': 'rbf'}
Best cross-validation accuracy for SVM: 0.9523809523809523


  warn(


Best parameters for RandomForest: {'classifier__max_depth': None, 'classifier__max_features': 'auto', 'classifier__n_estimators': 50}
Best cross-validation accuracy for RandomForest: 0.9428571428571428
Best parameters for KNN: {'classifier__metric': 'euclidean', 'classifier__n_neighbors': 5, 'classifier__weights': 'distance'}
Best cross-validation accuracy for KNN: 0.9428571428571428


In [7]:
y_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)

print(f"Best model: {best_model}")
print(f"Best hyperparameters: {best_params}")
print(f"Test set accuracy: {test_accuracy}")

Best model: Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier', SVC(C=1, gamma=0.1))])
Best hyperparameters: {'classifier__C': 1, 'classifier__gamma': 0.1, 'classifier__kernel': 'rbf'}
Test set accuracy: 1.0
