In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Compare three classifiers (SVM, Random Forest, K-Nearest Neighbors) on the
# Iris dataset: tune each with a 5-fold grid search on the training split,
# score the tuned models on the held-out test split, and report the best one.

# Load the Iris dataset (features X, class labels y)
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features. The scaler is fitted on the training split only
# and then re-used on the test split.
# NOTE(review): the scaler is fitted on all of X_train before GridSearchCV
# splits it into folds, so every validation fold is scaled with statistics
# that include that fold. Putting scaler + estimator in a Pipeline and
# searching over the pipeline would avoid this - confirm whether it matters.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize classifiers (seeded where the estimator accepts a random_state)
svc_classifier = SVC(random_state=42)
rf_classifier = RandomForestClassifier(random_state=42)
knn_classifier = KNeighborsClassifier()

# Define the hyperparameter grid for each classifier
svc_param_grid = {'kernel': ['linear', 'rbf', 'poly'], 'C': [0.1, 1, 10]}
rf_param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [None, 5, 10]}
knn_param_grid = {'n_neighbors': [3, 5, 7]}

# Perform GridSearchCV (5-fold cross-validation) for each classifier
svc_grid_search = GridSearchCV(svc_classifier, svc_param_grid, cv=5)
rf_grid_search = GridSearchCV(rf_classifier, rf_param_grid, cv=5)
knn_grid_search = GridSearchCV(knn_classifier, knn_param_grid, cv=5)

# Fit the models with GridSearchCV on the (scaled) training split
svc_grid_search.fit(X_train, y_train)
rf_grid_search.fit(X_train, y_train)
knn_grid_search.fit(X_train, y_train)

# Get the best hyperparameters and model for each classifier
best_svc_params = svc_grid_search.best_params_
best_svc_model = svc_grid_search.best_estimator_

best_rf_params = rf_grid_search.best_params_
best_rf_model = rf_grid_search.best_estimator_

best_knn_params = knn_grid_search.best_params_
best_knn_model = knn_grid_search.best_estimator_

# Make predictions on the test split with the best models
svc_pred = best_svc_model.predict(X_test)
rf_pred = best_rf_model.predict(X_test)
knn_pred = best_knn_model.predict(X_test)

# Evaluate test-set accuracy of the best models
svc_accuracy = accuracy_score(y_test, svc_pred)
rf_accuracy = accuracy_score(y_test, rf_pred)
knn_accuracy = accuracy_score(y_test, knn_pred)

# Display name -> test accuracy; insertion order decides how ties are broken
classifiers = {
    "SVM": svc_accuracy,
    "Random Forest": rf_accuracy,
    "K-Nearest Neighbors": knn_accuracy,
}

for clf, acc in classifiers.items():
    print(f"{clf} accuracy: {acc:.4f}")

# Select the best-performing model. max() returns the first key with the
# highest accuracy, so ties go to the earliest entry in `classifiers`, and a
# model is always named even if every accuracy is 0.0 (a manual loop seeded
# with best_accuracy = 0 and a strict `>` would leave the winner as None).
# NOTE(review): the winner is picked on the same test split whose accuracy is
# reported, so the reported figure is optimistic; selecting on each search's
# cross-validation score would keep the test split untouched.
best_classifier = max(classifiers, key=classifiers.get)
best_accuracy = classifiers[best_classifier]

print("\nBest performing model:", best_classifier)


SVM accuracy: 0.9778
Random Forest accuracy: 1.0000
K-Nearest Neighbors accuracy: 1.0000

Best performing model: Random Forest
