In [1]:
#Lab 4 - Choose the best model for iris dataset using hyperparameter tuning

In [2]:
#Questions
#This program does the following:
#Loads the Iris dataset.
#Splits it into training and testing sets.
#Defines a set of machine learning models (Random Forest, Support Vector Machine, and k-Nearest Neighbors).
#Specifies hyperparameters to tune for each model.
#Uses GridSearchCV to search for the best hyperparameters for each model.
#Prints the best cross-validation accuracy and best parameters found for each model.
#Selects the best model based on the highest accuracy found during the hyperparameter tuning.
#Evaluates the best model on the test set and prints the test accuracy.

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [4]:
# Fetch the Iris dataset, then pull out the feature matrix and the label vector
iris = load_iris()
X, y = iris.data, iris.target

In [5]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Candidate classifiers, keyed by the same names used in the hyperparameter grid.
# RandomForest is randomized (bootstrap sampling and feature sub-sampling), so it
# is seeded here -- with the same seed as the train/test split -- to make its
# cross-validation scores, and therefore the model selection, repeatable from
# run to run.
models = {
    'RandomForest': RandomForestClassifier(random_state=42),
    'SVM': SVC(),
    'KNN': KNeighborsClassifier()
}

# Hyperparameter search space per model; the outer keys mirror those of `models`
params = {
    'RandomForest': {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 5, 10],
    },
    'SVM': {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
    },
    'KNN': {
        'n_neighbors': [3, 5, 7],
        'weights': ['uniform', 'distance'],
    },
}

In [7]:
# Grid-search every candidate model with 5-fold cross-validation on the training
# split, and remember the tuned estimator with the highest best CV accuracy.
best_model, best_accuracy = None, 0
for name, model in models.items():
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=params[name],
        scoring='accuracy',
        cv=5,
    )
    grid_search.fit(X_train, y_train)
    accuracy = grid_search.best_score_
    print(f"{name}: Best Accuracy - {accuracy}, Best Parameters - {grid_search.best_params_}")
    # Strict comparison: on a tie, the model seen first keeps its place
    if not accuracy > best_accuracy:
        continue
    best_accuracy = accuracy
    best_model = grid_search.best_estimator_

print(f"The best model is: {best_model}")
print(f"Best accuracy is: {best_accuracy}")

RandomForest: Best Accuracy - 0.95, Best Parameters - {'max_depth': None, 'n_estimators': 50}
SVM: Best Accuracy - 0.9583333333333334, Best Parameters - {'C': 1, 'kernel': 'linear'}
KNN: Best Accuracy - 0.9583333333333334, Best Parameters - {'n_neighbors': 3, 'weights': 'uniform'}
The best model is: SVC(C=1, kernel='linear')
Best accuracy is: 0.9583333333333334
