In [5]:
%%Markdown
pip install scikit-optimize

UsageError: Cell magic `%%Markdown` not found.


In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from skopt import BayesSearchCV

# Step 1: Load the Iris dataset into a feature frame (X) and a target vector (y)
iris = load_iris()
y = iris["target"]
X = pd.DataFrame(data=iris["data"], columns=iris["feature_names"])

# Preprocessing: label encoding is not needed for this dataset.

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 2: Model Training
# Fit four untuned baseline classifiers on the training split:
# SVM, Decision Tree, Random Forest, and Logistic Regression.
# `fit` returns the estimator itself, so construction and fitting are chained.

svm_model = SVC(random_state=42).fit(X_train, y_train)

dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

lr_model = LogisticRegression(random_state=42).fit(X_train, y_train)

# Step 3: Hyperparameter Tuning
# Using Grid Search, Random Search, and Bayesian Optimization for hyperparameter tuning

# Candidate hyperparameter values for each model. Every entry is a plain list of
# discrete options, and the same dictionaries are passed to all three search strategies.
svm_param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

dt_param_grid = {
    'max_depth': [None, 10, 20, 30, 40],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf_param_dist = {
    # Ten evenly spaced tree counts between 10 and 200, truncated to Python ints.
    'n_estimators': np.linspace(start=10, stop=200, num=10).astype(int).tolist(),
    # 'auto' is no longer accepted by RandomForestClassifier (removed in
    # scikit-learn 1.3, where it was an alias of 'sqrt'); use supported values only.
    'max_features': ['sqrt', 'log2'],
    # Depth limits 10, 20, ..., 110.
    'max_depth': np.linspace(10, 110, num=11).astype(int).tolist(),
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

lr_param_grid = {
    'C': [0.1, 1, 10, 100],
    'penalty': ['l1', 'l2'],
    # LogisticRegression's default 'lbfgs' solver rejects the 'l1' penalty, so every
    # l1 candidate would fail to fit. 'saga' supports both 'l1' and 'l2'.
    'solver': ['saga'],
    'max_iter': [100, 200, 300]  # Adjusted max_iter values
}


# Grid Search
# Exhaustively cross-validates (5 folds) every combination in each grid. The random
# forest grid alone is very large, so the candidate fits are spread across all CPU
# cores with n_jobs=-1; this changes only wall-clock time, not the selected parameters.
svm_grid_search = GridSearchCV(SVC(random_state=42), svm_param_grid, cv=5, n_jobs=-1)
svm_grid_search.fit(X_train, y_train)

dt_grid_search = GridSearchCV(DecisionTreeClassifier(random_state=42), dt_param_grid, cv=5, n_jobs=-1)
dt_grid_search.fit(X_train, y_train)

rf_grid_search = GridSearchCV(RandomForestClassifier(random_state=42), rf_param_dist, cv=5, n_jobs=-1)
rf_grid_search.fit(X_train, y_train)

lr_grid_search = GridSearchCV(LogisticRegression(random_state=42), lr_param_grid, cv=5, n_jobs=-1)
lr_grid_search.fit(X_train, y_train)

# Random Search
# Samples 10 candidate combinations per model and cross-validates each with 5 folds.
# random_state is fixed so the same candidates are drawn on every run; without it the
# reported scores can change between executions of the notebook.
svm_random_search = RandomizedSearchCV(SVC(random_state=42), svm_param_grid, n_iter=10, cv=5, random_state=42)
svm_random_search.fit(X_train, y_train)

dt_random_search = RandomizedSearchCV(DecisionTreeClassifier(random_state=42), dt_param_grid, n_iter=10, cv=5, random_state=42)
dt_random_search.fit(X_train, y_train)

rf_random_search = RandomizedSearchCV(RandomForestClassifier(random_state=42), rf_param_dist, n_iter=10, cv=5, random_state=42)
rf_random_search.fit(X_train, y_train)

lr_random_search = RandomizedSearchCV(LogisticRegression(random_state=42), lr_param_grid, n_iter=10, cv=5, random_state=42)
lr_random_search.fit(X_train, y_train)

# Bayesian Optimization
# Runs 10 optimization iterations per model with 5-fold cross-validation.
# random_state is fixed so the sequence of proposed candidates is repeatable across runs.
svm_bayes_search = BayesSearchCV(SVC(random_state=42), svm_param_grid, n_iter=10, cv=5, random_state=42)
svm_bayes_search.fit(X_train, y_train)

dt_bayes_search = BayesSearchCV(DecisionTreeClassifier(random_state=42), dt_param_grid, n_iter=10, cv=5, random_state=42)
dt_bayes_search.fit(X_train, y_train)

rf_bayes_search = BayesSearchCV(RandomForestClassifier(random_state=42), rf_param_dist, n_iter=10, cv=5, random_state=42)
rf_bayes_search.fit(X_train, y_train)

lr_bayes_search = BayesSearchCV(LogisticRegression(random_state=42), lr_param_grid, n_iter=10, cv=5, random_state=42)
lr_bayes_search.fit(X_train, y_train)

# Step 4: Model Evaluation
# Evaluate the models with the best-tuned hyperparameters using appropriate metrics

def evaluate_model(model, X_test, y_test):
    """Score a fitted model on held-out data.

    Args:
        model: Any object exposing ``predict(X)``.
        X_test: Feature matrix passed to ``model.predict``.
        y_test: True labels that the predictions are compared against.

    Returns:
        A ``(accuracy, f1)`` tuple, where ``f1`` is the weighted-average F1 score.
    """
    predictions = model.predict(X_test)
    return (
        accuracy_score(y_test, predictions),
        f1_score(y_test, predictions, average='weighted'),
    )

# Score each tuned search object on the held-out test split. Every group below
# unpacks one (accuracy, f1) pair per model, in SVM / DT / RF / LR order.

# Evaluate models after Grid Search
(
    (svm_grid_accuracy, svm_grid_f1),
    (dt_grid_accuracy, dt_grid_f1),
    (rf_grid_accuracy, rf_grid_f1),
    (lr_grid_accuracy, lr_grid_f1),
) = (
    evaluate_model(tuned, X_test, y_test)
    for tuned in (svm_grid_search, dt_grid_search, rf_grid_search, lr_grid_search)
)

# Evaluate models after Random Search
(
    (svm_random_accuracy, svm_random_f1),
    (dt_random_accuracy, dt_random_f1),
    (rf_random_accuracy, rf_random_f1),
    (lr_random_accuracy, lr_random_f1),
) = (
    evaluate_model(tuned, X_test, y_test)
    for tuned in (svm_random_search, dt_random_search, rf_random_search, lr_random_search)
)

# Evaluate models after Bayesian Optimization
(
    (svm_bayes_accuracy, svm_bayes_f1),
    (dt_bayes_accuracy, dt_bayes_f1),
    (rf_bayes_accuracy, rf_bayes_f1),
    (lr_bayes_accuracy, lr_bayes_f1),
) = (
    evaluate_model(tuned, X_test, y_test)
    for tuned in (svm_bayes_search, dt_bayes_search, rf_bayes_search, lr_bayes_search)
)

# Step 5: Comparison
# Print the test accuracy and F1 score of every model, grouped by tuning method.
# Each report entry is (heading, rows, blank_after); a row holds the exact
# positional arguments handed to print().

report = [
    (
        "Model Comparison after Grid Search:",
        [
            ("SVM: Accuracy =", svm_grid_accuracy, ", F1 Score =", svm_grid_f1),
            ("Decision Tree: Accuracy =", dt_grid_accuracy, ", F1 Score =", dt_grid_f1),
            ("Random Forest: Accuracy =", rf_grid_accuracy, ", F1 Score =", rf_grid_f1),
            ("Logistic Regression: Accuracy =", lr_grid_accuracy, ", F1 Score =", lr_grid_f1),
        ],
        True,
    ),
    (
        "Model Comparison after Random Search:",
        [
            ("SVM: Accuracy =", svm_random_accuracy, ", F1 Score =", svm_random_f1),
            ("Decision Tree: Accuracy =", dt_random_accuracy, ", F1 Score =", dt_random_f1),
            ("Random Forest: Accuracy =", rf_random_accuracy, ", F1 Score =", rf_random_f1),
            ("Logistic Regression: Accuracy =", lr_random_accuracy, ", F1 Score =", lr_random_f1),
        ],
        True,
    ),
    (
        "Model Comparison after Bayesian Optimization:",
        [
            ("SVM: Accuracy =", svm_bayes_accuracy, ", F1 Score =", svm_bayes_f1),
            ("Decision Tree: Accuracy =", dt_bayes_accuracy, ", F1 Score =", dt_bayes_f1),
            ("Random Forest: Accuracy =", rf_bayes_accuracy, ", F1 Score =", rf_bayes_f1),
            ("Logistic Regression: Accuracy =", lr_bayes_accuracy, ", F1 Score =", lr_bayes_f1),
        ],
        False,  # the final section is not followed by a separator
    ),
]

for heading, rows, blank_after in report:
    print(heading)
    for row in rows:
        print(*row)
    if blank_after:
        print("\n")
