In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Load the iris bunch and pull out the feature matrix and the class labels
data = load_iris()
X, y = data.data, data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features (especially important for SVM and Logistic Regression).
# The scaler learns its statistics from the training split only; the very same
# transformation is then applied to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [2]:
# Define models and hyperparameters.
# Estimators that draw random numbers while fitting are seeded with the same
# value used for the train/test split, so that re-running the notebook from a
# fresh kernel reproduces the same best parameters and scores.
# SVC is left unseeded: its random_state only matters when probability=True.
models = {
    'AdaBoost': AdaBoostClassifier(random_state=42),
    'RandomForest': RandomForestClassifier(random_state=42),
    'SVM': SVC(),
    # random_state is used by the 'liblinear' solver, which is part of the grid below
    'LogisticRegression': LogisticRegression(max_iter=200, random_state=42),
    'DecisionTree': DecisionTreeClassifier(random_state=42)
}

# One grid per model; the keys must match the keys of `models`.
param_grids = {
    'AdaBoost': {
        'n_estimators': [50, 100, 150],
        'learning_rate': [0.01, 0.1, 1]
    },
    'RandomForest': {
        'n_estimators': [50, 100, 150],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10]
    },
    'SVM': {
        'C': [0.1, 1, 10, 100],
        # gamma is a coefficient of the 'rbf' kernel; it has no effect on the
        # 'linear' candidates, which are therefore evaluated once per gamma value
        'gamma': [1, 0.1, 0.01, 0.001],
        'kernel': ['rbf', 'linear']
    },
    'LogisticRegression': {
        'C': [0.1, 1, 10, 100],
        'solver': ['liblinear', 'lbfgs']
    },
    'DecisionTree': {
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10]
    }
}


In [3]:
# Per-model containers, keyed by model name: the tuned estimator and the
# mean cross-validated accuracy it achieved
best_estimators, best_scores = {}, {}

# Exhaustive 5-fold grid search over each model's hyperparameter grid
for model_name, estimator in models.items():
    print(f"Training {model_name}...")
    grid_search = GridSearchCV(
        estimator=estimator,
        param_grid=param_grids[model_name],
        scoring='accuracy',
        cv=5,
    )
    grid_search.fit(X_train, y_train)

    # Keep the winner of the search and its score for later comparison
    best_estimators[model_name] = grid_search.best_estimator_
    best_scores[model_name] = grid_search.best_score_

    print(f"Best parameters for {model_name}: {grid_search.best_params_}")
    print(f"Best cross-validation score for {model_name}: {grid_search.best_score_}")


Training AdaBoost...
Best parameters for AdaBoost: {'learning_rate': 1, 'n_estimators': 50}
Best cross-validation score for AdaBoost: 0.9333333333333333
Training RandomForest...
Best parameters for RandomForest: {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 100}
Best cross-validation score for RandomForest: 0.9666666666666666
Training SVM...
Best parameters for SVM: {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
Best cross-validation score for SVM: 0.9666666666666668
Training LogisticRegression...
Best parameters for LogisticRegression: {'C': 1, 'solver': 'lbfgs'}
Best cross-validation score for LogisticRegression: 0.9583333333333334
Training DecisionTree...
Best parameters for DecisionTree: {'max_depth': 30, 'min_samples_split': 2}
Best cross-validation score for DecisionTree: 0.9583333333333334


In [5]:
# Define models and hyperparameters with reduced search space.
# Estimators that draw random numbers while fitting are seeded with the same
# value used for the train/test split, so that re-running the notebook from a
# fresh kernel reproduces the same best parameters and scores.
# SVC is left unseeded: its random_state only matters when probability=True.
models = {
    'AdaBoost': AdaBoostClassifier(random_state=42),
    'RandomForest': RandomForestClassifier(random_state=42),
    'SVM': SVC(),
    # random_state is used by the 'liblinear' solver selected in the grid below
    'LogisticRegression': LogisticRegression(max_iter=200, random_state=42),
    'DecisionTree': DecisionTreeClassifier(random_state=42)
}

# One grid per model; the keys must match the keys of `models`.
param_grids = {
    'AdaBoost': {
        'n_estimators': [50, 100],
        'learning_rate': [0.1, 1]
    },
    'RandomForest': {
        'n_estimators': [50, 100],
        'max_depth': [10, 20],
        'min_samples_split': [2, 5]
    },
    'SVM': {
        'C': [1, 10],
        'gamma': [0.1, 0.01],
        'kernel': ['rbf']
    },
    'LogisticRegression': {
        'C': [1, 10],
        # NOTE(review): recent scikit-learn releases appear to warn about using
        # 'liblinear' on multiclass targets — confirm against the installed version
        'solver': ['liblinear']
    },
    'DecisionTree': {
        'max_depth': [10, 20],
        'min_samples_split': [2, 5]
    }
}

# Per-model containers, keyed by model name: the tuned estimator and the
# mean cross-validated accuracy it achieved
best_estimators, best_scores = {}, {}

# Exhaustive 5-fold grid search over each model's hyperparameter grid
for model_name, estimator in models.items():
    print(f"Training {model_name}...")
    grid_search = GridSearchCV(
        estimator=estimator,
        param_grid=param_grids[model_name],
        scoring='accuracy',
        cv=5,
    )
    grid_search.fit(X_train, y_train)

    # Keep the winner of the search and its score for later comparison
    best_estimators[model_name] = grid_search.best_estimator_
    best_scores[model_name] = grid_search.best_score_

    print(f"Best parameters for {model_name}: {grid_search.best_params_}")
    print(f"Best cross-validation score for {model_name}: {grid_search.best_score_}")

# Evaluate the models on the test set.
# `results` maps each model name to its classification report (as a nested
# dict of per-class and averaged metrics) and its confusion matrix.
results = {}
for model_name, model in best_estimators.items():
    print(f"Evaluating {model_name}...")
    y_pred = model.predict(X_test)

    # Dict form is stored for programmatic access; the text form printed below
    # needs its own call because classification_report returns one or the other.
    report = classification_report(y_test, y_pred, output_dict=True)
    cm = confusion_matrix(y_test, y_pred)
    results[model_name] = {
        'classification_report': report,
        'confusion_matrix': cm
    }

    print(f"Classification report for {model_name}:\n{classification_report(y_test, y_pred)}")
    # Reuse the matrix computed above instead of recomputing it just for display
    print(f"Confusion matrix for {model_name}:\n{cm}\n")

results


Training AdaBoost...
Best parameters for AdaBoost: {'learning_rate': 1, 'n_estimators': 50}
Best cross-validation score for AdaBoost: 0.9333333333333333
Training RandomForest...
Best parameters for RandomForest: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}
Best cross-validation score for RandomForest: 0.9583333333333334
Training SVM...
Best parameters for SVM: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
Best cross-validation score for SVM: 0.95
Training LogisticRegression...
Best parameters for LogisticRegression: {'C': 10, 'solver': 'liblinear'}
Best cross-validation score for LogisticRegression: 0.9333333333333332
Training DecisionTree...
Best parameters for DecisionTree: {'max_depth': 20, 'min_samples_split': 2}
Best cross-validation score for DecisionTree: 0.9583333333333334
Evaluating AdaBoost...
Classification report for AdaBoost:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00     

{'AdaBoost': {'classification_report': {'0': {'precision': 1.0,
    'recall': 1.0,
    'f1-score': 1.0,
    'support': 10},
   '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 9},
   '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11},
   'accuracy': 1.0,
   'macro avg': {'precision': 1.0,
    'recall': 1.0,
    'f1-score': 1.0,
    'support': 30},
   'weighted avg': {'precision': 1.0,
    'recall': 1.0,
    'f1-score': 1.0,
    'support': 30}},
  'confusion_matrix': array([[10,  0,  0],
         [ 0,  9,  0],
         [ 0,  0, 11]], dtype=int64)},
 'RandomForest': {'classification_report': {'0': {'precision': 1.0,
    'recall': 1.0,
    'f1-score': 1.0,
    'support': 10},
   '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 9},
   '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11},
   'accuracy': 1.0,
   'macro avg': {'precision': 1.0,
    'recall': 1.0,
    'f1-score': 1.0,
    'support': 30},
   'weighted avg