In [1]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import optuna

# Fetch the Iris data and unpack the pieces used throughout the script
iris = load_iris()
X, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

# Tabular view of the data: numeric class code plus the readable species name
df = pd.DataFrame(X, columns=feature_names)
df['target'] = y
df['species'] = df['target'].map(dict(enumerate(target_names)))

# Quick look at the first rows and at how many samples each class has
print(
    "Iris Dataset Overview:",
    df.head(),
    "\nTarget distribution:",
    df['species'].value_counts(),
    sep="\n",
)

# Hold out 20% of the samples for testing; stratify keeps class proportions
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Learn the scaling statistics from the training split only, then apply
# the same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Define the objective function for Optuna
def objective(trial):
    """Return the mean cross-validated accuracy of one sampled configuration.

    The trial first picks a model family ("RandomForest" or "SVC") and then
    the hyperparameters belonging to that family. The candidate is scored
    with 5-fold cross-validation on the module-level ``X_train`` / ``y_train``
    only, so the held-out test split is never used for model selection and
    stays an unbiased estimate for the final evaluation.

    Args:
        trial: The ``optuna`` trial used to sample the hyperparameters.

    Returns:
        Mean accuracy over the cross-validation folds, as a float.

    Raises:
        ValueError: If the sampled model name is not a supported one.
    """
    # Local import: keeps this function self-contained without changing the
    # import block at the top of the file.
    from sklearn.model_selection import cross_val_score

    model_name = trial.suggest_categorical("model", ["RandomForest", "SVC"])

    if model_name == "RandomForest":
        model = RandomForestClassifier(
            n_estimators=trial.suggest_int("n_estimators", 50, 300),
            max_depth=trial.suggest_int("max_depth", 2, 20),
            random_state=42,
        )
    elif model_name == "SVC":
        model = SVC(
            C=trial.suggest_float("C", 1e-3, 1e2, log=True),
            gamma=trial.suggest_float("gamma", 1e-4, 1e-1, log=True),
            random_state=42,
        )
    else:
        # Fail loudly instead of reaching the scoring step with no model bound.
        raise ValueError(f"Unsupported model: {model_name!r}")

    # Score on training data only; evaluating on X_test here would tune the
    # hyperparameters to the test set and inflate the reported accuracy.
    # NOTE(review): X_train was scaled before the folds are formed, so fold
    # statistics are shared; wrap scaler + model in a Pipeline if that matters.
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring="accuracy")
    return float(scores.mean())

# Create and run the study.
# The sampler is seeded so repeated runs explore the same sequence of trials,
# matching the fixed random_state used for the split and the estimators.
# TPESampler is Optuna's default sampler, so the search algorithm is unchanged.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=50, show_progress_bar=True)

# Print optimization results: the winning configuration and its objective value
print("\nOptimization Results:")
print("Best parameters:", study.best_params)
print("Best accuracy:", study.best_value)

# Train final model with best parameters.
# best_params holds the model family under "model" plus that family's
# hyperparameters; pop the family so the rest can be passed as keyword args.
best_params = study.best_params.copy()
model_type = best_params.pop("model")

# Map each model family name to its estimator class.
model_classes = {
    "RandomForest": RandomForestClassifier,
    "SVC": SVC,
}
if model_type not in model_classes:
    # Without this guard an unexpected name would leave final_model unbound
    # and surface later as a confusing NameError.
    raise ValueError(f"Unsupported model type: {model_type!r}")

final_model = model_classes[model_type](**best_params, random_state=42)

# Refit on the full training split and predict the held-out test split
final_model.fit(X_train, y_train)
y_pred = final_model.predict(X_test)

# Report how the tuned model performs on the held-out test split
print("\nFinal Model Evaluation:")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# Per-class precision / recall / F1, labelled with the species names
print(
    "\nClassification Report:",
    classification_report(y_test, y_pred, target_names=target_names),
    sep="\n",
)

# Only the random forest exposes feature_importances_, so print them
# just for that model family
if model_type == "RandomForest":
    print("\nFeature Importances:")
    importances = final_model.feature_importances_
    for column, weight in zip(feature_names, importances):
        print(f"{column}: {weight:.4f}")

ModuleNotFoundError: No module named 'optuna'