In [1]:
import warnings

import numpy as np
import pandas as pd
from scipy.stats import uniform
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC
warnings.filterwarnings("ignore")

# Iris dataset: pull the feature matrix and the target vector out of the bunch
data = load_iris()
X, y = data.data, data.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [3]:
# Baseline comparison: fit each untuned classifier on the training split and
# score its predictions on the held-out test split. Precision, recall and F1
# are macro-averaged, i.e. every class contributes equally.
models = {
    "Logistic Regression": LogisticRegression(max_iter=500),
    # Seeded so the forest's randomness (bootstrap samples, feature subsets)
    # is identical on every run and the reported row is reproducible.
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC()
}

# One dict per model; turned into a DataFrame in a single step below.
results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='macro'),
        "Recall": recall_score(y_test, y_pred, average='macro'),
        "F1-Score": f1_score(y_test, y_pred, average='macro')
    })

df_results = pd.DataFrame(results)
print("🔍 Base Model Performance:\n", df_results)


🔍 Base Model Performance:
                  Model  Accuracy  Precision  Recall  F1-Score
0  Logistic Regression       1.0        1.0     1.0       1.0
1        Random Forest       1.0        1.0     1.0       1.0
2                  SVM       1.0        1.0     1.0       1.0


In [5]:
# Grid Search for RandomForest
# Exhaustive search over 3 * 3 * 2 = 18 parameter combinations; every
# combination is scored with 3-fold cross-validation on the training split
# using macro-averaged F1.
param_grid = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 5, 10],
    "min_samples_split": [2, 5]
}

# The forest is seeded so that candidate scores -- and therefore best_params_ --
# do not change between runs; an unseeded forest can pick a different winner
# each time the cell is executed.
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=3,
    scoring="f1_macro",
)
grid_search.fit(X_train, y_train)

print("✅ Best GridSearchCV Parameters:", grid_search.best_params_)


✅ Best GridSearchCV Parameters: {'max_depth': 5, 'min_samples_split': 5, 'n_estimators': 200}


In [6]:
# Randomized search for SVC: 10 sampled candidates, each scored with 3-fold
# cross-validation on the training split using macro-averaged F1.
# random_state fixes which candidates are drawn.
param_dist = {
    # scipy's uniform(loc, scale) spans [loc, loc + scale], so C is drawn
    # from [0.1, 10.1] (not [0.1, 10]).
    "C": uniform(0.1, 10),
    # gamma is the coefficient of the rbf kernel; it has no effect on
    # candidates that use kernel='linear'.
    "gamma": ['scale', 'auto'],
    "kernel": ['linear', 'rbf']
}

random_search = RandomizedSearchCV(
    SVC(),
    param_distributions=param_dist,
    n_iter=10,
    cv=3,
    scoring="f1_macro",
    random_state=42,
)
random_search.fit(X_train, y_train)

print("✅ Best RandomizedSearchCV Parameters:", random_search.best_params_)

✅ Best RandomizedSearchCV Parameters: {'C': 3.845401188473625, 'gamma': 'scale', 'kernel': 'linear'}


In [9]:
# Score the tuned estimators on the held-out test split.
#
# best_estimator_ has already been refit on the whole of X_train by each
# search (refit=True is the default), so the models are used as-is. Fitting
# them again here would repeat work, mutate the estimators owned by the search
# objects, and -- for a forest without a fixed seed -- replace the selected
# model with a differently-randomized one.
best_rf = grid_search.best_estimator_
best_svc = random_search.best_estimator_

final_models = {
    "Tuned Random Forest": best_rf,
    "Tuned SVM": best_svc
}

# One dict per model; turned into a DataFrame in a single step below.
tuned_results = []

for name, model in final_models.items():
    y_pred = model.predict(X_test)
    tuned_results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='macro'),
        "Recall": recall_score(y_test, y_pred, average='macro'),
        "F1-Score": f1_score(y_test, y_pred, average='macro')
    })

df_tuned = pd.DataFrame(tuned_results)
print("\n🔍 Tuned Model Performance:\n", df_tuned)



🔍 Tuned Model Performance:
                  Model  Accuracy  Precision    Recall  F1-Score
0  Tuned Random Forest  1.000000    1.00000  1.000000  1.000000
1            Tuned SVM  0.977778    0.97619  0.974359  0.974321


In [11]:
# Rank the tuned models by their F1-Score column, highest first, and report
# the top row. Note that best_model is a DataFrame row (a Series holding the
# model's name and metrics), not a fitted estimator.
ranked_by_f1 = df_tuned.sort_values(by="F1-Score", ascending=False)
best_model = ranked_by_f1.iloc[0]
print(f"\n🥇 Best Model: {best_model['Model']} with F1-Score = {best_model['F1-Score']:.4f}")


🥇 Best Model: Tuned Random Forest with F1-Score = 1.0000
