<a href="https://colab.research.google.com/github/CodeWithJatin1/Celebal_project/blob/main/Untitled10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from scipy.stats import uniform, randint

iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target

print("Dataset Loaded:")
print(f"Features (X) shape: {X.shape}")
print(f"Target (y) shape: {y.shape}\n")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Dataset Split:")
print(f"Training features (X_train) shape: {X_train.shape}")
print(f"Testing features (X_test) shape: {X_test.shape}\n")

model_results = {}

models = {
    "Logistic Regression (Base)": LogisticRegression(max_iter=500, random_state=42),
    "Random Forest (Base)": RandomForestClassifier(random_state=42),
    "Support Vector Machine (Base)": SVC(random_state=42)
}

print("🔎 Base Model Evaluation:")
for name, model in models.items():
    print(f"\n--- Training and Evaluating: {name} ---")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    print(f"Accuracy : {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall   : {recall:.4f}")
    print(f"F1 Score : {f1:.4f}")

    model_results[name] = {
        'model': model,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'tuned': False
    }

print("\n--- Hyperparameter Tuning: GridSearchCV for Random Forest ---")
param_grid_rf = {
    'n_estimators': [10, 50, 100, 200],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10]
}

grid_search_rf = GridSearchCV(RandomForestClassifier(random_state=42), param_grid_rf, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_rf.fit(X_train, y_train)

best_rf_grid = grid_search_rf.best_estimator_
y_pred_best_rf_grid = best_rf_grid.predict(X_test)

print("\n✅ Best Model after GridSearchCV (Random Forest):")
print("Best Parameters:", grid_search_rf.best_params_)

accuracy_rf_grid = accuracy_score(y_test, y_pred_best_rf_grid)
precision_rf_grid = precision_score(y_test, y_pred_best_rf_grid, average='weighted', zero_division=0)
recall_rf_grid = recall_score(y_test, y_pred_best_rf_grid, average='weighted', zero_division=0)
f1_rf_grid = f1_score(y_test, y_pred_best_rf_grid, average='weighted', zero_division=0)

print(f"Accuracy : {accuracy_rf_grid:.4f}")
print(f"Precision: {precision_rf_grid:.4f}")
print(f"Recall   : {recall_rf_grid:.4f}")
print(f"F1 Score : {f1_rf_grid:.4f}")

model_results["Random Forest (GridSearchCV)"] = {
    'model': best_rf_grid,
    'accuracy': accuracy_rf_grid,
    'precision': precision_rf_grid,
    'recall': recall_rf_grid,
    'f1_score': f1_rf_grid,
    'tuned': True,
    'tuning_method': 'GridSearchCV'
}

print("\n--- Hyperparameter Tuning: RandomizedSearchCV for Logistic Regression ---")

param_distributions_lr = {
    'C': uniform(loc=0.01, scale=100),
    'solver': ['liblinear', 'lbfgs', 'saga'],
    'penalty': ['l1', 'l2']
}

random_search_lr = RandomizedSearchCV(
    LogisticRegression(max_iter=1000, random_state=42),
    param_distributions=param_distributions_lr,
    n_iter=50,
    cv=5,
    scoring='accuracy',
    random_state=42,
    n_jobs=-1,
    error_score='raise'
)

try:
    random_search_lr.fit(X_train, y_train)
except Exception as e:
    print(f"An error occurred during RandomizedSearchCV for Logistic Regression: {e}")
    print("Some solver/penalty combinations might be incompatible. Retrying with a more robust parameter distribution.")
    param_distributions_lr = {
        'C': uniform(loc=0.01, scale=100),
        'solver': ['liblinear', 'lbfgs'],
        'penalty': ['l2']
    }
    random_search_lr = RandomizedSearchCV(
        LogisticRegression(max_iter=1000, random_state=42),
        param_distributions=param_distributions_lr,
        n_iter=50,
        cv=5,
        scoring='accuracy',
        random_state=42,
        n_jobs=-1
    )
    random_search_lr.fit(X_train, y_train)

best_lr_random = random_search_lr.best_estimator_
y_pred_best_lr_random = best_lr_random.predict(X_test)

print("\n✅ Best Model after RandomizedSearchCV (Logistic Regression):")
print("Best Parameters:", random_search_lr.best_params_)

accuracy_lr_random = accuracy_score(y_test, y_pred_best_lr_random)
precision_lr_random = precision_score(y_test, y_pred_best_lr_random, average='weighted', zero_division=0)
recall_lr_random = recall_score(y_test, y_pred_best_lr_random, average='weighted', zero_division=0)
f1_lr_random = f1_score(y_test, y_pred_best_lr_random, average='weighted', zero_division=0)

print(f"Accuracy : {accuracy_lr_random:.4f}")
print(f"Precision: {precision_lr_random:.4f}")
print(f"Recall   : {recall_lr_random:.4f}")
print(f"F1 Score : {f1_lr_random:.4f}")

model_results["Logistic Regression (RandomizedSearchCV)"] = {
    'model': best_lr_random,
    'accuracy': accuracy_lr_random,
    'precision': precision_lr_random,
    'recall': recall_lr_random,
    'f1_score': f1_lr_random,
    'tuned': True,
    'tuning_method': 'RandomizedSearchCV'
}

print("\n--- Comprehensive Model Performance Summary ---")
print("{:<40} {:<10} {:<10} {:<10} {:<10}".format("Model Name", "Accuracy", "Precision", "Recall", "F1 Score"))
print("-" * 90)

best_overall_model_name = ""
best_overall_accuracy = -1

for name, metrics in model_results.items():
    print("{:<40} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
        name,
        metrics['accuracy'],
        metrics['precision'],
        metrics['recall'],
        metrics['f1_score']
    ))
    if metrics['accuracy'] > best_overall_accuracy:
        best_overall_accuracy = metrics['accuracy']
        best_overall_model_name = name

print("\n--- Best Performing Model Overall ---")
print(f"The best performing model based on Accuracy is: {best_overall_model_name}")
print(f"With an Accuracy of: {best_overall_accuracy:.4f}")


Dataset Loaded:
Features (X) shape: (150, 4)
Target (y) shape: (150,)

Dataset Split:
Training features (X_train) shape: (120, 4)
Testing features (X_test) shape: (30, 4)

🔎 Base Model Evaluation:

--- Training and Evaluating: Logistic Regression (Base) ---
Accuracy : 1.0000
Precision: 1.0000
Recall   : 1.0000
F1 Score : 1.0000

--- Training and Evaluating: Random Forest (Base) ---
Accuracy : 1.0000
Precision: 1.0000
Recall   : 1.0000
F1 Score : 1.0000

--- Training and Evaluating: Support Vector Machine (Base) ---
Accuracy : 1.0000
Precision: 1.0000
Recall   : 1.0000
F1 Score : 1.0000

--- Hyperparameter Tuning: GridSearchCV for Random Forest ---

✅ Best Model after GridSearchCV (Random Forest):
Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 10}
Accuracy : 1.0000
Precision: 1.0000
Recall   : 1.0000
F1 Score : 1.0000

--- Hyperparameter Tuning: RandomizedSearchCV for Logistic Regression ---
An error occurred during RandomizedSearchCV for Logistic Regressio