# Hyperparameter tuning

In [8]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np
import nbimporter 
from supervised_learning import models,results,X_train, X_test, y_train, y_test

## Applying GridSearchCV & RandomizedSearchCV

In [10]:
# Show the scores of the previously trained baseline models as a reference
# point for the tuned models. `results` maps each model name to its scores.
print("=== Baseline Results ===")

for model_name in results:
    baseline_scores = results[model_name]
    print(f"\n{model_name}: {baseline_scores}")


=== Baseline Results ===

Logistic Regression: {'Accuracy': 0.9344262295081968, 'Precision': 0.9285714285714286, 'Recall': 0.9285714285714286, 'F1-score': 0.9285714285714286, 'AUC': 0.9718614718614719}

Decision Tree: {'Accuracy': 0.8360655737704918, 'Precision': 0.78125, 'Recall': 0.8928571428571429, 'F1-score': 0.8333333333333334, 'AUC': 0.8403679653679653}

Random Forest: {'Accuracy': 0.8852459016393442, 'Precision': 0.8620689655172413, 'Recall': 0.8928571428571429, 'F1-score': 0.8771929824561403, 'AUC': 0.9632034632034632}

SVM: {'Accuracy': 0.9344262295081968, 'Precision': 0.9285714285714286, 'Recall': 0.9285714285714286, 'F1-score': 0.9285714285714286, 'AUC': 0.9761904761904763}


In [5]:
# Hyperparameter search spaces, keyed by model name.
# Each inner dict maps an estimator constructor argument to the list of
# candidate values that the search is allowed to try.

param_grids = {
    # 4 x 1 x 2 = 8 combinations
    "Logistic Regression": {
        "C": [0.01, 0.1, 1, 10],
        "penalty": ["l2"],
        "solver": ["lbfgs", "liblinear"],
    },

    # 4 x 3 x 2 = 24 combinations
    "Decision Tree": {
        "max_depth": [None, 5, 10, 20],
        "min_samples_split": [2, 5, 10],
        "criterion": ["gini", "entropy"],
    },

    # 3 x 4 x 3 = 36 combinations
    "Random Forest": {
        "n_estimators": [50, 100, 200],
        "max_depth": [None, 5, 10, 20],
        "min_samples_split": [2, 5, 10],
    },

    # 3 x 3 x 2 = 18 combinations
    "SVM": {
        "C": [0.1, 1, 10],
        "kernel": ["linear", "rbf", "poly"],
        "gamma": ["scale", "auto"],
    },
}


In [16]:
# GridSearchCV + RandomizedSearchCV
#
# For every estimator in `models`, run an exhaustive grid search and a
# 5-candidate randomized search over the same space from `param_grids`
# (5-fold CV, accuracy). The search with the higher mean CV accuracy wins
# (ties go to the grid search) and its best estimator is scored on the
# held-out test split. Results are collected in `optimized_results`,
# keyed by model name.

optimized_results = {}

for name, estimator in models.items():
    print(f"\n--- {name} ---")
    search_space = param_grids[name]

    # GridSearchCV: evaluates every combination in the search space.
    grid = GridSearchCV(estimator, search_space, cv=5, scoring="accuracy", n_jobs=-1)
    grid.fit(X_train, y_train)

    # RandomizedSearchCV: evaluates 5 sampled combinations from the same space.
    rand = RandomizedSearchCV(estimator, param_distributions=search_space,
                              n_iter=5, cv=5, scoring="accuracy", random_state=42, n_jobs=-1)
    rand.fit(X_train, y_train)

    # Choose best between Grid and Random
    if grid.best_score_ >= rand.best_score_:
        best_model = grid.best_estimator_
        print("Best Params (GridSearch):", grid.best_params_)
    else:
        best_model = rand.best_estimator_
        print("Best Params (RandomizedSearch):", rand.best_params_)

    # No extra fit here: with the default refit=True, best_estimator_ has
    # already been refit on the whole of X_train with the winning parameters.
    # Fitting again is wasted work and, for estimators without a fixed
    # random_state, would evaluate a different model than the one selected.
    y_pred = best_model.predict(X_test)

    # Continuous scores for ROC AUC: prefer the positive-class probability and
    # fall back to decision_function (e.g. SVC built without probability=True).
    # NOTE(review): the [:, 1] column and the default binary averaging of
    # precision/recall/F1 assume a binary target - confirm against the data.
    if hasattr(best_model, "predict_proba"):
        y_score = best_model.predict_proba(X_test)[:, 1]
    elif hasattr(best_model, "decision_function"):
        y_score = best_model.decision_function(X_test)
    else:
        y_score = None
    auc = roc_auc_score(y_test, y_score) if y_score is not None else None

    # NOTE(review): the baseline `results` printed earlier use the key
    # "F1-score"; "F1" is kept here so existing consumers keep working.
    optimized_results[name] = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1": f1_score(y_test, y_pred),
        "AUC": auc
    }


print("\n=== Optimized Results ===")
for model_name, scores in optimized_results.items():
    print(f"\n{model_name}: {scores}")


--- Logistic Regression ---
Best Params (GridSearch): {'C': 0.1, 'penalty': 'l2', 'solver': 'lbfgs'}

--- Decision Tree ---
Best Params (GridSearch): {'criterion': 'gini', 'max_depth': 5, 'min_samples_split': 10}

--- Random Forest ---
Best Params (GridSearch): {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}

--- SVM ---
Best Params (GridSearch): {'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}

=== Optimized Results ===

Logistic Regression: {'Accuracy': 0.9508196721311475, 'Precision': 0.9629629629629629, 'Recall': 0.9285714285714286, 'F1': 0.9454545454545454, 'AUC': 0.9761904761904762}

Decision Tree: {'Accuracy': 0.9016393442622951, 'Precision': 0.8928571428571429, 'Recall': 0.8928571428571429, 'F1': 0.8928571428571429, 'AUC': 0.8885281385281386}

Random Forest: {'Accuracy': 0.8688524590163934, 'Precision': 0.8333333333333334, 'Recall': 0.8928571428571429, 'F1': 0.8620689655172413, 'AUC': 0.961038961038961}

SVM: {'Accuracy': 0.9344262295081968, 'Precision': 0.928571428