In [8]:
import pandas as pd
import os
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [9]:
# Load the heart dataset from CSV (path is relative to the notebook's working
# directory). The `num` column of this frame is used as the target further down.
df = pd.read_csv("../data/reduced_heart.csv")

In [10]:
# Separate the `num` target column from the predictor columns.
y = df["num"]
X = df.drop(columns=["num"])

# A target with more than two distinct values is collapsed to binary:
# values greater than 0 become 1, everything else becomes 0.
y = (y > 0).astype(int) if y.nunique() > 2 else y

# 80/20 split, stratified on the target so both splits keep the class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to both splits so no test-set information reaches the fit.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [11]:
# Search space per candidate classifier.
# Each entry maps a display name to:
#   "model":  an unfitted estimator (seeded with random_state=42)
#   "params": the hyperparameter grid handed to the search for that estimator
param_grids = {
    "LogisticRegression": dict(
        model=LogisticRegression(max_iter=2000, random_state=42),
        params=dict(
            C=[0.01, 0.1, 1, 10, 100],
            penalty=["l2"],
            solver=["lbfgs", "liblinear"],
        ),
    ),
    "DecisionTree": dict(
        model=DecisionTreeClassifier(random_state=42),
        params=dict(
            max_depth=[None, 3, 5, 10],
            min_samples_split=[2, 5, 10],
            min_samples_leaf=[1, 2, 4],
        ),
    ),
    "RandomForest": dict(
        model=RandomForestClassifier(random_state=42),
        params=dict(
            n_estimators=[50, 100, 200, 300],
            max_depth=[None, 5, 10, 20],
            min_samples_split=[2, 5, 10],
            min_samples_leaf=[1, 2, 4],
        ),
    ),
    "SVM": dict(
        model=SVC(probability=True, random_state=42),
        params=dict(
            C=[0.1, 1, 10, 100],
            kernel=["linear", "rbf", "poly"],
            gamma=["scale", "auto"],
        ),
    ),
}

In [12]:
# tuned_results: model name -> test accuracy (filled by the evaluation cell).
# best_models:   model name -> refit best estimator found by the grid search.
tuned_results = {}
best_models = {}

for name, mp in param_grids.items():
    print(f"\n🔍 Hyperparameter Tuning for {name} ...")

    # Exhaustive 5-fold cross-validated search over this model's grid,
    # scored by accuracy and parallelised across all available cores.
    grid = GridSearchCV(
        mp["model"],
        mp["params"],
        cv=5,
        scoring="accuracy",
        n_jobs=-1
    )
    grid.fit(X_train, y_train)
    print(f"GridSearchCV - Best Params: {grid.best_params_}")
    print(f"GridSearchCV - CV Score: {grid.best_score_:.3f}")

    # Keep the winner for every model. `grid` is overwritten on the next
    # iteration, so without this only the last model tuned would survive
    # the loop and the others could not be evaluated or saved afterwards.
    best_models[name] = grid.best_estimator_


🔍 Hyperparameter Tuning for LogisticRegression ...
GridSearchCV - Best Params: {'C': 0.01, 'penalty': 'l2', 'solver': 'lbfgs'}
GridSearchCV - CV Score: 0.843

🔍 Hyperparameter Tuning for DecisionTree ...
GridSearchCV - Best Params: {'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}
GridSearchCV - CV Score: 0.797

🔍 Hyperparameter Tuning for RandomForest ...
GridSearchCV - Best Params: {'max_depth': 5, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 300}
GridSearchCV - CV Score: 0.856

🔍 Hyperparameter Tuning for SVM ...
GridSearchCV - Best Params: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
GridSearchCV - CV Score: 0.835


In [14]:
 # Score a tuned estimator on the held-out test split.
 # NOTE(review): `grid` and `name` are not defined in this cell; they are
 # whatever the tuning loop left behind, so only the model from the loop's
 # final iteration is evaluated here.
 best_model = grid.best_estimator_
 y_pred = best_model.predict(X_test)

 acc = accuracy_score(y_test, y_pred)
 report = classification_report(y_test, y_pred, zero_division=0)
 print(f"➡️ Test Accuracy ({name}): {acc:.3f}")
 print("Classification Report:\n", report)

 # Record the test accuracy and the estimator under the model's name.
 tuned_results[name], best_models[name] = acc, best_model


➡️ Test Accuracy (SVM): 0.883
Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.94      0.90        32
           1       0.92      0.82      0.87        28

    accuracy                           0.88        60
   macro avg       0.89      0.88      0.88        60
weighted avg       0.89      0.88      0.88        60



In [20]:
# Persist the tuned model selected for downstream use.
# The model name and output path are each defined once, so the lookup, the
# directory creation, the dump and the log message cannot drift apart.
FINAL_MODEL_NAME = "SVM"
MODEL_PATH = "../models/best_model_tuned.pkl"

# Create the directory that is actually written to ("../models"), not a
# "models" folder next to the notebook.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

final_model = best_models[FINAL_MODEL_NAME]
# NOTE(review): the estimator was fit on StandardScaler-transformed features,
# and the fitted `scaler` is not saved here. Anything loading this pickle must
# apply the same scaling before predicting; consider persisting the scaler too.
joblib.dump(final_model, MODEL_PATH)
print(f"\n✅ Best tuned {FINAL_MODEL_NAME} model saved to {MODEL_PATH}")



✅ Best tuned RandomForest model saved to models/best_model_tuned.pkl
