In [15]:
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier


In [18]:
# Read the feature table from disk.
data_path = r"F:\Omar 3amora\Heart_Disease_Project\Data\heart_disease_reduced_features.csv"
df = pd.read_csv(data_path)

# 'num' is the target column; every remaining column is used as a feature.
y = df['num']
X = df.drop(columns='num')

# Hold out 20% of the rows for testing. The split is seeded for
# reproducibility and stratified on the target.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

# Binarized form of the test labels (classes taken in sorted order);
# consumed by the AUC computation in the tuning loop.
class_labels = sorted(y.unique())
y_test_bin = label_binarize(y_test, classes=class_labels)


In [19]:
# Single seed shared by every estimator so runs are repeatable.
SEED = 42

# Candidate classifiers keyed by display name. Insertion order is the
# order in which they are tuned.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=SEED),
    'Decision Tree': DecisionTreeClassifier(random_state=SEED),
    'Random Forest': RandomForestClassifier(random_state=SEED),
    # probability=True is required for SVC to expose predict_proba.
    'SVM': SVC(random_state=SEED, probability=True),
}

# Hyper-parameter search spaces, one per classifier.
logreg_grid = {
    'C': [0.01, 0.1, 1, 10],
    'solver': ['lbfgs', 'saga'],
}
tree_grid = {
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}
forest_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}
svm_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto'],
}

# Keys must match the keys of `models` so the tuning loop can look them up.
param_grids = {
    'Logistic Regression': logreg_grid,
    'Decision Tree': tree_grid,
    'Random Forest': forest_grid,
    'SVM': svm_grid,
}

# Filled in by the tuning loop: metrics per model and the refit best estimator.
results, best_models = {}, {}


In [20]:
# Grid-search every candidate model with 5-fold CV on the training split,
# then score the refit best estimator on the held-out test split.
#
# Logistic regression and SVM are sensitive to feature scale, so they are
# wrapped in a Pipeline that standardizes the features first. Because the
# scaler lives inside the pipeline, GridSearchCV re-fits it on the training
# portion of each fold (no leakage from validation folds), and the scaler is
# stored together with the classifier in `best_models`.
scale_sensitive = {'SVM', 'Logistic Regression'}

for name, model in models.items():
    print(f"\n🔍 Tuning {name}...")

    if name in scale_sensitive:
        estimator = Pipeline([('scaler', StandardScaler()), ('clf', model)])
        # Pipeline parameters are addressed as "<step>__<param>".
        search_space = {f"clf__{param}": values for param, values in param_grids[name].items()}
    else:
        estimator = model
        search_space = param_grids[name]

    grid = GridSearchCV(estimator, search_space, cv=5, scoring='accuracy', n_jobs=-1, verbose=1)
    grid.fit(X_train, y_train)
    y_pred = grid.predict(X_test)
    y_proba = grid.predict_proba(X_test)

    # Save best model (a Pipeline for the scale-sensitive models)
    best_models[name] = grid.best_estimator_

    # Evaluation metrics on the test split.
    acc = accuracy_score(y_test, y_pred)
    # zero_division=0 yields the same value as the default but without the
    # warning emitted when some class is never predicted.
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    auc = roc_auc_score(y_test_bin, y_proba, multi_class='ovr', average='weighted')

    results[name] = {
        # Strip the pipeline step prefix so reported parameter names match
        # the names used in `param_grids`.
        'Best Params': {key.split('__', 1)[-1]: value for key, value in grid.best_params_.items()},
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-score': f1,
        'AUC': auc
    }


🔍 Tuning Logistic Regression...
Fitting 5 folds for each of 8 candidates, totalling 40 fits


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))



🔍 Tuning Decision Tree...
Fitting 5 folds for each of 36 candidates, totalling 180 fits


  _warn_prf(average, modifier, msg_start, len(result))



🔍 Tuning Random Forest...
Fitting 5 folds for each of 81 candidates, totalling 405 fits


  _warn_prf(average, modifier, msg_start, len(result))



🔍 Tuning SVM...
Fitting 5 folds for each of 12 candidates, totalling 60 fits


  _warn_prf(average, modifier, msg_start, len(result))


In [21]:
# Write every model's metrics to a plain-text report.
metrics_path = "F:/Omar 3amora/Heart_Disease_Project/Results/evaluation_metrics.txt"

# Create the directory the report is actually written to (not a relative
# "results" folder in the current working directory).
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)

with open(metrics_path, "w", encoding="utf-8") as f:
    for name, metrics in results.items():
        f.write(f"\n{name}:\n")
        for k, v in metrics.items():
            if isinstance(v, dict):
                # 'Best Params' is a dict; write its repr as-is.
                f.write(f"  {k}: {v}\n")
            else:
                # Numeric metrics are written with 4 decimal places.
                f.write(f"  {k}: {v:.4f}\n")

print(f"✅ Metrics saved to {metrics_path}")


✅ Metrics saved to results/evaluation_metrics.txt


In [23]:
# Pick the model with the highest AUC recorded in `results`.
# NOTE(review): those AUC values were computed on the test split, so choosing
# the winner by them makes the reported AUC optimistic. Consider selecting on
# a cross-validated score and keeping the test split for the final report only.
best_model_name = max(results, key=lambda k: results[k]['AUC'])
final_model = best_models[best_model_name]

print(f"\n🚀 Best model selected: {best_model_name} (AUC = {results[best_model_name]['AUC']:.4f})")

# =======================
# Save best model
# =======================
model_path = "F:/Omar 3amora/Heart_Disease_Project/Models/best_model.pkl"

# Create the directory the model is actually written to (not a relative
# "models" folder in the current working directory).
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(final_model, model_path)
print(f"✅ Best model saved as {model_path}")


🚀 Best model selected: Random Forest (AUC = 0.8532)
✅ Best model saved as models/best_model.pkl
