In [1]:
# main.py
"""
Main script to run the full Heart Disease Prediction pipeline:
1. Load data
2. Preprocess
3. Train and compare baseline models
4. Hyperparameter tuning for Random Forest
5. Final evaluation and feature importance
"""

from src.data_loader import load_data, preprocess_data
from src.model_training import train_models, tune_random_forest
from src.evaluation import evaluate_model


# Step 1 - load the raw dataset through the project's loader. It is called
# with no arguments, so whatever default path the loader defines is used.
print("Loading dataset...")
df = load_data()  # default path: data/heart_stat.xlsx
# Echo the shape as a quick sanity check on the load (the recorded run
# reported (1190, 12)). `df` is reused by the preprocessing step below.
# NOTE(review): presumably a pandas DataFrame - confirm in src.data_loader.
print("Dataset loaded. Shape:", df.shape)


Loading dataset...
Dataset loaded. Shape: (1190, 12)


In [2]:
# Step 2 - hand the loaded data to preprocess_data, which returns four objects
# unpacked here as train/test features and train/test targets. These four
# names are shared notebook state consumed by the training and evaluation steps.
# NOTE(review): the split ratio, any scaling/encoding and the random seed all
# live inside src.data_loader.preprocess_data - confirm the split is seeded so
# a Restart & Run All reproduces the same partition.
print("\nPreprocessing data...")
X_train, X_test, y_train, y_test = preprocess_data(df)
print("Preprocessing done.")
# Report both shapes so the split can be checked by eye
# (recorded run: (952, 11) for training and (238, 11) for test).
print(f"Training set: {X_train.shape}, Test set: {X_test.shape}")


Preprocessing data...
Preprocessing done.
Training set: (952, 11), Test set: (238, 11)


In [None]:
# Step 3 - fit the baseline models. Both the training and the test split are
# passed in; the recorded output shows accuracy, ROC-AUC and a classification
# report per model, followed by a comparison table of Accuracy / ROC_AUC.
# NOTE(review): `models` is not referenced again in the cells visible here;
# what it contains is defined by src.model_training.train_models - verify.
# NOTE(review): the baseline comparison appears to be scored on the same test
# split later used for the final evaluation - confirm this is intended.
print("\nTraining baseline models...")
models = train_models(X_train, y_train, X_test, y_test)

# Step 4 - hyperparameter search for the Random Forest, given the training
# split only. The recorded log reports 5 folds x 216 candidates = 1080 fits,
# so this is likely the slow part of the notebook.
# `best_rf` is the object the final evaluation step expects.
print("\nHyperparameter tuning for Random Forest...")
best_rf = tune_random_forest(X_train, y_train)


Training baseline models...

Logistic Regression
Accuracy: 0.8403361344537815
ROC-AUC: 0.9049036281179138
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.82      0.83       112
           1       0.84      0.86      0.85       126

    accuracy                           0.84       238
   macro avg       0.84      0.84      0.84       238
weighted avg       0.84      0.84      0.84       238


Random Forest
Accuracy: 0.9285714285714286
ROC-AUC: 0.9711947278911565
Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.92      0.92       112
           1       0.93      0.94      0.93       126

    accuracy                           0.93       238
   macro avg       0.93      0.93      0.93       238
weighted avg       0.93      0.93      0.93       238


SVM (RBF)
Accuracy: 0.8445378151260504
ROC-AUC: 0.9278982426303856
Classification Report:
              precision    rec

Unnamed: 0,Accuracy,ROC_AUC
Random Forest,0.928571,0.971195
Gradient Boosting,0.903361,0.949617
SVM (RBF),0.844538,0.927898
Logistic Regression,0.840336,0.904904



Hyperparameter tuning for Random Forest...
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


In [None]:
# Step 5 - evaluate the tuned Random Forest on the held-out test split.
# Depends on `best_rf` from the tuning step and on X_test / y_test from
# preprocessing; on a fresh kernel those cells must have run first or this
# raises NameError.
# NOTE(review): what evaluate_model prints or plots is defined in
# src.evaluation - not visible from here.
print("\nEvaluating final tuned Random Forest model...")
evaluate_model(best_rf, X_test, y_test)

# Printed only if evaluate_model returned without raising.
print("\nPipeline completed successfully!")