In [122]:
import pandas as pd
import numpy as np
import joblib

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score
from xgboost import XGBClassifier

# 1️⃣ Load dataset
df = pd.read_csv("heart_disease_dataset.csv")

# 2️⃣ Feature Engineering
df['age_chol'] = df['age'] * df['cholesterol']
df['thalach_age'] = df['thalassemia'] / (df['age'] + 1)
df['bp_chol_ratio'] = df['resting_blood_pressure'] / (df['cholesterol'] + 1)
df['heart_rate_ratio'] = df['max_heart_rate'] / (df['age'] + 1)
df['chest_pain_angina'] = df['chest_pain_type'] * df['exercise_induced_angina']

# 3️⃣ Define features & target
target_col = "heart_disease"
X = df.drop(columns=[target_col])
y = df[target_col]

In [124]:
# 4️⃣ Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 5️⃣ Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
joblib.dump(scaler, "scaler.pkl")


['scaler.pkl']

In [126]:
# 6️⃣ Base models + hyperparameter tuning

# Random Forest
rf = RandomForestClassifier(random_state=42)
rf_params = {
    'n_estimators': [500, 700],
    'max_depth': [4, 5, 6],
    'min_samples_split': [5, 10],
    'min_samples_leaf': [1, 2],
    'max_features': ['sqrt', 'log2']
}
rf_grid = GridSearchCV(rf, rf_params, cv=5, n_jobs=-1, scoring='roc_auc')
rf_grid.fit(X_train_scaled, y_train)
rf_best = rf_grid.best_estimator_

# Gradient Boosting
gb = GradientBoostingClassifier(random_state=42)
gb_params = {
    'n_estimators': [500, 700],
    'learning_rate': [0.03, 0.05, 0.07],
    'max_depth': [3, 4, 5]
}
gb_grid = GridSearchCV(gb, gb_params, cv=5, n_jobs=-1, scoring='roc_auc')
gb_grid.fit(X_train_scaled, y_train)
gb_best = gb_grid.best_estimator_

# XGBoost
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgb_params = {
    'n_estimators': [500, 700],
    'learning_rate': [0.03, 0.05, 0.07],
    'max_depth': [3, 4, 5],
    'subsample': [0.8, 1.0]
}
xgb_grid = GridSearchCV(xgb, xgb_params, cv=5, n_jobs=-1, scoring='roc_auc')
xgb_grid.fit(X_train_scaled, y_train)
xgb_best = xgb_grid.best_estimator_


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [128]:
# 7️⃣ Stacking classifier
stack_clf = StackingClassifier(
    estimators=[('rf', rf_best), ('gb', gb_best), ('xgb', xgb_best)],
    final_estimator=LogisticRegression(max_iter=500),
    cv=5,
    passthrough=True
)

# 8️⃣ Voting ensemble with stacking + base models
voting_clf = VotingClassifier(
    estimators=[('stack', stack_clf), ('rf', rf_best), ('gb', gb_best), ('xgb', xgb_best)],
    voting='soft',
    n_jobs=-1
)

In [130]:
# 9️⃣ Train final ensemble
voting_clf.fit(X_train_scaled, y_train)
joblib.dump(voting_clf, "HeartDisease_StackVoting.pkl")


['HeartDisease_StackVoting.pkl']

In [132]:
# 10️⃣ Evaluate
y_pred = voting_clf.predict(X_test_scaled)
y_proba = voting_clf.predict_proba(X_test_scaled)[:, 1]

print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_proba))

Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.58      0.64        36
           1       0.70      0.80      0.74        44

    accuracy                           0.70        80
   macro avg       0.70      0.69      0.69        80
weighted avg       0.70      0.70      0.70        80

Accuracy: 0.7
ROC AUC: 0.7683080808080808


In [134]:
import joblib

# Save the trained ensemble model
joblib.dump(voting_clf, "HeartDisease_Ensemble.pkl")

# Save the scaler
joblib.dump(scaler, "scaler.pkl")

print("Model and scaler saved successfully!")


Model and scaler saved successfully!
