In [1]:
# Step 1: Import libraries
import pandas as pd
import warnings
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import StackingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE, BorderlineSMOTE
from imblearn.under_sampling import TomekLinks

warnings.filterwarnings("ignore")

# Step 2: Load dataset (13 features only)
df = pd.read_csv("heart.csv")
X = df.drop("target", axis=1)
y = df["target"]

# Step 3: Scale numeric features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Step 5: Hybrid resampling
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

bsmote = BorderlineSMOTE(random_state=42)
X_res, y_res = bsmote.fit_resample(X_res, y_res)

tomek = TomekLinks()
X_res, y_res = tomek.fit_resample(X_res, y_res)

# Step 6: Build stacked model
base_models = [
    ('rf', RandomForestClassifier(n_estimators=200, random_state=42)),
    ('svc', SVC(kernel='rbf', C=1.0, gamma='scale', probability=True)),
    ('knn', KNeighborsClassifier(n_neighbors=5)),
    ('log', LogisticRegression(max_iter=1000)),
    ('et', ExtraTreesClassifier(n_estimators=200, random_state=42)),
    ('dt', DecisionTreeClassifier(max_depth=10, random_state=42))
]

meta_model = LogisticRegression(max_iter=1000)
stacked_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)
stacked_model.fit(X_res, y_res)

# Step 7: Evaluate
y_pred = stacked_model.predict(X_test)
y_proba = stacked_model.predict_proba(X_test)[:, 1]
acc = accuracy_score(y_test, y_pred)

print(f"✅ Accuracy: {acc*100:.2f}%")
print("\n📋 Classification Report:\n", classification_report(y_test, y_pred))

# Step 8: Save model and scaler
with open("heart_probability_model.pkl", "wb") as f:
    pickle.dump({"model": stacked_model, "scaler": scaler}, f)

print("✅ Model saved as heart_probability_model.pkl")


✅ Accuracy: 98.54%

📋 Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205

✅ Model saved as heart_probability_model.pkl


In [14]:
# heart_predictor.py

def predict_heart_disease_risk(input_features):
    import pickle
    import numpy as np

    # Check input length
    if not isinstance(input_features, list) or len(input_features) != 13:
        return {
            "error": "Invalid input. Please provide a list of 13 numeric features."
        }

    try:
        # Load model and scaler
        with open("heart_probability_model.pkl", "rb") as f:
            data = pickle.load(f)

        model = data["model"]
        scaler = data["scaler"]

        # Preprocess input
        input_array = np.array(input_features).reshape(1, -1)
        input_scaled = scaler.transform(input_array)

        # Predict probability
        probability = model.predict_proba(input_scaled)[0][1]
        percent = round(probability * 100, 2)

        # Suggestion based on risk
        if percent >= 75:
            suggestion = "🔴 High Risk: Recommend ECG, Echo, Stress Test, and Angiography."
        elif percent >= 50:
            suggestion = "🟠 Medium Risk: Recommend ECG, Echo, and Blood Test."
        elif percent >= 25:
            suggestion = "🟡 Low Risk: Recommend Lifestyle Check & Regular Monitoring."
        else:
            suggestion = "🟢 Very Low Risk: No urgent tests, maintain healthy lifestyle."

        # Return structured response
        return {
            "risk_percent": percent,
            "message": f"❤️ Heart Disease Risk: {percent}%",
            "suggestion": suggestion
        }

    except Exception as e:
        return {
            "error": f"Prediction failed: {str(e)}"
        }


# Example usage
if __name__ == "__main__":
    input_features = [
        21,     # age
        1,      # sex (1 = male, 0 = female)
        0,      # cp (chest pain type)
        120,    # trestbps
        233,    # chol
        0,      # fbs
        0,      # restecg
        75,     # thalach
        0,      # exang
        2.3,    # oldpeak
        0,      # slope
        0,      # ca
        1       # thal
    ]

    result = predict_heart_disease_risk(input_features)
    if "error" in result:
        print("❌", result["error"])
    else:
        print(result["message"])
        print(result["suggestion"])


❤️ Heart Disease Risk: 13.26%
🟢 Very Low Risk: No urgent tests, maintain healthy lifestyle.
