Heart Disease Prediction and Personalized Recommendation System Using Machine Learning

OBJECTIVES

The main objectives of this project are:

To predict the presence of heart disease using clinical data.

To compare multiple machine learning models for performance.

To prioritize recall to minimize missed disease cases.

To provide personalized lifestyle, diet, and medication category recommendations.

To build an interactive and explainable healthcare system.

XGBoost was selected as the final model due to superior performance while maintaining acceptable interpretability. Using this machine learning model, I created three types of interactive app modes: (i) a first app for healthcare or hospital use, and
(ii) a second app offering a Patient mode and a Doctor mode.

FINAL INTERPRETATION
This project focuses on building a correct, interpretable, and healthcare-relevant machine learning pipeline. While advanced features are planned for future phases, the current system successfully demonstrates predictive analytics, risk stratification, and safe medical recommendations.
The system is interactive and deployment-ready. Full cloud deployment is planned as future work.

In [None]:
!pip install gradio shap xgboost
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, confusion_matrix

import joblib
import shap
import gradio as gr




In [None]:
# Load the dataset and take a quick look at its structure and missing values.
df = pd.read_csv("heart.csv")
df.head()
df.info()
df.isnull().sum()

# Separate the feature columns from the "target" label column.
X = df.drop("target", axis=1)
y = df["target"]

# Split BEFORE scaling: fitting the scaler on the full dataset would let
# test-set means/variances leak into preprocessing and bias the evaluation.
# The same random_state/stratify settings select the same rows as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to the held-out split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any later code that refers to the fully scaled matrix still works.
X_scaled = scaler.transform(X)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [None]:
# Logistic regression: configure the estimator, then fit it on the training split.
lr_params = {"max_iter": 1000}
lr = LogisticRegression(**lr_params)
lr.fit(X_train, y_train)


In [None]:
# Random forest: hyperparameters are collected in one place, then the
# estimator is fit on the training split.
rf_params = {
    "n_estimators": 200,
    "max_depth": 6,
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train, y_train)

In [None]:
# XGBoost: hyperparameters are collected in one place, then the estimator is
# fit on the training split.
xgb_params = {
    "n_estimators": 200,
    "learning_rate": 0.05,
    "max_depth": 4,
    "eval_metric": "logloss",
    "random_state": 42,
}
xgb = XGBClassifier(**xgb_params)
xgb.fit(X_train, y_train)

In [None]:
#model evaluation
def evaluate(model, name):
    """Print a short evaluation report for a fitted classifier.

    The model is scored on the module-level ``X_test`` / ``y_test`` split.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        name: Heading printed above the metrics.

    Prints accuracy, recall, ROC AUC and the confusion matrix; returns None.
    """
    predicted_labels = model.predict(X_test)
    # Second column of predict_proba (class index 1) feeds the ROC AUC score.
    class_one_proba = model.predict_proba(X_test)[:, 1]

    print(f"\n{name}")
    # (label, metric function, model output the metric is computed from)
    report_rows = (
        ("Accuracy:", accuracy_score, predicted_labels),
        ("Recall:", recall_score, predicted_labels),
        ("ROC AUC:", roc_auc_score, class_one_proba),
        ("Confusion Matrix:\n", confusion_matrix, predicted_labels),
    )
    for label, metric_fn, model_output in report_rows:
        print(label, metric_fn(y_test, model_output))
# Report test-set metrics for every trained model, in the same order.
for fitted_model, heading in (
    (lr, "Logistic Regression"),
    (rf, "Random Forest"),
    (xgb, "XGBoost (Final)"),
):
    evaluate(fitted_model, heading)

# Persist the final classifier and the scaler so the apps can reload them.
for artifact, artifact_path in (
    (xgb, "heart_disease_model.pkl"),
    (scaler, "scaler.pkl"),
):
    joblib.dump(artifact, artifact_path)


# Model explainability
# Explain the XGBoost model with SHAP on up to the first 100 test rows and
# draw the summary plot labelled with the original feature column names.
shap_sample = X_test[:100]
explainer = shap.Explainer(xgb)
shap_values = explainer(shap_sample)
shap.summary_plot(shap_values, shap_sample, feature_names=X.columns)


In [None]:
#recommendation system
def get_recommendations(risk_score):
    """Translate a risk score into a tiered bundle of recommendations.

    Tiers are chosen by threshold: ``risk_score >= 0.75`` is "High",
    ``risk_score >= 0.4`` is "Moderate", anything else is "Low".

    Args:
        risk_score: Numeric risk value compared against the thresholds above.

    Returns:
        A new dict with the keys ``"risk"`` (tier name) and ``"medicines"``,
        ``"diet"``, ``"lifestyle"`` (each a list of recommendation strings).
    """
    if risk_score >= 0.75:
        level = "High"
        medicines = [
            "Statins (cholesterol control)",
            "Beta blockers",
            "ACE inhibitors",
        ]
        diet = [
            "Low-sodium DASH diet",
            "Avoid fried foods",
            "Increase fruits & vegetables",
        ]
        lifestyle = [
            "Quit smoking",
            "Daily BP monitoring",
            "Consult cardiologist immediately",
        ]
    elif risk_score >= 0.4:
        level = "Moderate"
        medicines = ["Doctor-guided statins if needed"]
        diet = ["Mediterranean diet", "Reduce sugar & salt"]
        lifestyle = ["Regular exercise", "Weight management"]
    else:
        level = "Low"
        medicines = ["No medication required"]
        diet = ["Balanced heart-healthy diet"]
        lifestyle = ["Regular exercise", "Annual health checkups"]

    # Fresh containers are built on every call, so callers may mutate them.
    return {
        "risk": level,
        "medicines": medicines,
        "diet": diet,
        "lifestyle": lifestyle,
    }


In [None]:
# Model 1: app for hospitals.
# Reload the persisted classifier and scaler (in that order).
# NOTE: joblib.load unpickles its input; only load files you trust.
model, scaler = (
    joblib.load(artifact_path)
    for artifact_path in ("heart_disease_model.pkl", "scaler.pkl")
)

def predict_heart_disease(
    age, sex, cp, trestbps, chol, fbs,
    restecg, thalach, exang, oldpeak, slope, ca, thal
):
    """Score one patient and return display strings for the interface.

    The thirteen arguments are assembled, in signature order, into a single
    row, scaled with the module-level ``scaler`` and scored with the
    module-level ``model``.

    Returns:
        A 5-tuple of strings: risk as a percentage with two decimals, the
        risk tier, and the comma-joined medicine, diet and lifestyle lists
        taken from ``get_recommendations`` (expected here to return a mapping
        with the keys "risk", "medicines", "diet" and "lifestyle").
    """
    feature_row = [age, sex, cp, trestbps, chol, fbs,
                   restecg, thalach, exang, oldpeak, slope, ca, thal]

    # A single-row 2-D array, as required by transform / predict_proba.
    scaled_row = scaler.transform(np.array([feature_row]))
    class_probabilities = model.predict_proba(scaled_row)
    risk_score = class_probabilities[0][1]

    advice = get_recommendations(risk_score)

    risk_text = f"{risk_score*100:.2f} %"
    risk_level = advice["risk"]
    medicines_text, diet_text, lifestyle_text = (
        ", ".join(advice[section])
        for section in ("medicines", "diet", "lifestyle")
    )
    return (risk_text, risk_level, medicines_text, diet_text, lifestyle_text)
# Form-style app: thirteen numeric inputs, passed positionally to
# predict_heart_disease in the order listed, and five text outputs matching
# the 5-tuple that function returns.
interface = gr.Interface(
    fn=predict_heart_disease,
    inputs=[
        gr.Number(label="Age"),
        gr.Number(label="Sex (1=Male, 0=Female)"),
        gr.Number(label="Chest Pain Type"),
        gr.Number(label="Resting BP"),
        gr.Number(label="Cholesterol"),
        gr.Number(label="Fasting Blood Sugar"),
        gr.Number(label="Rest ECG"),
        gr.Number(label="Max Heart Rate"),
        gr.Number(label="Exercise Angina"),
        gr.Number(label="Oldpeak"),
        gr.Number(label="Slope"),
        gr.Number(label="Number of Major Vessels"),
        gr.Number(label="Thalassemia")
    ],
    outputs=[
        gr.Text(label="Heart Disease Risk"),
        gr.Text(label="Risk Level"),
        gr.Text(label="Recommended Medicines"),
        gr.Text(label="Diet Plan"),
        gr.Text(label="Lifestyle Advice")
    ],
    title="Heart Disease Prediction & Recommendation System",
    # The warning emoji was stored as mis-decoded bytes; restored to real text.
    description="⚠️ This system is for educational decision-support only and does not replace medical advice."
)

# Start the web app.
interface.launch()


In [None]:
# model 2 for  patients and doctors quick use
import gradio as gr
import numpy as np
import joblib

# Reload the persisted classifier and scaler (in that order).
# NOTE: joblib.load unpickles its input; only load files you trust.
model, scaler = (
    joblib.load(artifact_path)
    for artifact_path in ("heart_disease_model.pkl", "scaler.pkl")
)
def get_recommendations(risk_score):
    """Translate a risk score into a tier label and three advice strings.

    Tiers are chosen by threshold: ``risk_score >= 0.75`` is "HIGH",
    ``risk_score >= 0.4`` is "MODERATE", anything else is "LOW".

    NOTE(review): this rebinds the name of the earlier dict-returning
    ``get_recommendations`` in this file; code written against the dict
    shape will not work with this tuple-returning version.

    Args:
        risk_score: Numeric risk value compared against the thresholds above.

    Returns:
        ``(level, (medication_advice, diet_advice, lifestyle_advice))``.
    """
    if risk_score >= 0.75:
        level = "HIGH"
        advice = (
            "Statins, Beta-blockers (doctor guided)",
            "Low-sodium DASH diet",
            "Quit smoking, cardiologist consultation",
        )
    elif risk_score >= 0.4:
        level = "MODERATE"
        advice = (
            "Doctor evaluation recommended",
            "Mediterranean diet",
            "Regular exercise, weight control",
        )
    else:
        level = "LOW"
        advice = (
            "No medication needed",
            "Balanced heart-healthy diet",
            "Annual checkups, active lifestyle",
        )
    return level, advice
def patient_mode(age, gender, chest_pain, breathlessness, smoking):
    """Estimate heart-disease risk from a short patient questionnaire.

    Only five answers are collected. The remaining model inputs are not
    measured; they are filled with fixed placeholder values chosen from the
    answers below, so the result is a rough screening figure only.

    Args:
        age: Age in years (numeric).
        gender: "Male" or "Female".
        chest_pain: One of "Typical Angina", "Atypical Angina",
            "Non-anginal Pain", "Asymptomatic".
        breathlessness: "Yes" or "No".
        smoking: "Yes" or "No".

    Returns:
        A 5-tuple of strings: risk percentage (two decimals), risk level,
        medication advice, diet advice, lifestyle advice.

    Raises:
        gr.Error: If any choice field is unset or holds an unexpected value.
            Previously an unset chest-pain choice raised a bare KeyError and
            the other unset choices were silently treated as Female / No.
    """
    cp_map = {
        "Typical Angina": 0,
        "Atypical Angina": 1,
        "Non-anginal Pain": 2,
        "Asymptomatic": 3
    }

    # Reject unanswered questions instead of guessing a value for them.
    required_choices = (
        ("Gender", gender, ("Male", "Female")),
        ("Chest Pain Type", chest_pain, tuple(cp_map)),
        ("Shortness of Breath", breathlessness, ("Yes", "No")),
        ("Smoking Habit", smoking, ("Yes", "No")),
    )
    for field_label, answer, allowed in required_choices:
        if answer not in allowed:
            raise gr.Error(f"Please select a value for '{field_label}'.")

    is_breathless = breathlessness == "Yes"
    is_smoker = smoking == "Yes"

    sex = 1 if gender == "Male" else 0
    cp = cp_map[chest_pain]

    # Placeholder values standing in for measurements the form does not ask for.
    trestbps = 140 if is_breathless else 120
    chol = 240 if is_smoker else 200
    fbs = 1 if is_smoker else 0
    restecg = 1
    thalach = 150 - (age // 5)
    exang = 1 if is_breathless else 0
    oldpeak = 2.0 if is_breathless else 0.5
    slope = 1
    ca = 0
    thal = 2

    # Single-row 2-D array in the feature order the scaler/model were fit on.
    data = np.array([[age, sex, cp, trestbps, chol, fbs,
                      restecg, thalach, exang, oldpeak, slope, ca, thal]])

    data_scaled = scaler.transform(data)
    risk = model.predict_proba(data_scaled)[0][1]

    level, reco = get_recommendations(risk)

    return f"{risk*100:.2f} %", level, reco[0], reco[1], reco[2]
def doctor_mode(age, sex, cp, trestbps, chol, fbs,
                restecg, thalach, exang, oldpeak, slope, ca, thal):
    """Score one patient from the full set of thirteen model inputs.

    The arguments are assembled, in signature order, into a single row,
    scaled with the module-level ``scaler`` and scored with the module-level
    ``model``.

    Returns:
        A 5-tuple of strings: risk percentage (two decimals), risk level,
        and the three advice strings from ``get_recommendations`` (expected
        here to return ``(level, (medication, diet, lifestyle))``).
    """
    feature_row = [age, sex, cp, trestbps, chol, fbs,
                   restecg, thalach, exang, oldpeak, slope, ca, thal]

    # A single-row 2-D array, as required by transform / predict_proba.
    scaled_row = scaler.transform(np.array([feature_row]))
    class_probabilities = model.predict_proba(scaled_row)
    risk = class_probabilities[0][1]

    level, reco = get_recommendations(risk)
    risk_text = f"{risk*100:.2f} %"

    return risk_text, level, reco[0], reco[1], reco[2]
# Two-tab app: a short questionnaire wired to patient_mode and a full
# thirteen-field form wired to doctor_mode. Emoji and the en dash in the
# user-facing strings were stored as mis-decoded bytes and are restored here.
with gr.Blocks() as demo:
    gr.Markdown("# ❤️ Heart Disease Prediction & Recommendation System")
    gr.Markdown("⚠️ Educational decision-support system. Not a medical diagnosis tool.")

    with gr.Tab("👤 Patient Mode"):
        age = gr.Slider(20, 80, label="Age")
        gender = gr.Radio(["Male", "Female"], label="Gender")
        chest_pain = gr.Dropdown(
            ["Typical Angina", "Atypical Angina", "Non-anginal Pain", "Asymptomatic"],
            label="Chest Pain Type"
        )
        breathlessness = gr.Radio(["Yes", "No"], label="Shortness of Breath")
        smoking = gr.Radio(["Yes", "No"], label="Smoking Habit")

        btn1 = gr.Button("Check Heart Risk")

        out1 = gr.Text(label="Risk Percentage")
        out2 = gr.Text(label="Risk Level")
        out3 = gr.Text(label="Medication Advice")
        out4 = gr.Text(label="Diet Plan")
        out5 = gr.Text(label="Lifestyle Advice")

        # Input order must match patient_mode's parameter order; the five
        # outputs receive the 5-tuple it returns.
        btn1.click(
            patient_mode,
            inputs=[age, gender, chest_pain, breathlessness, smoking],
            outputs=[out1, out2, out3, out4, out5]
        )

    with gr.Tab("🧑‍⚕️ Doctor Mode"):
        # Listed in the same order as doctor_mode's parameters.
        inputs = [
            gr.Number(label="Age"),
            gr.Number(label="Sex (1=Male, 0=Female)"),
            gr.Number(label="Chest Pain Type (0–3)"),
            gr.Number(label="Resting BP"),
            gr.Number(label="Cholesterol"),
            gr.Number(label="Fasting Blood Sugar"),
            gr.Number(label="Rest ECG"),
            gr.Number(label="Max Heart Rate"),
            gr.Number(label="Exercise Angina"),
            gr.Number(label="Oldpeak"),
            gr.Number(label="Slope"),
            gr.Number(label="Number of Vessels"),
            gr.Number(label="Thalassemia")
        ]

        btn2 = gr.Button("Predict (Doctor Mode)")
        outputs = [
            gr.Text(label="Risk Percentage"),
            gr.Text(label="Risk Level"),
            gr.Text(label="Medication Advice"),
            gr.Text(label="Diet Plan"),
            gr.Text(label="Lifestyle Advice")
        ]

        btn2.click(doctor_mode, inputs=inputs, outputs=outputs)

# Start the web app.
demo.launch()
