# ❤️ Heart Disease Prediction System
Machine-learning-based heart disease risk prediction

In [None]:

import pandas as pd
import numpy as np
import os
import joblib

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


## Load Dataset

In [None]:

# File locations used by this notebook: the Excel workbook that is read
# below, and the path the fitted model is written to in the "Save Model" cell.
DATA_FILE = "heart_disease_full_range_dataset.xlsx"
MODEL_FILE = "heart_disease_model.pkl"

# Read the workbook into a DataFrame, then preview the first five rows
# (the trailing expression is what the notebook cell displays).
df = pd.read_excel(io=DATA_FILE)
df.head(n=5)


## Feature Engineering

In [None]:

# Bucket three continuous measurements into ordinal levels: label 0 for the
# lowest bin, 1 for the middle bin, 2 for the highest bin.
#
# The outer bin edges are open-ended on purpose. With finite outer edges,
# pd.cut returns NaN for a value of exactly 0 (the left edge is exclusive)
# and for anything above the top edge, and the following .astype(int) then
# raises ValueError on that NaN. Values inside the former ranges get exactly
# the same labels as before; out-of-range values now fall into the nearest
# outer bucket. Missing (NaN) inputs still fail loudly at .astype(int).
df["cholesterol_level"] = pd.cut(
    df["cholesterol"],
    bins=[-np.inf, 200, 240, np.inf],
    labels=[0, 1, 2]
).astype(int)

df["bp_level"] = pd.cut(
    df["resting_bp"],
    bins=[-np.inf, 120, 140, np.inf],
    labels=[0, 1, 2]
).astype(int)

df["age_group"] = pd.cut(
    df["age"],
    bins=[-np.inf, 35, 55, np.inf],
    labels=[0, 1, 2]
).astype(int)

# Interaction feature: element-wise product of the obesity and age columns.
# NOTE(review): presumably `obesity` is a 0/1 flag, so this is age for obese
# patients and 0 otherwise -- confirm against the dataset.
df["bmi_risk"] = df["obesity"] * df["age"]

# Additive score: four condition columns plus the two bucketed levels above.
# NOTE(review): assumes the four condition columns are numeric (likely 0/1
# indicators) -- confirm against the dataset.
df["cardiac_risk_score"] = (
    df["diabetes"]
    + df["hypertension"]
    + df["smoking"]
    + df["exercise_angina"]
    + df["cholesterol_level"]
    + df["bp_level"]
)


## Model Training

In [None]:

# Features are every column of df except the target (this includes the
# engineered columns added in the previous cell); the target is the
# "heart_disease" column.
X = df.drop(columns="heart_disease")
y = df["heart_disease"]

# Hold out 20% of the rows for evaluation. stratify=y keeps the class
# proportions of the target the same in the train and test parts, so the
# reported accuracy is not skewed by an unrepresentative split.
# random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 200-tree random forest; random_state fixes the seed so reruns give the
# same model.
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# Fraction of held-out rows classified correctly; the trailing expression
# is what the notebook cell displays.
accuracy = accuracy_score(y_test, model.predict(X_test))
accuracy


## Save Model

In [None]:

# Serialize the fitted classifier to MODEL_FILE so it can be reloaded later
# with joblib.load without retraining.
joblib.dump(value=model, filename=MODEL_FILE)


## Example Prediction

In [None]:

# Select the first feature row with a list indexer so the result stays a
# one-row DataFrame (2-D, with column names) rather than a Series.
sample = X.iloc[[0], :]

# Predicted class for that row; displayed as the cell output.
model.predict(sample)
