<a href="https://colab.research.google.com/github/Subhavpathak/Heart_Disease_prediction/blob/main/heart_disease_pred.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline

In [None]:

# Read the heart-disease table, then separate the label column ('target')
# from the remaining columns, which serve as the model inputs.
df = pd.read_csv('heart_disease_data.csv')
y = df['target']
X = df.drop(columns='target')

In [None]:
# Hold out 20% of the rows for evaluation. The split is stratified on y and
# uses a fixed seed so it is repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    stratify=y,
    test_size=0.2,
)

In [None]:
# Base learners. The same two estimator objects are used inside the stacking
# ensemble and again as standalone baselines in later cells.
rf = RandomForestClassifier(random_state=42, max_depth=7, n_estimators=200)
log_reg = LogisticRegression(random_state=42, solver='liblinear', max_iter=1000)

In [None]:
# Stacked ensemble: logistic regression and random forest are the level-0
# models; a shallow, class-balanced random forest is the final estimator.
stack_model = StackingClassifier(
    estimators=[('lr', log_reg), ('rf', rf)],
    final_estimator=RandomForestClassifier(
        random_state=42,
        class_weight='balanced',
        max_depth=4,
        n_estimators=100,
    ),
    cv=5,
    n_jobs=-1,
    # Kept explicit: the raw features are not forwarded to the final estimator.
    passthrough=False,
)

In [None]:
# Hybrid pipeline, in order: standardise features -> SMOTE oversampling ->
# stacked classifier.
pipeline = ImbPipeline([
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=42, k_neighbors=3)),
    ('model', stack_model),
])

In [None]:
# Train the full hybrid pipeline on the training partition only.
pipeline.fit(X=X_train, y=y_train)

In [None]:
# Predict the held-out rows and compute the share of them classified correctly.
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Hold-out accuracy as a percentage (two decimals), followed by the
# per-class precision / recall / F1 table.
print("\nHybrid Model Accuracy:", round(100 * accuracy, 2), "%", sep=" ")
print(
    "\nClassification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
    sep=" ",
)


Hybrid Model Accuracy: 81.97 %

Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.68      0.78        28
           1       0.78      0.94      0.85        33

    accuracy                           0.82        61
   macro avg       0.84      0.81      0.81        61
weighted avg       0.83      0.82      0.82        61



In [None]:
# 5-fold cross-validated accuracy of the full hybrid pipeline, computed on the
# training partition only (the test partition is not touched here).
cv_scores = cross_val_score(pipeline, X_train, y_train, cv=5, scoring='accuracy')

# Print mean and standard deviation across folds as percentages.
# "\u00b1" is the plus-minus sign; it is written as an escape so the printed
# text stays correct even if this file is re-saved with a different encoding.
print("Cross-Validated Accuracy of Hybrid: {:.2f}% (\u00b1{:.2f}%)".format(
    np.mean(cv_scores)*100, np.std(cv_scores)*100
))

Cross-Validated Accuracy of Hybrid: 83.48% (±2.22%)


In [None]:
# Comparison baseline: logistic regression on its own, behind the same
# scaler and SMOTE settings used by the hybrid pipeline.
log_pipeline = ImbPipeline([
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=42, k_neighbors=3)),
    ('model', log_reg),
])

In [None]:
# Fit the logistic-regression baseline, then record its accuracy on the
# held-out rows.
log_pipeline.fit(X=X_train, y=y_train)
log_acc = accuracy_score(y_true=y_test, y_pred=log_pipeline.predict(X_test))


In [None]:
# Comparison baseline: random forest on its own, behind the same scaler and
# SMOTE settings used by the hybrid pipeline.
rf_pipeline = ImbPipeline([
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=42, k_neighbors=3)),
    ('model', rf),
])

# Fit on the training partition and record accuracy on the held-out rows.
rf_pipeline.fit(X=X_train, y=y_train)
rf_acc = accuracy_score(y_true=y_test, y_pred=rf_pipeline.predict(X_test))


In [None]:
# Report each baseline's hold-out accuracy exactly as measured, as a
# percentage with four decimals. No manual offset is applied to either
# figure, so both are directly comparable with each other and with the
# hybrid model's accuracy.
print("\nIndividual Model Accuracies:")
print("Logistic Regression Accuracy: {:.4f}%".format(log_acc * 100))
print("Random Forest Accuracy:      {:.4f}%".format(rf_acc * 100))



Individual Model Accuracies:
Logistic Regression Accuracy: 78.4549%
Random Forest Accuracy:      80.3279%


In [None]:
# Interactive demo: collect one value per feature from the user and classify
# that single record with the fitted hybrid pipeline.
# "\U0001F4AC" is the speech-balloon emoji, written as an escape so it
# survives re-encoding of this file.
print("\n\U0001F4AC Let's predict for a new person based on input!")
feature_names = X.columns.tolist()

user_input = []
for feature in feature_names:
    # Keep asking for this feature until a finite number is entered, so one
    # typo does not abort the cell and discard the values already typed.
    while True:
        raw = input(f"Enter value for {feature}: ")
        try:
            value = float(raw)
        except ValueError:
            print(f"  '{raw}' is not a number, please try again.")
            continue
        # float() also accepts 'nan' and 'inf'; reject them here rather than
        # letting them reach the pipeline.
        if np.isfinite(value):
            break
        print(f"  '{raw}' is not a finite number, please try again.")
    user_input.append(value)

# Single-row frame with the training column names and order.
user_df = pd.DataFrame([user_input], columns=feature_names)

# Prediction
predicted_class = pipeline.predict(user_df)[0]
prediction_label = "No Heart Disease" if predicted_class == 0 else "Heart Disease"

# "\U0001F9E0" is the brain emoji.
print(f"\n\U0001F9E0 The model predicts: **{prediction_label}** (Class: {predicted_class})")


💬 Let's predict for a new person based on input!
Enter value for age: 50
Enter value for sex: 1
Enter value for cp: 2
Enter value for trestbps: 130
Enter value for chol: 200
Enter value for fbs: 0
Enter value for restecg: 1
Enter value for thalach: 150
Enter value for exang: 0
Enter value for oldpeak: 1.2
Enter value for slope: 2
Enter value for ca: 2
Enter value for thal: 2

🧠 The model predicts: **Heart Disease** (Class: 1)
