# Heart Disease Prediction Pipeline (Logistic Regression)

This notebook demonstrates a complete end-to-end ML pipeline using **Logistic Regression** to predict heart disease risk.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, roc_curve, auc
import joblib
import os

## 1. Data Loading
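We load the Heart Disease dataset from `../data/heart.csv`. Each row is one record, and the `target` column is the label the model learns to predict.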

In [None]:
# Read the raw heart-disease table from the CSV file, addressed relative to
# the notebook's working directory.
heart_csv_path = '../data/heart.csv'
df = pd.read_csv(heart_csv_path)

# Leave the preview as the cell's last expression so it renders as a rich table.
df.head()

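## 2. Preprocessing

Missing values are filled with column medians, the `target` column is separated from the features, and 20% of the rows are held out as a test set (`random_state=42` keeps the split reproducible).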


In [None]:
# Rows without a label cannot supervise training. Filling `target` with its
# median would fabricate labels (the median of a binary column can even be 0.5),
# so unlabeled rows are dropped instead. `df` itself is left untouched, which
# keeps this cell idempotent when it is re-run.
labeled_rows = df.dropna(subset=['target'])
X = labeled_rows.drop('target', axis=1)
y = labeled_rows['target']

# Split BEFORE imputing: statistics computed on the full table would let
# test-set values influence the values the model is trained on (data leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the fill values from the training rows only, then apply the same
# medians to both partitions. `numeric_only=True` keeps the median well-defined
# if the table carries a non-numeric column (recent pandas raises otherwise).
# NOTE(review): `X` and `df` are no longer imputed by this cell; only
# `X_train` / `X_test` are NaN-free. Confirm nothing else relies on that.
train_medians = X_train.median(numeric_only=True)
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

## 3. Model Training & Export


In [None]:
# Two-stage estimator: standardize every feature, then fit a logistic
# regression on the scaled values. Keeping the scaler inside the pipeline means
# the exported artifact applies the same scaling at prediction time.
pipeline_steps = [
    ('scaler', StandardScaler()),
    ('model', LogisticRegression()),
]
lr_pipe = Pipeline(pipeline_steps).fit(X_train, y_train)

# Persist the fitted pipeline, creating the destination directory if needed.
model_dir = '../backend/model'
model_path = '../backend/model/logistic_model.joblib'
os.makedirs(model_dir, exist_ok=True)
joblib.dump(lr_pipe, model_path)
print("Logistic Regression model exported.")

## 4. Evaluation


In [None]:
# Score the fitted pipeline on the held-out rows.
y_pred = lr_pipe.predict(X_test)

# Overall fraction of correct predictions on the test set.
test_accuracy = accuracy_score(y_test, y_pred)
print("Logistic Regression Accuracy:", test_accuracy)

# Per-class precision / recall / F1 breakdown.
report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)