# 🫀 Heart Disease Prediction (End-to-End ML Project)
---
This notebook demonstrates an end-to-end machine learning project for predicting heart disease:
1. Data Loading
2. Data Cleaning & Preprocessing
3. Exploratory Data Analysis (EDA)
4. Model Training & Evaluation (Logistic Regression, Random Forest, SVM)
5. Model Comparison, Insights & Reporting

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

## 1. Load Data

In [None]:
# Read the heart-disease CSV into a DataFrame. The path is relative to the
# notebook's working directory; point it elsewhere if the file lives in
# another location.
data = pd.read_csv(filepath_or_buffer='HeartDiseaseTrain-Test.csv')

# Preview the first five rows as a quick sanity check of the load.
data.head(n=5)

## 2. Data Preprocessing

In [None]:
# Columns whose values are replaced in place by integer codes.
categorical_cols = ['sex','chest_pain_type','fasting_blood_sugar','rest_ecg',
                    'exercise_induced_angina','slope','thalassemia',
                    'vessels_colored_by_flourosopy']

# Keep one fitted encoder per column so every label -> code mapping stays
# available (encoders[col].classes_ / .inverse_transform). Refitting a single
# shared encoder would discard all mappings except the last one.
# `le` is still left bound to the encoder of the last column, as before.
encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    encoders[col] = le

# Feature matrix (every column except the label) and the label itself.
# Note: X is left as the unscaled DataFrame; only the train/test partitions
# below are standardized.
X = data.drop('target', axis=1)
y = data['target']

# Split BEFORE scaling so that no statistic of the held-out rows can influence
# preprocessing. stratify=y keeps the class proportions of the target the same
# in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn mean / standard deviation from the training rows only, then apply the
# same transformation to the test rows. Fitting on the full dataset would leak
# test-set information into training and bias the reported scores.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## 3. Exploratory Data Analysis (EDA)

In [None]:
# Count of rows per 'sex' value, split by 'target'.
fig_sex, ax_sex = plt.subplots(figsize=(6, 4))
sns.countplot(data=data, x='sex', hue='target', ax=ax_sex)
ax_sex.set_title('Heart Disease Distribution by Sex')
plt.show()

# Pairwise correlation between the columns of `data`, drawn as an annotated
# heatmap.
corr_matrix = data.corr()
fig_corr, ax_corr = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax_corr)
ax_corr.set_title('Feature Correlation Heatmap')
plt.show()

## 4. Model Training & Evaluation

In [None]:
# Candidate classifiers, keyed by the display name used in the printed report
# and in the plot titles.
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    # probability=True adds an internal, randomized calibration step; seeding
    # it keeps the fitted model reproducible across runs.
    'SVM': SVC(kernel='linear', probability=True, random_state=42)
}

# Test-set accuracy of every model, keyed by model name.
results = {}

for name, model in models.items():
    # Fit on the training partition, predict on the held-out partition.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    results[name] = acc

    # Leading newline separates the reports of consecutive models. The literal
    # must stay on one line: a single-quoted f-string cannot span lines.
    print(f'\n{name}')
    print('Accuracy:', acc)
    print(classification_report(y_test, y_pred))

    # One dedicated figure per model. confusion_matrix puts the true classes
    # on the rows and the predicted classes on the columns.
    fig, ax = plt.subplots()
    sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d',
                cmap='Blues', ax=ax)
    ax.set_title(f'{name} Confusion Matrix')
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)

## 5. Model Comparison

In [None]:
# Show the test-set accuracy of every model as a ranked table (best first)
# instead of a raw dict, so the comparison can be read at a glance.
# `results` itself is left untouched.
(
    pd.Series(results, name='accuracy')
    .rename_axis('model')
    .sort_values(ascending=False)
)