# 🫀 Heart Disease Dataset - Exploratory Data Analysis (EDA)

This notebook explores the **heart.csv** dataset. We analyze patient health indicators to understand patterns and correlations with heart disease.

**Target:** `HeartDisease` — binary label indicating whether the patient has heart disease (`1`) or not (`0`).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plot styling: seaborn's white-grid theme and a 10x6 inch
# default figure size for every figure that does not set its own.
sns.set_theme(style='whitegrid')
plt.rcParams.update({'figure.figsize': (10, 6)})

In [None]:
# Read the heart-disease records from heart.csv (path is relative to the
# working directory) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='heart.csv')
df.head(n=5)

In [None]:
# Structural overview of the frame: column names, non-null counts, dtypes
# and memory usage. Output is printed rather than returned.
df.info()

In [None]:
# Summary statistics for every column. include='all' adds the non-numeric
# columns (count / unique / top / freq) alongside the numeric statistics;
# cells that do not apply to a column's dtype show as NaN.
df.describe(include='all')

In [None]:
# Number of missing (NaN/None) entries in each column.
df.isna().sum()

In [None]:
# Class balance of the target.
# The bars are coloured through `hue`: seaborn >= 0.13 deprecates passing
# `palette` without `hue` (FutureWarning, scheduled for removal in 0.14).
n_classes = df['HeartDisease'].nunique()
ax = sns.countplot(
    data=df,
    x='HeartDisease',
    hue='HeartDisease',
    # A list palette keeps the colour mapping categorical (one colour per
    # class), matching what `palette='coolwarm'` produced without hue.
    palette=sns.color_palette('coolwarm', n_colors=n_classes),
    # hue duplicates x, so keep a single full-width bar per class.
    dodge=False,
)
# The hue legend would only repeat the x-axis. Some seaborn versions draw it,
# others suppress it for a redundant hue, so remove it only when present.
hue_legend = ax.get_legend()
if hue_legend is not None:
    hue_legend.remove()
plt.title('Heart Disease Class Distribution')
# Relabel the two bars; assumes the target is coded 0 = no disease,
# 1 = disease (seaborn places the sorted classes at positions 0 and 1).
plt.xticks([0, 1], ['No Disease', 'Disease'])
plt.xlabel('Heart Disease')
plt.ylabel('Count')
plt.show()

In [None]:
# Continuous features: one histogram (30 bins) with a KDE overlay per column,
# each drawn on its own figure.
num_cols = ['Age', 'RestingBP', 'Cholesterol', 'MaxHR', 'Oldpeak']
for feature in num_cols:
    fig, ax = plt.subplots()
    sns.histplot(data=df, x=feature, bins=30, kde=True, ax=ax)
    ax.set(
        title=f'Distribution of {feature}',
        xlabel=feature,
        ylabel='Frequency',
    )
    plt.show()

In [None]:
# Categorical features: per-category counts split by the target class,
# one figure per column.
cat_cols = ['Sex', 'ChestPainType', 'FastingBS', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
for col in cat_cols:
    ax = sns.countplot(data=df, x=col, hue='HeartDisease')
    ax.set_title(f'{col} vs Heart Disease')
    ax.set_xlabel(col)
    ax.set_ylabel('Count')
    ax.tick_params(axis='x', labelrotation=45)
    # Rename the legend entries using seaborn's own hue handles. Passing
    # `labels` alone pairs them with whatever artists matplotlib finds first,
    # which its documentation discourages; explicit handles tie each label to
    # the correct colour.
    # Assumes the hue levels are ordered 0 (no disease), 1 (disease).
    hue_handles, _ = ax.get_legend_handles_labels()
    ax.legend(handles=hue_handles, labels=['No', 'Yes'], title='HeartDisease')
    plt.show()

In [None]:
# Pairwise correlations between the numeric columns (non-numeric columns are
# skipped via numeric_only=True), shown as an annotated heatmap.
fig, ax = plt.subplots(figsize=(10, 8))
corr = df.corr(numeric_only=True)
sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()