# In [1]:
# step2_data_visualization.py

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Read the cleaned dataset CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer="cleaned_heart_disease_dataset.csv")

# Make sure the folder that receives the generated figures exists
# (exist_ok=True makes re-running the script harmless)
output_dir = "visualizations"
os.makedirs(name=output_dir, exist_ok=True)

# 1. Class Distribution Plot
# Bar chart counting the rows for each value of the 'target' column,
# drawn on an explicit figure/axes pair and written to a PNG.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(
    data=df,
    x='target',
    hue='target',
    palette='viridis',
    legend=False,
    ax=ax,
)
ax.set_title("Heart Disease Risk Distribution (Target Classes)")
ax.set_xlabel("Risk Level (0: No, 1: Yes)")
ax.set_ylabel("Count")
fig.savefig(f"{output_dir}/class_distribution.png")
plt.close(fig)  # release the figure so it does not linger in memory

# 2. Correlation Heatmap
# Pairwise correlation between the numeric columns of df, rendered as an
# annotated heatmap and written to a PNG.
plt.figure(figsize=(12, 10))
# numeric_only=True restricts the computation to numeric columns. Without it,
# pandas 2.x raises if the frame contains any non-numeric column; for an
# all-numeric frame the result is unchanged.
correlation = df.corr(numeric_only=True)
sns.heatmap(correlation, annot=True, fmt=".2f", cmap="coolwarm", square=True)
plt.title("Feature Correlation Heatmap")
plt.savefig(f"{output_dir}/correlation_heatmap.png")
plt.close()  # release the figure so it does not linger in memory

# 3. Key Feature Histograms
# One 30-bin histogram with a KDE curve per listed column; each figure is
# saved as "<column>_histogram.png" in the output folder.
features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
for feature in features:
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.histplot(df[feature], kde=True, bins=30, color='skyblue', ax=ax)
    ax.set_title(f'Distribution of {feature.capitalize()}')
    ax.set_xlabel(feature.capitalize())
    ax.set_ylabel("Frequency")
    fig.savefig(f"{output_dir}/{feature}_histogram.png")
    plt.close(fig)  # close each figure before starting the next one

# Tell the user where the generated figures were written
summary = f"✅ Visualizations saved in ./{output_dir}/"
print(summary)

# Output: ✅ Visualizations saved in ./visualizations/
