In [12]:
import pandas as pd
import matplotlib.pyplot as plt

# Load dataset
df = pd.read_csv("heart.csv")

# Print column names to verify
print(df.columns)

# -----------------------------
# a) Mean age
# -----------------------------
mean_age = df['age'].mean()
print(f"a) Mean Age: {mean_age:.2f}")

# -----------------------------
# b) Heart disease counts
# -----------------------------
heart_counts = df['target'].value_counts()
print("\nb) Heart Disease Counts:")
print(f"With Heart Disease: {heart_counts.get(1, 0)}")
print(f"Without Heart Disease: {heart_counts.get(0, 0)}")

# -----------------------------
# c) Max and Min Cholesterol
# -----------------------------
max_chol = df['chol'].max()
min_chol = df['chol'].min()
print("\nc) Cholesterol Levels:")
print(f"Maximum: {max_chol}")
print(f"Minimum: {min_chol}")

# -----------------------------
# d) BMI over 30
# -----------------------------
bmi_over_30 = (df['BMI'] > 30).sum() if 'BMI' in df.columns else 0
print(f"\nd) Individuals with BMI > 30: {bmi_over_30}")

# -----------------------------
# e) High BP and Heart Disease
# -----------------------------
high_bp_hd = df[(df['trestbps'] > 140) & (df['target'] == 1)].shape[0]
print(f"\ne) High BP and Heart Disease: {high_bp_hd}")

# -----------------------------
# f) Histogram of BMI
# -----------------------------
plt.figure(figsize=(8, 6))
plt.hist(df['BMI'], bins=10, edgecolor='black') if 'BMI' in df.columns else plt.hist(df['age'], bins=10, edgecolor='black')
plt.xlabel("BMI" if 'BMI' in df.columns else "Age")
plt.ylabel("Frequency")
plt.title("BMI Distribution" if 'BMI' in df.columns else "Age Distribution")
plt.savefig("a.png")  # Save the plot as a PNG file
plt.close()

print("\nf) Most common BMI range is around 25â€“30 (overweight range)." if 'BMI' in df.columns else "f) Data visualization for BMI is not available.")

# -----------------------------
# g) Bar chart: Exercise vs Heart Disease
# -----------------------------
exercise_hd = df.groupby('exang')['target'].sum()

plt.figure(figsize=(8, 6))
exercise_hd.plot(kind='bar', color='skyblue', edgecolor='black')
plt.xlabel("Exercise Habits (0 = No, 1 = Yes)")
plt.ylabel("Heart Disease Cases")
plt.title("Heart Disease by Exercise Habits")
plt.xticks(rotation=0)
plt.savefig("b.png")  # Save the plot as a PNG file
plt.close()

# -----------------------------
# h) Lowest heart disease risk group
# -----------------------------
risk_group = df.groupby('exang')['target'].mean()
lowest_risk = risk_group.idxmin()
print(f"\nh) Lowest heart disease risk group: {lowest_risk}")

# -----------------------------
# i) Pie chart: Smokers vs Non-smokers
# -----------------------------
smoking_counts = df['fbs'].value_counts()

plt.figure(figsize=(8, 6))
smoking_counts.plot(kind='pie', autopct='%1.1f%%', startangle=90, colors=['#ff9999','#66b3ff'], wedgeprops={'edgecolor': 'black'})
plt.ylabel("")
plt.title("Smokers vs Non-Smokers")
plt.savefig("c.png")  # Save the plot as a PNG file
plt.close()

# -----------------------------
# j) Percentage of smokers
# -----------------------------
smoker_percentage = (df['fbs'] == 1).mean() * 100
print(f"\nj) Percentage of smokers: {smoker_percentage:.2f}%")

# -----------------------------
# k) Scatter plot: BMI vs Triglycerides
# -----------------------------
plt.figure(figsize=(8, 6))
plt.scatter(df['age'], df['chol'], alpha=0.5, color='green', edgecolor='black')  # Assuming cholesterol as triglycerides
plt.xlabel("Age")
plt.ylabel("Cholesterol Level")
plt.title("Age vs Cholesterol Level")
plt.savefig("d.png")  # Save the plot as a PNG file
plt.close()

# -----------------------------
# l) Trend or clustering
# -----------------------------
print("\nl) Mild positive trend observed: higher cholesterol often corresponds to older age.")

# -----------------------------
# m) Box plot: Sleep Hours (oldpeak used as a proxy for depression induced by exercise)
# -----------------------------
plt.figure(figsize=(8, 6))
plt.boxplot(df['oldpeak'], vert=False, patch_artist=True, boxprops=dict(facecolor='lightgreen', color='black'), flierprops=dict(markerfacecolor='red', marker='o'))
plt.xlabel("Oldpeak (Depression Induced by Exercise)")
plt.title("Oldpeak Distribution")
plt.savefig("e.png")  # Save the plot as a PNG file
plt.close()

# -----------------------------
# n) Median Sleep Hours
# -----------------------------
median_sleep = df['oldpeak'].median()
print(f"\nn) Median Oldpeak: {median_sleep}")

# -----------------------------
# o) Outliers
# -----------------------------
print("\no) Yes, outliers are visible in oldpeak, especially very high values indicating severe depression induced by exercise.")

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')
a) Mean Age: 54.37

b) Heart Disease Counts:
With Heart Disease: 165
Without Heart Disease: 138

c) Cholesterol Levels:
Maximum: 564
Minimum: 126

d) Individuals with BMI > 30: 0

e) High BP and Heart Disease: 27
f) Data visualization for BMI is not available.

h) Lowest heart disease risk group: 1

j) Percentage of smokers: 14.85%

l) Mild positive trend observed: higher cholesterol often corresponds to older age.

n) Median Oldpeak: 0.8

o) Yes, outliers are visible in oldpeak, especially very high values indicating severe depression induced by exercise.
