In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the dataset from a CSV file located relative to the working directory.
DATA_PATH = 'heart.csv'
df = pd.read_csv(DATA_PATH)


In [None]:
# Structural overview: the column index, then the first and last five rows.
print("Columns:", df.columns)

for caption, preview in (("First 5 rows:\n", df.head()), ("Last 5 rows:\n", df.tail())):
    print(caption, preview)

In [None]:
# Summary statistics produced by DataFrame.describe().
summary_stats = df.describe()
print("Descriptive Statistics:\n", summary_stats)


In [None]:
# Per-column count of missing entries before any imputation is applied.
missing_before = df.isna().sum()
print("Missing values before imputation:\n", missing_before)


In [None]:
# Replace missing values with the mean of their column.
#
# - numeric_only=True restricts the means to numeric columns, so a
#   string/object column cannot make df.mean() raise; such columns are
#   simply left un-imputed.
# - The result is rebound to `df` instead of using inplace=True, so the
#   frame objects already displayed by earlier cells are not mutated.
#
# NOTE(review): integer-coded categorical columns, if any, are also filled
# with a (possibly fractional) mean — confirm that is intended.
df = df.fillna(df.mean(numeric_only=True))


In [None]:
# Per-column count of missing entries remaining after imputation.
missing_after = df.isna().sum()
print("Missing values after imputation:\n", missing_after)


In [None]:
# Drop rows whose 'chol' value lies more than 1.5 * IQR below the first
# quartile or above the third quartile.
# NOTE: `df` is rebound to the filtered frame, so re-running this cell
# recomputes the quartiles on already-filtered data.
chol_before = df['chol']
Q1, Q3 = chol_before.quantile(0.25), chol_before.quantile(0.75)
IQR = Q3 - Q1
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
is_outlier = (chol_before < lower_fence) | (chol_before > upper_fence)
df = df[~is_outlier]


In [None]:
# Report (rows, columns) of the frame as it stands at this point.
cleaned_shape = df.shape
print("Data shape after cleaning:", cleaned_shape)


In [None]:
# Central tendency and spread of the 'chol' column.
chol = df['chol']
mean_chol, median_chol, std_chol = chol.mean(), chol.median(), chol.std()

print(f"Mean Cholesterol: {mean_chol}, Median Cholesterol: {median_chol}, Std Dev Cholesterol: {std_chol}")


In [None]:
# Mean 'trestbps' for rows with target == 1 versus rows with target == 0.
has_disease = df['target'] == 1
no_disease = df['target'] == 0
mean_bp_with_disease = df.loc[has_disease, 'trestbps'].mean()
mean_bp_without_disease = df.loc[no_disease, 'trestbps'].mean()

print(f"Mean BP with disease: {mean_bp_with_disease}, Mean BP without disease: {mean_bp_without_disease}")


In [None]:
# Largest and smallest values observed in the 'thalach' column.
heart_rate = df['thalach']
max_heart_rate, min_heart_rate = heart_rate.max(), heart_rate.min()

print(f"Max Heart Rate: {max_heart_rate}, Min Heart Rate: {min_heart_rate}")

In [None]:
# Order rows by 'chol', highest first, and show the first five of them.
sorted_by_chol = df.sort_values('chol', ascending=False)
top_five_chol = sorted_by_chol.head()
print("Top 5 patients with highest cholesterol:\n", top_five_chol)

In [None]:
# Rows whose 'chol' value is strictly greater than 300, and how many there are.
exceeds_300 = df['chol'].gt(300)
high_chol_patients = df.loc[exceeds_300]
print(f"Patients with cholesterol > 300: {len(high_chol_patients)}")


In [None]:
# Rows with age strictly above 60 AND a 'restecg' value strictly above 0.
over_sixty = df['age'] > 60
nonzero_restecg = df['restecg'] > 0
older_patients_with_ecg = df.loc[over_sixty & nonzero_restecg]
print(f"Patients older than 60 with abnormal ECG: {len(older_patients_with_ecg)}")

In [None]:
# Histogram of 'chol' with a KDE overlay, drawn on an explicit Axes.
chol_fig, chol_ax = plt.subplots(figsize=(6, 4))
sns.histplot(df['chol'], kde=True, ax=chol_ax)
chol_ax.set_title('Cholesterol Distribution')
chol_ax.set_xlabel('Cholesterol')
chol_ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Scatter of 'age' (x) against 'thalach' (y), drawn on an explicit Axes.
age_hr_fig, age_hr_ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(data=df, x='age', y='thalach', ax=age_hr_ax)
age_hr_ax.set_title('Age vs Max Heart Rate')
age_hr_ax.set_xlabel('Age')
age_hr_ax.set_ylabel('Max Heart Rate')
plt.show()

In [None]:
# Bar chart of how many rows fall under each value of 'target'.
target_fig, target_ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='target', ax=target_ax)
target_ax.set_title('Patients with and without Heart Disease')
target_ax.set_xlabel('Heart Disease (0 = No, 1 = Yes)')
target_ax.set_ylabel('Count')
plt.show()


In [None]:
# NOTE(review): Axes3D is never referenced by name below; the import is
# presumably kept for its side effect of registering the '3d' projection on
# older Matplotlib releases — confirm before removing or relocating it.
from mpl_toolkits.mplot3d import Axes3D

# 3D scatter: 'chol' on x, 'age' on y, 'target' on z; points are coloured
# by 'target' using the 'coolwarm' colormap.
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(1, 1, 1, projection='3d')
disease_flag = df['target']
ax.scatter(df['chol'], df['age'], disease_flag, c=disease_flag, cmap='coolwarm')
ax.set_xlabel('Cholesterol')
ax.set_ylabel('Age')
ax.set_zlabel('Heart Disease')
ax.set_title('3D Plot of Cholesterol, Age and Heart Disease')
plt.show()

In [None]:
# Pie chart of the share of rows per distinct 'cp' value; each wedge is
# labelled with the value itself and its percentage.
cp_counts = df['cp'].value_counts()
pie_fig, pie_ax = plt.subplots(figsize=(6, 4))
pie_ax.pie(cp_counts, labels=cp_counts.index, autopct='%1.1f%%', startangle=140)
pie_ax.set_title('Proportion of Chest Pain Types')
plt.show()