# Advanced Analysis: Anemia in Heart Failure Patients

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
%matplotlib inline

In [None]:
# Load detailed dataset
# The CSV is referenced by a bare relative filename, so it must be reachable
# from the kernel's current working directory.
df = pd.read_csv('anemia_hf_data_detailed.csv')
# Bare last expression: the notebook renders the first rows as the cell output.
df.head()

In [None]:
# Convert binary categorical columns
# Encode the 'Yes'/'No' flag columns as 1/0 so they can be used numerically
# (e.g. in the correlation matrix and as a mean readmission rate).
yes_no_codes = {'Yes': 1, 'No': 0}
for flag_col in ['IV_Iron_Administered', 'Readmitted_in_30_Days', 'Diabetes', 'CKD']:
    # Series.map turns every value that is not a key of the dict into NaN, so
    # mapping a column that is already 1/0 would blank it out. Skipping numeric
    # columns makes this cell safe to re-run without reloading the data.
    if pd.api.types.is_numeric_dtype(df[flag_col]):
        continue
    # Any value other than exactly 'Yes' or 'No' becomes NaN here.
    df[flag_col] = df[flag_col].map(yes_no_codes)

In [None]:
# Summary statistics
# describe() is the cell's last expression, so its table is rendered as the output.
# NOTE(review): when run after the Yes/No -> 1/0 conversion cell, the flag
# columns are numeric and are summarised too; their mean is then the share of 1s.
df.describe()

In [None]:
# Distribution of Hemoglobin, Ferritin, and EF
# One panel per measurement, drawn left to right: (column, panel title, bar colour).
hist_panels = [
    ('Hemoglobin_g_dL', 'Hemoglobin Distribution', 'skyblue'),
    ('Ferritin_ng_mL', 'Ferritin Distribution', 'orange'),
    ('EF_pct', 'Ejection Fraction Distribution', 'green'),
]
fig, axes = plt.subplots(nrows=1, ncols=len(hist_panels), figsize=(18, 5))
for panel_ax, (column, panel_title, bar_color) in zip(axes, hist_panels):
    # Histogram with a KDE curve overlaid, drawn on this panel's own Axes.
    sns.histplot(df[column], kde=True, color=bar_color, ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()

In [None]:
# Boxplots for biomarkers vs Readmission
# One separate figure per feature, split by the 30-day readmission flag.
features = [
    'Hemoglobin_g_dL',
    'Ferritin_ng_mL',
    'TSAT_pct',
    'EF_pct',
    'BMI',
]
for col in features:
    # sns.boxplot draws on the current Axes and returns it, so the title can be
    # set on that Axes directly.
    box_ax = sns.boxplot(data=df, x='Readmitted_in_30_Days', y=col)
    box_ax.set_title(f'{col} vs Readmission')
    # Showing inside the loop ends the current figure, so the next feature
    # starts on a fresh one.
    plt.show()

In [None]:
# Correlation heatmap (continuous + binary numeric)
# Pairwise correlations between the continuous measurements and the binary flags.
# NOTE(review): assumes the Yes/No flag columns were already converted to 1/0
# by the conversion cell; confirm that cell has run before this one.
corr_cols = [
    'Hemoglobin_g_dL', 'Ferritin_ng_mL', 'TSAT_pct', 'EF_pct', 'BMI',
    'IV_Iron_Administered', 'Diabetes', 'CKD', 'Readmitted_in_30_Days',
]
# A 9x9 annotated grid is cramped at the default figure size, so open a larger one.
plt.figure(figsize=(10, 8))
# Pin the colour scale to the full correlation range [-1, 1]. Without this the
# diverging 'coolwarm' map is stretched over the observed min..max (the diagonal
# is always 1.0), so its neutral colour would not correspond to zero correlation.
# fmt='.2f' keeps the cell annotations to two decimals so they fit.
sns.heatmap(
    df[corr_cols].corr(),
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
)
plt.title('Correlation Matrix')
plt.tight_layout()
plt.show()

In [None]:
# Subgroup analysis: CKD patients
# Restrict to rows whose CKD flag equals 1, then plot the readmission flag
# against IV iron administration (bar height is the mean of the 0/1 flag,
# i.e. the readmission rate in each group).
ckd_ax = sns.barplot(
    data=df.loc[df['CKD'].eq(1)],
    x='IV_Iron_Administered',
    y='Readmitted_in_30_Days',
)
ckd_ax.set_title('IV Iron vs Readmission (CKD Patients Only)')
plt.show()