# 🏥 Automated Data Exploration (ADE) for Hospital ER Dataset
This notebook performs automated data exploration for the Hospital ER dataset.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import missingno as msno
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [None]:
# Read the Hospital ER CSV into a DataFrame and preview its first rows
csv_path = 'data/Hospital_ER_Data.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Dataset overview: print each column's name, dtype and non-null count,
# plus the frame's memory usage
df.info()

In [None]:
# Summary statistics for every column; include='all' also covers non-numeric
# columns, so statistics that do not apply to a column's dtype show as NaN
df.describe(include='all')

In [None]:
# Missing-values (nullity) matrix: one row per record and one column per field,
# so gaps show where values are absent (this is msno.matrix, not msno.heatmap)
msno.matrix(df)

In [None]:
# Count missing entries in each column, listing the most incomplete columns first
null_counts = df.isna().sum()
null_counts.sort_values(ascending=False)

In [None]:
# Number of distinct values in each column (NaN is not counted by default)
df.nunique()

In [None]:
# Draw a 20-bin histogram for each numeric column on a single 12x10 figure
df.hist(bins=20, figsize=(12, 10))
# Tighten subplot spacing so titles and tick labels do not collide
plt.tight_layout()

In [None]:
# One compact horizontal boxplot per numeric measure to make outliers visible
num_cols = [
    'Patient Age',
    'Patient Satisfaction Score',
    'Patient Waittime',
    'Patients CM',
]
for feature in num_cols:
    # A wide, short canvas is enough for a single horizontal box
    fig, ax = plt.subplots(figsize=(6, 1))
    sns.boxplot(x=df[feature], ax=ax)
    ax.set_title(f'Outliers in {feature}')
    plt.show()

In [None]:
# Bar chart of category frequencies for each categorical column
cat_cols = [
    'Patient Gender',
    'Patient Race',
    'Department Referral',
    'Patient Admission Flag',
]
for category in cat_cols:
    # Order the bars from the most to the least frequent value
    bar_order = df[category].value_counts().index
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(data=df, x=category, order=bar_order, ax=ax)
    ax.set_title(f'Distribution of {category}')
    # Slant the tick labels so long category names stay readable
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [None]:
# Correlation matrix of the numeric columns, annotated to two decimals
plt.figure(figsize=(10, 6))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Matrix')
# Render explicitly, as the other plotting cells do; this also keeps the Text
# object returned by plt.title() from being echoed as the cell's output.
plt.show()