In [None]:
import pandas as pd
import numpy as np

def aggregate_daily(df_raw):
    """Collapse patient-level ER records into one row per calendar date.

    Args:
        df_raw: DataFrame with a 'date' column to group on, plus the
            'Patient Id' and 'Patient Admission Flag' columns.
            NOTE(review): the flag is summed, so it is assumed to be
            numeric/boolean (1 = admitted) -- confirm against the raw CSV.

    Returns:
        A new DataFrame sorted by 'date' with columns 'date',
        'Emergency_Visits' (count of non-null patient ids) and
        'Real_Admissions' (sum of the admission flag).
    """
    return (
        df_raw.groupby('date')
        .agg(
            Emergency_Visits=('Patient Id', 'count'),
            Real_Admissions=('Patient Admission Flag', 'sum'),
        )
        .reset_index()
        .sort_values('date')
    )


def add_operational_metrics(daily_df, seed=42):
    """Add the synthetic 'ICU_Demand' and derived 'Workload_Index' columns.

    ICU demand is 30% of the day's admissions plus integer noise drawn from
    {-1, 0, 1}, floored at zero and truncated to int. The workload index
    relates visits plus weighted ICU demand to a capacity threshold and is
    clipped to the 0-100 range.

    Args:
        daily_df: Daily frame with 'Emergency_Visits' and 'Real_Admissions'.
        seed: Seed handed to ``np.random.seed`` so the synthetic noise is
            reproducible. This reseeds NumPy's *global* legacy RNG.

    Returns:
        A copy of ``daily_df`` with the two columns added; the input frame
        is left untouched.
    """
    result = daily_df.copy()

    # Noise is assigned positionally, so row order determines which day
    # receives which draw -- keep the frame sorted before calling this.
    np.random.seed(seed)
    noise = np.random.randint(-1, 2, len(result))
    result['ICU_Demand'] = (
        (result['Real_Admissions'] * 0.3 + noise).clip(lower=0).astype(int)
    )

    # Capacity threshold: 1.2 x the 90th percentile of daily visits.
    cap = result['Emergency_Visits'].quantile(0.9) * 1.2
    weighted_load = result['Emergency_Visits'] + result['ICU_Demand'] * 2.5
    result['Workload_Index'] = (weighted_load / cap * 60).clip(0, 100).round(1)
    return result


def assign_alert_levels(daily_df):
    """Add an 'Alert_Level' category derived from 'Workload_Index'.

    Bands: [0, 40] Normal, (40, 60] Elevated, (60, 80] High,
    (80, 100] Critical. Values outside 0-100 (or NaN) stay uncategorised.

    Args:
        daily_df: Frame containing a numeric 'Workload_Index' column.

    Returns:
        A copy of ``daily_df`` with the categorical 'Alert_Level' column.
    """
    result = daily_df.copy()
    result['Alert_Level'] = pd.cut(
        result['Workload_Index'],
        bins=[0, 40, 60, 80, 100],
        labels=["Normal", "Elevated", "High", "Critical"],
        # Without this the first bin is (0, 40] and an index of exactly 0.0
        # would be labelled NaN instead of "Normal".
        include_lowest=True,
    )
    return result


# Run the pipeline when executed as a script or notebook cell (both set
# __name__ to "__main__"); importing this file only defines the helpers.
if __name__ == '__main__':
    from pathlib import Path

    # Load raw data
    df_raw = pd.read_csv('Hospital ER_Data.csv')
    df_raw['Patient Admission Date'] = pd.to_datetime(
        df_raw['Patient Admission Date'], dayfirst=True
    )
    df_raw['date'] = df_raw['Patient Admission Date'].dt.date

    # 1. Aggregate to daily time-series
    # 2. Add synthetic operational data (reproducible)
    # 3. Categorize for decision support
    daily_df = assign_alert_levels(
        add_operational_metrics(aggregate_daily(df_raw))
    )

    # Make sure the target folder exists so the export cannot fail on a
    # fresh checkout.
    output_path = Path('backend/data/processed/processed_hospital_data.csv')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    daily_df.to_csv(output_path, index=False)