# 02 · EDA & KPIs

**Goal:** Compute KPIs and produce charts saved to `../charts/`. Always call `plt.savefig(...)` to persist visuals for the repo.

In [None]:
import pathlib
import pandas as pd
import matplotlib.pyplot as plt

DATA = pathlib.Path('../data/processed')
CHARTS = pathlib.Path('../charts')
CHARTS.mkdir(parents=True, exist_ok=True)

# Count columns summed per month from each processed dataset.
AE_SUM_COLS = [
    'total_attendances',
    'within_4h_count',
    'emergency_admissions',
    'over4h_after_dta',
]
ECDS_SUM_COLS = ['type12_attendances', 'arrivals_12h_or_more']


def load_processed(path):
    """Read a processed CSV, parsing ``month_date`` as dates.

    Returns an empty DataFrame when *path* does not exist so the rest of the
    cell can degrade gracefully instead of raising.
    """
    if path.exists():
        return pd.read_csv(path, parse_dates=['month_date'])
    return pd.DataFrame()


def compute_national_kpis(ae, ecds):
    """Aggregate AE and ECDS rows to one national row per ``month_date``.

    Args:
        ae: Non-empty frame with ``month_date`` and the ``AE_SUM_COLS`` columns.
        ecds: Frame with ``month_date`` and the ``ECDS_SUM_COLS`` columns, or
            an empty frame when no ECDS data is available.

    Returns:
        A frame with ``month_date``, the summed count columns, and the ratios
        ``pct_within_4h`` and ``rate_12h_from_arrival``. Months with no ECDS
        rows get zero ECDS counts and a NaN ``rate_12h_from_arrival``.

    Raises:
        KeyError: If a required column is missing from *ae* or a non-empty
            *ecds*.
    """
    # Aggregate AE to one row per month BEFORE joining. Joining the monthly
    # ECDS totals onto every AE row and summing afterwards would multiply the
    # ECDS counts by the number of AE rows in that month.
    nat = ae.groupby('month_date', as_index=False)[AE_SUM_COLS].sum()

    if ecds.empty:
        for col in ECDS_SUM_COLS:
            nat[col] = 0
    else:
        ecds_nat = ecds.groupby('month_date', as_index=False)[ECDS_SUM_COLS].sum()
        # Both sides are unique per month now; validate guards against fan-out.
        nat = nat.merge(ecds_nat, on='month_date', how='left', validate='one_to_one')
        # Months absent from ECDS sum to zero, as a plain sum over NaN would.
        nat[ECDS_SUM_COLS] = nat[ECDS_SUM_COLS].fillna(0)

    nat['pct_within_4h'] = nat['within_4h_count'] / nat['total_attendances']
    nat['rate_12h_from_arrival'] = nat['arrivals_12h_or_more'] / nat['type12_attendances']
    return nat


def plot_national_trend(nat, out_path):
    """Plot the national KPI trend lines and save the figure to *out_path*."""
    plt.figure(figsize=(10, 5))
    plt.plot(nat['month_date'], nat['pct_within_4h'], label='% ≤4h')
    # Only draw the 12h series when it has at least one real value; an
    # all-NaN series would add a legend entry with no visible line.
    if nat['rate_12h_from_arrival'].notna().any():
        plt.plot(nat['month_date'], nat['rate_12h_from_arrival'], label='12h-from-arrival rate')
    plt.title('NHS A&E — National KPI Trends')
    plt.legend()
    plt.tight_layout()
    plt.savefig(out_path, dpi=200, bbox_inches='tight')
    plt.close()


# Load processed datasets (created by 01 notebook)
ae_path = DATA / 'monthly_ae.csv'
ecds_path = DATA / 'ecds_monthly.csv'
if not ae_path.exists() or not ecds_path.exists():
    print('Processed files not found. Run 01_download_and_clean.ipynb first.')

ae = load_processed(ae_path)
ecds = load_processed(ecds_path)

if not ae.empty:
    nat = compute_national_kpis(ae, ecds)

    # Chart 1: National KPI Trend
    plot_national_trend(nat, CHARTS / 'kpi_trend_national.png')

    # TODO: Provider heatmap and scatter once provider-level processed files are validated
    print('Saved charts/kpi_trend_national.png')
else:
    print('No AE data loaded yet.')


### Next visuals
- `breaches12h_heatmap.png` — Providers × month heatmap of 12h-from-arrival rate
- `provider_benchmark_scatter.png` — Provider scatter of % ≤4h (x) vs over-4h-after-DTA rate (y), with point size = attendances
- `seasonality_faceted.png` — month-of-year index by provider/region
- `frailty_split.png` — 12h-from-arrival by CFS bands (65+)