In [None]:
# etl/etl_api_country_health.py
import requests
import pandas as pd
from config import pg_engine

# Indicators to download. Each key is the name written to the "indicator"
# column of the fetched rows (and, after pivoting, used as a column name of
# the country dimension); each value is the World Bank indicator code that is
# inserted into the API URL.
# NOTE(review): the units of each series are defined by the World Bank
# catalogue entry for its code and are not checked here - confirm them before
# relying on the loaded values.
INDICATORS = {
    "diabetes_prevalence": "SH.STA.DIAB.ZS",
    "health_expenditure_per_capita": "SH.XPD.CHEX.PC.CD",
    "hospital_beds_per_1k": "SH.MED.BEDS.ZS",
}

def fetch_indicator(indicator_code, indicator_name):
    """Download all observations of one World Bank indicator in long format.

    Every result page of the ``country/all`` endpoint is requested, so the
    output is complete even when the series has more rows than fit in one
    page.

    Args:
        indicator_code: World Bank indicator code placed in the request URL.
        indicator_name: Label written to the ``indicator`` column of each row.

    Returns:
        A ``pandas.DataFrame`` with the columns ``country``, ``year``,
        ``indicator``, ``value`` and ``source``, one row per record returned
        by the API. The columns are present even when no record is returned.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
        requests.Timeout: If the API does not answer within the timeout.
        ValueError: If the JSON reply is not the expected two-element
            ``[metadata, records]`` list (the shape of an API error reply).
    """
    url = f"https://api.worldbank.org/v2/country/all/indicator/{indicator_code}"
    columns = ["country", "year", "indicator", "value", "source"]
    rows = []
    page = 1
    while True:
        # Without a timeout a stalled connection would block the job forever.
        resp = requests.get(
            url,
            params={"format": "json", "per_page": 20000, "page": page},
            timeout=30,
        )
        resp.raise_for_status()
        data = resp.json()

        # A successful reply is [metadata, records]; anything shorter carries
        # an error message instead of data, so fail with the payload visible.
        if not isinstance(data, list) or len(data) < 2:
            raise ValueError(
                f"Unexpected World Bank API response for {indicator_code!r}: {data!r}"
            )
        meta, records = data[0], data[1]

        # The record list can be null when the query matches nothing.
        for rec in records or []:
            rows.append({
                "country": rec["country"]["value"],
                "year": int(rec["date"]),
                "indicator": indicator_name,
                "value": rec["value"],
                "source": "WorldBankAPI",
            })

        # Stop once the last page reported by the metadata has been read.
        total_pages = int((meta or {}).get("pages") or 1)
        if page >= total_pages:
            break
        page += 1

    # Passing the columns keeps the schema stable for an empty result.
    return pd.DataFrame(rows, columns=columns)

# Download one long-format frame per configured indicator, stack them into a
# single table, and keep only the observations from 2010 onwards.
frames = [fetch_indicator(code, name) for name, code in INDICATORS.items()]

all_indicators = pd.concat(frames, ignore_index=True).loc[
    lambda df: df["year"] >= 2010
]

# Reshape to one row per country and one column per indicator, keeping the
# most recent non-null value of each series.
# The "last" aggregation picks by row order, not by year, so the rows are put
# in ascending year order first; without the sort the chosen observation
# depends on whatever order the API returned the records in.
pivot = (
    all_indicators
    .sort_values("year", kind="mergesort")
    .pivot_table(
        index="country",
        columns="indicator",
        values="value",
        aggfunc="last",
    )
    .reset_index()
)
# Drop the leftover "indicator" label from the column index.
pivot.columns.name = None

# Descriptive attributes that this feed does not populate are added as
# all-None placeholder columns; every row is tagged with its source system.
for placeholder in ("iso2_code", "region", "subregion", "income_level"):
    pivot[placeholder] = None
pivot["source_system"] = "WorldBankAPI"

# Rename the key column and put the columns in the order used for the load.
dim_country_df = pivot.rename(columns={"country": "country_name"}).loc[
    :,
    [
        "country_name",
        "iso2_code",
        "region",
        "subregion",
        "diabetes_prevalence",
        "health_expenditure_per_capita",
        "hospital_beds_per_1k",
        "income_level",
        "source_system",
    ],
]

# Append the country rows to the "dim_country" table using a connection
# obtained from pg_engine.begin(); the DataFrame index is not written.
# NOTE(review): presumably pg_engine is a SQLAlchemy engine, in which case
# begin() wraps the insert in one transaction - confirm in config.
# NOTE(review): "append" adds these rows again on every run; confirm that the
# table is emptied or de-duplicated elsewhere.
with pg_engine.begin() as conn:
    dim_country_df.to_sql(
        name="dim_country",
        con=conn,
        index=False,
        if_exists="append",
    )
