In [1]:
import os, requests, pandas as pd
from typing import List, Optional
import pycountry

In [2]:
# Directory that receives the unprocessed World Bank CSV downloads.
RAW_DIR = "data/raw/worldbank"
# exist_ok=True keeps this cell safe to re-run once the folder is in place.
os.makedirs(name=RAW_DIR, exist_ok=True)

In [3]:
def fetch_wb_latest(indicator: str,
                    iso_list: List[str] | None = None,
                    per_page: int = 20_000) -> pd.DataFrame:
    """
    Return the most‑recent value for a World Bank indicator.
    Output columns: country, iso3, year, value
    """
    countries = ";".join(iso_list) if iso_list else "all"
    url = (f"https://api.worldbank.org/v2/country/{countries}/indicator/{indicator}"
           f"?format=json&per_page={per_page}&mrv=1")
    data = requests.get(url, timeout=30).json()[1]           # index 0 = metadata
    df = (pd.json_normalize(data)
            .loc[:, ["country.value", "country.id", "date", "value"]]
            .rename(columns={"country.value": "country",
                             "country.id":    "iso3",
                             "date":          "year"}))
    df["value"] = pd.to_numeric(df["value"], errors="coerce")
    df["year"]  = df["year"].astype(int)
    return df

In [4]:
# Collect the alpha_3 attribute of every entry pycountry exposes.
iso_all = list(entry.alpha_3 for entry in pycountry.countries)
print(f"Total ISO‑3 codes: {len(iso_all)}")

Total ISO‑3 codes: 249


In [5]:
# Maps each World Bank indicator code to the short name used for its CSV file.
wb_metrics = {
    # --- Demographics pillar ---
    "SP.POP.TOTL": "total_population",
    "EN.POP.DNST": "population_density",
    "SP.URB.TOTL.IN.ZS": "urbanization_rate",
    "SP.POP.GROW": "population_growth_pct",
    "SE.ADT.LITR.ZS": "literacy_rate",
    "SP.DYN.LE00.IN": "life_expectancy",

    # --- Economic pillar ---
    "NY.GDP.MKTP.CD": "gdp_total_usd",
    "NY.GDP.PCAP.PP.CD": "gdp_per_capita_ppp",
    # Stands in for the IMF growth metric.
    "NY.GDP.MKTP.KD.ZG": "real_gdp_growth_pct",
    "FP.CPI.TOTL.ZG": "inflation_cpi_pct",
    "SL.UEM.TOTL.ZS": "unemployment_rate",

    # --- Influence pillar (governance & military proxies via WB) ---
    # Gini index is used as the inequality proxy.
    "SI.POV.GINI": "gini_index",
    "MS.MIL.XPND.GD.ZS": "military_expenditure_pct_gdp",
}


In [6]:
# Fetch each configured indicator for all countries and write one CSV per
# indicator into RAW_DIR, named after the mapping's value.
for code in wb_metrics:
    fname = wb_metrics[code]
    print(f"Downloading {code} …")
    # No iso_list is passed, so the request covers "all" countries.
    df = fetch_wb_latest(code)
    out_path = f"{RAW_DIR}/{fname}.csv"
    df.to_csv(out_path, index=False)
    print(f"  Saved → {out_path}  ({len(df)} rows)")
print("World Bank snapshot download complete.")


Downloading SP.POP.TOTL …
  Saved → data/raw/worldbank/total_population.csv  (266 rows)
Downloading EN.POP.DNST …
  Saved → data/raw/worldbank/population_density.csv  (266 rows)
Downloading SP.URB.TOTL.IN.ZS …
  Saved → data/raw/worldbank/urbanization_rate.csv  (266 rows)
Downloading SP.POP.GROW …
  Saved → data/raw/worldbank/population_growth_pct.csv  (266 rows)
Downloading SE.ADT.LITR.ZS …
  Saved → data/raw/worldbank/literacy_rate.csv  (266 rows)
Downloading SP.DYN.LE00.IN …
  Saved → data/raw/worldbank/life_expectancy.csv  (266 rows)
Downloading NY.GDP.MKTP.CD …
  Saved → data/raw/worldbank/gdp_total_usd.csv  (266 rows)
Downloading NY.GDP.PCAP.PP.CD …
  Saved → data/raw/worldbank/gdp_per_capita_ppp.csv  (266 rows)
Downloading NY.GDP.MKTP.KD.ZG …
  Saved → data/raw/worldbank/real_gdp_growth_pct.csv  (266 rows)
Downloading FP.CPI.TOTL.ZG …
  Saved → data/raw/worldbank/inflation_cpi_pct.csv  (266 rows)
Downloading SL.UEM.TOTL.ZS …
  Saved → data/raw/worldbank/unemployment_rate.csv  (2