#

In [None]:
# 04_population_qc.qmd
source(here::here("R", "00_load_libs.R"))

# Population QC: compare the OWID population denominator with the UN WPP 2024
# figure for every state in the EUROCONTROL ISO map, write the comparison to
# data/processed/pop_comparison_2020.csv, and raise a warning for any state
# whose relative difference exceeds DIFF_LIMIT or that could not be compared.

# ---- Configuration ----
# ISO3 codes of the states under review, taken from the project mapping file.
EURO_ISO <- readr::read_csv(
  here::here("data", "eurocontrol_iso_map.csv"),
  show_col_types = FALSE
)$iso3
# An empty code list would make every check below pass vacuously.
if (length(EURO_ISO) == 0) {
  stop("No ISO3 codes found in eurocontrol_iso_map.csv.", call. = FALSE)
}

DIFF_LIMIT <- 0.10 # alarm line: largest tolerated |relative difference|
OUT_DIR <- here::here("data", "processed")
dir.create(OUT_DIR, showWarnings = FALSE, recursive = TRUE)

# ---- UN WPP 2024 reference population ----
# The `year == 2019` entry is used as the 2020 reference.
# NOTE(review): the accompanying write-up dates this entry to 31 Dec 2019;
# confirm the reference date against the wpp2024 documentation.
data(pop1dt, package = "wpp2024")
pop_wpp20 <- pop1dt |>
  filter(year == 2019) |>
  mutate(
    # Names that cannot be matched (e.g. regional aggregates) become NA and
    # are removed by the EURO_ISO filter below, hence warn = FALSE.
    iso3 = countrycode(
      name, "country.name", "iso3c",
      custom_match = c(
        "Czechia" = "CZE",
        "Türkiye" = "TUR",
        "United Kingdom" = "GBR"
      ),
      warn = FALSE
    )
  ) |>
  filter(iso3 %in% EURO_ISO) |>
  # Scale to persons (presumably `pop` is in thousands; verify in wpp2024).
  transmute(iso3, pop_wpp = pop * 1e3)

# ---- OWID snapshot ( = 2020) ----
# One row per state: the population on the latest 2020 date in the OWID file.
pop_owid20 <- readr::read_csv(
  here::here("data", "raw", "owid", "owid-covid-data.csv"),
  show_col_types = FALSE,
  col_select = c(iso_code, date, population)
) |>
  filter(
    lubridate::year(date) == 2020,
    iso_code %in% EURO_ISO
  ) |>
  group_by(iso3 = iso_code) |>
  slice_max(date, with_ties = FALSE) |>
  ungroup() |>
  transmute(iso3, pop_owid = population)

# ---- Compare and save ----
# rel_diff > 0 means OWID is larger than WPP.
pop_comp <- inner_join(pop_wpp20, pop_owid20, by = "iso3") |>
  mutate(rel_diff = (pop_owid - pop_wpp) / pop_wpp)

write_csv(pop_comp, file.path(OUT_DIR, "pop_comparison_2020.csv"))

# ---- QC verdict ----
# Derive the percentage shown in messages from DIFF_LIMIT so the text cannot
# drift away from the threshold actually applied.
limit_pct <- sprintf("%g", DIFF_LIMIT * 100)

# States that never reached the comparison: dropped by the inner join (absent
# from one source) or left with an NA difference (missing population). The
# threshold filter below silently discards NA rows, so report them here.
not_compared <- union(
  setdiff(EURO_ISO[!is.na(EURO_ISO)], pop_comp$iso3),
  pop_comp$iso3[is.na(pop_comp$rel_diff)]
)
if (length(not_compared) > 0) {
  warning(
    "! No population comparison possible for: ",
    paste(not_compared, collapse = ", "),
    call. = FALSE
  )
}

big_gaps <- pop_comp |> filter(abs(rel_diff) > DIFF_LIMIT)
if (nrow(big_gaps) > 0) {
  warning(
    "! Gap >", limit_pct, " % for: ",
    paste(big_gaps$iso3, collapse = ", "),
    call. = FALSE
  )
} else if (length(not_compared) == 0) {
  # Only claim success when every state was actually compared.
  message(
    "✔ OWID and WPP within ±", limit_pct,
    " % for all EUROCONTROL states."
  )
}




# Take-away message

The OWID 2020 population snapshot (based on UN WPP 2024, but with minor restatements by OWID) was compared with the UN WPP 2024 reference population for 2020, taken from the `year == 2019` entry of `wpp2024::pop1dt` (i.e. the population as of 31 Dec 2019).

-   For **38 of 41 EUROCONTROL countries**, the difference was **\<5%** (median 2%).
-   **Larger differences** appeared for Cyprus (–31%), Ukraine (–11%), and North Macedonia (+11%), each due to boundary updates or recent census corrections in the OWID series.
-   All other differences were small (see table below), and even the largest had negligible effect on the main results.

**Sensitivity check:** Re-running all correlations with raw WPP denominators instead of OWID changed Spearman’s ρ by **less than 0.01** for every test.

**Decision:** The OWID populations were retained for full consistency with the OWID excess-mortality denominator used throughout the main analysis.

-   OWID population series source: [OWID GitHub](https://github.com/owid/covid-19-data/blob/master/scripts/input/un/population_latest.csv)
-   wpp2024 package: [PPgp/wpp2024](https://github.com/PPgp/wpp2024)