# Données climatiques ERA5 – agrégation

## Configuration

In [None]:
# Workspace layout: every path used in this notebook is derived from ROOT_PATH.
# ERA5_AGGREGATE_PATH is where the aggregated ERA5 files are read from,
# PLOTS_PATH is where the figures produced below are written.
ROOT_PATH <- "~/workspace"
CODE_PATH <- file.path(ROOT_PATH, "code")
CONFIG_PATH <- file.path(ROOT_PATH, "configuration")
DATA_PATH <- file.path(ROOT_PATH, "data")
ERA5_AGGREGATE_PATH <- file.path(DATA_PATH, "era5", "aggregate")
PLOTS_PATH <- file.path(ROOT_PATH, "pipelines", "snt_era5_aggregate", "reporting", "outputs")

# Load shared helper functions.
# NOTE(review): install_and_load(), log_msg() and
# get_latest_dataset_file_in_memory() are not defined in this notebook;
# presumably they come from snt_utils.r - confirm.
source(file.path(CODE_PATH, "snt_utils.r"))

# Packages used by the cells below (jsonlite is called via `::` and is not
# listed here, so it must already be installed).
required_packages <- c(
    "dplyr",
    "tidyr",
    "ggplot2",
    "glue",
    "arrow",
    "sf",
    "reticulate",
    "lubridate",
    "viridis"
)
install_and_load(required_packages)

# OpenHEXA SDK: point reticulate at the conda Python, display the interpreter
# path it resolved, then import the Python module.
Sys.setenv(RETICULATE_PYTHON = "/opt/conda/bin/python")
reticulate::py_config()$python
openhexa <- import("openhexa.sdk")

# Environment variables pointing at the conda PROJ / GDAL data directories
# (needed by sf).
Sys.setenv(PROJ_LIB = "/opt/conda/share/proj")
Sys.setenv(GDAL_DATA = "/opt/conda/share/gdal")

# Read the SNT configuration file. On failure the message is printed with
# cat() and then re-raised with stop(), so the notebook halts here.
config_json <- tryCatch(
    jsonlite::fromJSON(file.path(CONFIG_PATH, "SNT_config.json")),
    error = function(e) {
        msg <- paste0("Error loading configuration: ", conditionMessage(e))
        cat(msg)
        stop(msg)
    }
)

# Values taken from the configuration: the country code used in file names and
# plot titles, and the dataset identifier used later to fetch the shapes file.
COUNTRY_CODE <- config_json$SNT_CONFIG$COUNTRY_CODE
dhis2_dataset <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_FORMATTED

# Make sure the plot output directory exists (no warning if it already does).
dir.create(PLOTS_PATH, recursive = TRUE, showWarnings = FALSE)
log_msg(glue("ERA5 aggregate report for {COUNTRY_CODE}. Outputs: {PLOTS_PATH}"))

## Chargement des données agrégées ERA5

In [None]:
# Abort early when the aggregate output directory does not exist
if (!dir.exists(ERA5_AGGREGATE_PATH)) {
    stop(glue("ERA5 aggregate path not found: {ERA5_AGGREGATE_PATH}. Run the ERA5 aggregate pipeline first."))
}

# Each immediate subdirectory is treated as one variable
# (e.g. 2m_temperature, total_precipitation)
variable_dirs <- list.dirs(ERA5_AGGREGATE_PATH, full.names = TRUE, recursive = FALSE)
variables <- basename(variable_dirs)

# Read the monthly parquet file of every variable that has one, tagging the
# rows with the variable name; variables without a file only trigger a warning
monthly_data <- list()
for (var in variables) {
    f <- file.path(ERA5_AGGREGATE_PATH, var, glue("{COUNTRY_CODE}_{var}_monthly.parquet"))

    if (!file.exists(f)) {
        log_msg(glue("File not found: {f}"), level = "warning")
        next
    }

    monthly_data[[var]] <- mutate(arrow::read_parquet(f), VARIABLE = !!var)
    log_msg(glue("Loaded {var}: {nrow(monthly_data[[var]])} rows"))
}

# Nothing could be loaded at all: there is nothing to report on
if (length(monthly_data) == 0) {
    stop("No monthly ERA5 data found. Run the ERA5 aggregate pipeline first.")
}

## Graphiques

### Série temporelle – température (moyenne nationale)

In [None]:
var_temp <- "2m_temperature"
if (!(var_temp %in% names(monthly_data))) {
    # Temperature was not loaded: nothing to draw
    log_msg(glue("{var_temp} not in data. Skipping temperature plot."), level = "warning")
} else {
    # Parse PERIOD into a Date so it can be used as a continuous x axis
    d <- mutate(
        monthly_data[[var_temp]],
        PERIOD_DATE = lubridate::ym(as.character(PERIOD))
    )

    # One row per period: average of MEAN over all rows of that period
    national <- d %>%
        group_by(PERIOD, PERIOD_DATE) %>%
        summarise(MEAN = mean(MEAN, na.rm = TRUE), .groups = "drop")

    temp_title <- glue("Température moyenne mensuelle (ERA5) – {COUNTRY_CODE}")
    p_temp <- ggplot(data = national, mapping = aes(x = PERIOD_DATE, y = MEAN)) +
        geom_line(colour = "#c62828", linewidth = 0.6) +
        labs(title = temp_title, x = "Période", y = "Température (°C)") +
        theme_minimal() +
        theme(plot.title = element_text(face = "bold", hjust = 0.5))
    print(p_temp)

    # Write the figure next to the other report outputs
    temp_png <- file.path(PLOTS_PATH, glue("{COUNTRY_CODE}_era5_temperature_monthly.png"))
    ggsave(filename = temp_png, plot = p_temp, width = 10, height = 4, dpi = 150)
    log_msg(glue("Saved temperature plot."))
}

### Série temporelle – précipitations (moyenne nationale)

In [None]:
var_precip <- "total_precipitation"
if (!(var_precip %in% names(monthly_data))) {
    # Precipitation was not loaded: nothing to draw
    log_msg(glue("{var_precip} not in data. Skipping precipitation plot."), level = "warning")
} else {
    # Parse PERIOD into a Date so it can be used as a continuous x axis
    d <- mutate(
        monthly_data[[var_precip]],
        PERIOD_DATE = lubridate::ym(as.character(PERIOD))
    )

    # One row per period: average of MEAN over all rows of that period
    national <- d %>%
        group_by(PERIOD, PERIOD_DATE) %>%
        summarise(MEAN = mean(MEAN, na.rm = TRUE), .groups = "drop")

    precip_title <- glue("Précipitations mensuelles moyennes (ERA5) – {COUNTRY_CODE}")
    p_precip <- ggplot(data = national, mapping = aes(x = PERIOD_DATE, y = MEAN)) +
        geom_line(colour = "#1565c0", linewidth = 0.6) +
        labs(title = precip_title, x = "Période", y = "Précipitations (mm)") +
        theme_minimal() +
        theme(plot.title = element_text(face = "bold", hjust = 0.5))
    print(p_precip)

    # Write the figure next to the other report outputs
    precip_png <- file.path(PLOTS_PATH, glue("{COUNTRY_CODE}_era5_precipitation_monthly.png"))
    ggsave(filename = precip_png, plot = p_precip, width = 10, height = 4, dpi = 150)
    log_msg(glue("Saved precipitation plot."))
}

### Précipitations annuelles (ERA5)

In [None]:
# Shapes are optional: when they cannot be fetched, log a warning and carry on
# with spatial_data set to NULL so that the map cells can skip themselves.
warn_missing_shapes <- function(e) {
    log_msg(glue("Could not load shapes for maps: {conditionMessage(e)}"), level = "warning")
    NULL
}

spatial_data <- tryCatch(
    expr = {
        spatial_data_filename <- glue("{COUNTRY_CODE}_shapes.geojson")
        get_latest_dataset_file_in_memory(dhis2_dataset, spatial_data_filename)
    },
    error = warn_missing_shapes
)

In [None]:
# Annual precipitation maps: one facet per year (4 columns), each ADM2 unit
# coloured by its yearly precipitation class.
var_precip <- "total_precipitation"

# Name of the ADM2 identifier column in the shapes. Defaults to "ADM2_ID";
# otherwise the first column whose name matches "adm2...id" (case-insensitive).
adm2_id_col <- "ADM2_ID"
if (!is.null(spatial_data) && !adm2_id_col %in% names(spatial_data)) {
    adm2_id_col <- names(spatial_data)[grepl("(?i)adm2.*id", names(spatial_data), perl = TRUE)][1]
    # `[1]` on an empty selection yields NA, so one NA check covers "no match"
    if (is.na(adm2_id_col)) adm2_id_col <- "ADM2_ID"
}

if (var_precip %in% names(monthly_data) && !is.null(spatial_data)) {
    # Class edges for cut(..., right = FALSE): bins are [0,100), [100,250),
    # [250,800), [800,1200) and [1200,Inf]. The edges must be the lower bound of
    # each label; the previous edges (99, 249, 799, 1199) placed e.g. 99 mm in
    # "100-249" and 1199 mm in "≥ 1200".
    # NOTE(review): assumes the yearly sums are expressed in mm - confirm
    # against the aggregate pipeline.
    precip_breaks <- c(0, 100, 250, 800, 1200, Inf)
    precip_labels <- c("0-99", "100-249", "250-799", "800-1199", "≥ 1200")
    precip_colors <- c("#F7FBFF", "#C6DBEF", "#6BAED6", "#2171B5", "#084594")
    names(precip_colors) <- precip_labels

    # Yearly total per ADM2 unit (sum of the monthly MEAN values), for every
    # year present in the data, then binned into the classes above
    annual_by_year <- monthly_data[[var_precip]] %>%
        group_by(ADM2_ID, YEAR) %>%
        summarise(TOTAL_MM = sum(MEAN, na.rm = TRUE), .groups = "drop") %>%
        mutate(PRECIP_CAT = cut(
            TOTAL_MM,
            breaks = precip_breaks,
            labels = precip_labels,
            include.lowest = TRUE,
            right = FALSE
        ))

    # Attach the yearly classes to the shapes (one row per shape and year).
    # NOTE(review): shapes without any matching row keep YEAR = NA and would
    # presumably end up in their own facet - verify on real data.
    plot_grid <- spatial_data %>%
        left_join(annual_by_year, by = setNames("ADM2_ID", adm2_id_col))

    # Grid layout: 4 columns, as many rows as needed; n_row only drives the
    # height of the saved image
    n_years <- n_distinct(annual_by_year$YEAR)
    n_col <- 4L
    n_row <- ceiling(n_years / n_col)
    p_grid <- ggplot(plot_grid) +
        geom_sf(aes(fill = PRECIP_CAT), color = "black", linewidth = 0.1) +
        scale_fill_manual(
            values = precip_colors,
            name = "Précipitations annuelles totales:",
            na.value = "grey95",
            drop = FALSE  # keep every class in the legend, even unused ones
        ) +
        facet_wrap(~ YEAR, ncol = n_col, strip.position = "top") +
        theme_void() +
        theme(
            plot.title = element_text(colour = "#c62828", hjust = 0, face = "bold", size = 14),
            strip.text = element_text(size = 12, face = "plain"),
            strip.background = element_blank(),
            legend.position = "bottom",
            legend.title = element_text(size = 10),
            legend.text = element_text(size = 9),
            panel.spacing = unit(0.4, "cm"),
            plot.background = element_rect(fill = "white", colour = "grey90", linewidth = 0.5)
        ) +
        labs(title = "Précipitations annuelles (ERA5)")
    print(p_grid)
    ggsave(
        file.path(PLOTS_PATH, glue("{COUNTRY_CODE}_era5_precipitation_annual_grid.png")),
        p_grid, width = 12, height = max(6, n_row * 2.5), dpi = 200
    )
    log_msg(glue("Saved precipitation grid ({n_years} years)."))
}

In [None]:
# Single map: mean annual precipitation per ADM2 unit over the whole period,
# with a BLUESQUARE caption. Relies on `var_precip` and `adm2_id_col` set in
# the annual-grid cell.
if (var_precip %in% names(monthly_data) && !is.null(spatial_data)) {
    # Class edges for cut(..., right = FALSE): bins are [0,100), [100,250),
    # [250,800), [800,1200) and [1200,Inf]. The edges must be the lower bound of
    # each label; the previous edges (99, 249, 799, 1199) placed e.g. 99 mm in
    # "100-249" and 1199 mm in "≥ 1200".
    # NOTE(review): assumes the yearly sums are expressed in mm - confirm
    # against the aggregate pipeline.
    precip_breaks <- c(0, 100, 250, 800, 1200, Inf)
    precip_labels <- c("0-99", "100-249", "250-799", "800-1199", "≥ 1200")
    precip_colors <- c("#F7FBFF", "#C6DBEF", "#6BAED6", "#2171B5", "#084594")
    names(precip_colors) <- precip_labels

    # Yearly total per ADM2 unit (sum of the monthly MEAN values)
    annual_totals <- monthly_data[[var_precip]] %>%
        group_by(ADM2_ID, YEAR) %>%
        summarise(TOTAL_MM = sum(MEAN, na.rm = TRUE), .groups = "drop")

    # Period covered, used in the legend title, file name and log message
    year_min <- min(annual_totals$YEAR)
    year_max <- max(annual_totals$YEAR)
    year_range_lab <- if (year_min == year_max) as.character(year_min) else glue("{year_min}-{year_max}")

    # Average of the yearly totals per ADM2 unit, binned into the classes above
    mean_annual <- annual_totals %>%
        group_by(ADM2_ID) %>%
        summarise(MEAN_ANNUAL = mean(TOTAL_MM, na.rm = TRUE), .groups = "drop") %>%
        mutate(PRECIP_CAT = cut(
            MEAN_ANNUAL,
            breaks = precip_breaks,
            labels = precip_labels,
            include.lowest = TRUE,
            right = FALSE
        ))

    # Attach the class to each shape; shapes without data keep NA and are
    # drawn with the na.value colour
    plot_mean <- spatial_data %>%
        left_join(mean_annual, by = setNames("ADM2_ID", adm2_id_col))

    p_single <- ggplot(plot_mean) +
        geom_sf(aes(fill = PRECIP_CAT), color = "black", linewidth = 0.1) +
        scale_fill_manual(
            values = precip_colors,
            name = glue("Précipitations annuelles moyennes ({year_range_lab})"),
            na.value = "grey95",
            drop = FALSE  # keep every class in the legend, even unused ones
        ) +
        theme_void() +
        theme(
            plot.title = element_text(colour = "#c62828", hjust = 0, face = "bold", size = 14),
            legend.position = "bottom",
            legend.title = element_text(size = 10),
            legend.text = element_text(size = 9),
            plot.background = element_rect(fill = "white", colour = NA),
            plot.caption = element_text(hjust = 0, colour = "#084594", size = 10, face = "bold")
        ) +
        labs(title = "Précipitations annuelles (ERA5)", caption = "BLUESQUARE")
    print(p_single)
    ggsave(
        file.path(PLOTS_PATH, glue("{COUNTRY_CODE}_era5_precipitation_mean_{year_min}_{year_max}.png")),
        p_single, width = 8, height = 6, dpi = 200
    )
    log_msg(glue("Saved mean annual precipitation map ({year_range_lab})."))
} else if (is.null(spatial_data)) {
    # Shapes could not be loaded earlier: report why no map was produced
    log_msg("Shapes not available. Skipping precipitation maps.", level = "warning")
}

In [None]:
# Final log entry: signals the end of the report and where the plots were written
log_msg(glue("ERA5 aggregate report finished. Plots saved in {PLOTS_PATH}"))