# **Cartes extraites du Malaria Atlas Project (MAP)**

## Setup

In [None]:
# Notebook setup: define workspace paths, load shared helpers, attach packages,
# and set the environment variables needed by reticulate and sf.
# The order of the statements below matters (helpers are sourced before they are
# called; RETICULATE_PYTHON is set before Python is queried).

# Set SNT Paths (all derived from the workspace root)
SNT_ROOT_PATH  <- "~/workspace"
CODE_PATH      <- file.path(SNT_ROOT_PATH, "code")
CONFIG_PATH    <- file.path(SNT_ROOT_PATH, "configuration")
REPORTING_NB_PATH <- file.path(SNT_ROOT_PATH, "pipelines", "snt_map_extracts", "reporting")

# Load helpers
# NOTE(review): presumably snt_utils.r provides install_and_load(), log_msg(),
# printdim() and get_latest_dataset_file_in_memory(), and snt_palettes.r provides
# palette_pfpr_map_mis -- confirm against those files.
source(file.path(CODE_PATH, "snt_utils.r"))
source(file.path(CODE_PATH, "snt_palettes.r"))

# List required packages 
# (jsonlite and glue are not listed: they are only called with the `pkg::` prefix)
required_packages <- c("dplyr", "ggplot2", "stringr", "viridis", "purrr", "sf", "reticulate")  

# Install (if needed) and attach the packages listed above
install_and_load(required_packages)

# Set environment to load openhexa.sdk from the right environment
# (must be set before reticulate resolves its Python configuration on the next line)
Sys.setenv(RETICULATE_PYTHON = "/opt/conda/bin/python")
reticulate::py_config()$python
# `import` is presumably reticulate::import, attached by install_and_load() -- TODO confirm
openhexa <- import("openhexa.sdk")

# Required environment for the sf packages (locations of the PROJ and GDAL data files)
Sys.setenv(PROJ_LIB = "/opt/conda/share/proj")
Sys.setenv(GDAL_DATA = "/opt/conda/share/gdal")

In [None]:
# Load SNT config
# Parses SNT_config.json from CONFIG_PATH. On failure the error is echoed to the
# cell output and re-raised, so the notebook stops here instead of failing later.
config_json <- tryCatch(
  jsonlite::fromJSON(file.path(CONFIG_PATH, "SNT_config.json")),
  error = function(e) {
    # Separator added: previously the prefix and the underlying message were
    # concatenated with nothing in between ("...configurationcannot open...").
    msg <- paste0("Error while loading configuration: ", conditionMessage(e))
    cat(msg, "\n")
    stop(msg)
  }
)

In [None]:
# Identifiers read from the parsed configuration and used throughout the notebook
COUNTRY_CODE <- config_json[["SNT_CONFIG"]][["COUNTRY_CODE"]]
DATASET_NAME <- config_json[["SNT_DATASET_IDENTIFIERS"]][["SNT_MAP_EXTRACT"]]
ADM_2 <- toupper(config_json[["SNT_CONFIG"]][["DHIS2_ADMINISTRATION_2"]])

In [None]:
# # This is now in snt_utils!

# # print function
# printdim <- function(df, name = deparse(substitute(df))) {
#   cat("Dimensions of", name, ":", nrow(df), "rows x", ncol(df), "columns\n\n")
# }

In [None]:
# Load SNT metadata
# Parses SNT_metadata.json from CONFIG_PATH. On failure the error is echoed to the
# cell output and re-raised, so the notebook stops here.
metadata_json <- tryCatch(
  jsonlite::fromJSON(file.path(CONFIG_PATH, "SNT_metadata.json")),
  error = function(e) {
    # Separator added: previously the prefix and the underlying message were
    # concatenated with nothing in between.
    msg <- paste0("[ERROR] Error while loading metadata: ", conditionMessage(e))
    cat(msg, "\n")
    stop(msg)
  }
)

log_msg(paste0("SNT metadata loaded from : ", file.path(CONFIG_PATH, "SNT_metadata.json")))

In [None]:
# Read the PfPR scale break values from the metadata.
# A missing or unparseable SCALE node yields NULL (with a warning) instead of an
# error: jsonlite::fromJSON() fails on NULL input, which previously stopped the
# notebook before any default-value fallback could be applied.
break_vals <- tryCatch(
  {
    scale_raw <- metadata_json$PF_PR_RATE$SCALE
    if (is.null(scale_raw)) {
      NULL
    } else if (is.numeric(scale_raw)) {
      # Already parsed as numbers (e.g. stored as a JSON array): use as is
      scale_raw
    } else {
      # Stored as a JSON-encoded string such as "[5, 10, 20]"
      jsonlite::fromJSON(scale_raw)
    }
  },
  error = function(e) {
    log_msg(
      paste0(
        "[WARNING] Could not parse PfPR (MAP) scale break values from SNT_metadata.json: ",
        conditionMessage(e)
      ),
      "warning"
    )
    NULL
  }
)

# Only report the values when something was actually loaded
if (length(break_vals) > 0) {
  log_msg(paste0("PfPR (MAP) scale break values loaded from SNT_metadata.json : ", paste(break_vals, collapse = ", ")))
}

## Import data

### MAP data

In [None]:
# Load the latest MAP extract (<COUNTRY_CODE>_map_data.parquet) from the MAP dataset.
# On failure the error is echoed to the cell output and re-raised.
map_data <- tryCatch(
  get_latest_dataset_file_in_memory(DATASET_NAME, paste0(COUNTRY_CODE, "_map_data.parquet")),
  error = function(e) {
    # Message corrected: it previously referred to a "seasonality file"
    # (copy-paste from another notebook), which is misleading when debugging.
    msg <- paste("Error while loading MAP data file for:", COUNTRY_CODE, conditionMessage(e))
    cat(msg, "\n")
    stop(msg)
  }
)

In [None]:
# Quick look at the loaded MAP table: its dimensions, then its first rows
printdim(map_data)
head(map_data, n = 6L)

### Shapes

In [None]:
# Identifier of the formatted DHIS2 dataset that holds the shapes file
DATASET_DHIS2 <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_FORMATTED

# Fetch <COUNTRY_CODE>_shapes.geojson; echo and re-raise any loading error
shapes_data <- tryCatch(
  get_latest_dataset_file_in_memory(
    DATASET_DHIS2,
    paste0(COUNTRY_CODE, "_shapes.geojson")
  ),
  error = function(err) {
    failure_text <- paste(
      "Error while loading DHIS2 Shapes data for: ",
      COUNTRY_CODE,
      conditionMessage(err)
    )
    cat(failure_text)
    stop(failure_text)
  }
)

# Report the size of the shapes table
printdim(shapes_data)

## Plot

#### Prep data

In [None]:
# Attach the MAP values to the shapes. Shapes are the left table, so every
# geometry is kept even when no MAP row matches on the four ADM keys.
map_data_shapes <- shapes_data |>
  left_join(
    map_data,
    by = join_by(ADM1_NAME, ADM1_ID, ADM2_NAME, ADM2_ID)
  )

In [None]:
# Extract the year covered by the MAP data.
# The pipeline is expected to extract one year at a time, but this is verified
# here: `year` is later used in `YEAR == year` and in the output file name, both
# of which silently misbehave (recycling) or fail if it is not a single value.
years_found <- unique(map_data$YEAR[!is.na(map_data$YEAR)])

if (length(years_found) == 0) {
  stop("No YEAR value found in MAP data: cannot determine the extraction year.", call. = FALSE)
}

if (length(years_found) > 1) {
  log_msg(
    paste0(
      "[WARNING] MAP data contains several years (",
      paste(sort(years_found), collapse = ", "),
      "). Using the most recent one."
    ),
    "warning"
  )
}

# Single value guaranteed from here on (most recent year when several are present)
year <- max(years_found)

log_msg(paste0("MAP data extracted for year: ", year))

#### Plot Prevalence (PfPR)
Use specific breaks and color scale (palette) for comparison with MIS data

🎨 Dynamic categories and color assignment

##### 0. Define parameters for plotting

In [None]:
# Settings for the PfPR map
# Metric to display (matched against the METRIC_NAME column)
metric_name <- "Pf_Parasite_Rate"
# Colour palette (defined outside this notebook; presumably in snt_palettes.r)
palette <- palette_pfpr_map_mis
# `year` is intentionally not set here: it is read from the data, which is more
# robust than the former hard-coded value (year <- 2025).

##### 1. Define breaks and labels

In [None]:
# Safety net: if nothing was found in the metadata (NULL or empty vector),
# warn and fall back to default break values so the plot can still be drawn.
if (length(break_vals) == 0) {
  log_msg(
    "[WARNING] No break values found in SNT_metadata.json at defined node. Using default values.",
    "warning"
  )
  break_vals <- c(5, 10, 20, 30, 40)
}

In [None]:
# 1. Clean the break values
# Note: assumes that the data starts at 0!
# 0 and Inf are added as outer cut points below, so the inner breaks must be
# strictly positive and finite. They are also sorted and de-duplicated so that
# the labels built here line up with the intervals produced by cut().
break_vals <- sort(unique(as.numeric(break_vals)))
break_vals <- break_vals[is.finite(break_vals) & break_vals > 0]

if (length(break_vals) == 0) {
  stop("No usable break values available to build the PfPR categories.", call. = FALSE)
}

# 2. Create the full set of cut points (0 to Infinity)
full_breaks <- c(0, break_vals, Inf)

# 3. Create dynamic labels (one per interval, i.e. length(break_vals) + 1 labels)
n_breaks <- length(break_vals)

# Middle labels only exist when there are at least two break values. With a
# single break, paste0() on zero-length vectors would otherwise return the bogus
# label " -  %" and the number of labels would no longer match the intervals.
middle_labels <- if (n_breaks > 1) {
  paste0(head(break_vals, -1), " - ", tail(break_vals, -1), " %")
} else {
  character(0)
}

labels <- c(
  paste0("< ", break_vals[1], " %"),         # First label
  middle_labels,                             # Middle labels
  paste0("> ", break_vals[n_breaks], " %")   # Last label
)

# Check
labels

##### 2. Check palette & redefine if needed

In [None]:
# Fallback when the palette does not provide exactly one colour per label:
# interpolate a new palette of the right length between the first and the last
# colour of the original palette, and show the result.
if (length(palette) != length(labels)) {
  log_msg(
    "Number of labels does not match number of colors in the chosen palette. Creating dynamic palette.",
    "warning"
  )
  palette <- colorRampPalette(palette[c(1, length(palette))])(length(labels))

  print(palette)
}

##### 3. Plot "Pf_Parasite_Rate"

In [None]:
# Keep only the rows of the selected metric for the year found in the data
map_data_shapes_filtered <- filter(
  map_data_shapes,
  METRIC_NAME == metric_name,
  YEAR == year
)

# Add the plotting columns:
# - VALUE_PERC: VALUE multiplied by 100. Important: from here on values are
#   percentages, not fractions between 0 and 1, to match the break values.
# - CATEGORY: the interval of `full_breaks` each value falls into, named with
#   `labels`. Intervals are closed on the left (right = FALSE).
map_data_shapes_plot <- mutate(
  map_data_shapes_filtered,
  VALUE_PERC = VALUE * 100,
  CATEGORY = cut(
    VALUE_PERC,
    breaks = full_breaks,
    labels = labels,
    include.lowest = TRUE,
    right = FALSE
  )
)

In [None]:
# Build, display, save and log the categorical PfPR map.

# Output location of the figure (computed once, reused for saving and logging)
figure_path <- file.path(
  REPORTING_NB_PATH, "outputs", "figures",
  glue::glue("prevalence_MAP_{year}.png")
)

pfpr_map <- ggplot(data = map_data_shapes_plot) +
  geom_sf(
    aes(geometry = geometry, fill = CATEGORY),
    color = "black",
    # Border width: since ggplot2 3.4.0 line widths are set with `linewidth`;
    # `size` no longer controls polygon outlines. (`create.dir` in ggsave()
    # below already requires ggplot2 >= 3.5.0, so `linewidth` is available.)
    linewidth = 0.75,
    show.legend = TRUE
  ) +
  labs(
    title = "Prévalence du Plasmodium falciparum (PfPR) par DS (ADM2)",
    subtitle = glue::glue("Source: MAP {year}")
  ) +
  scale_fill_manual(
    # Colours are assigned to the categories by position. The labels are built
    # dynamically, so the palette's own names (absent after interpolation, or
    # possibly different) cannot be relied on to define the legend entries.
    values = unname(palette),
    labels = labels,
    limits = labels, # Forces all levels to exist
    drop = FALSE # Prevents dropping empty levels
  ) +
  theme_void() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    strip.text = element_text(face = "bold"),
    legend.key.height = unit(0.75, "line"),
    legend.margin = margin(10, 0, 0, 0)
  ) +
  guides(fill = guide_legend(nrow = 1))

# Display the map in the notebook output
print(pfpr_map)

# Save exactly the plot built above (rather than whatever last_plot() returns)
ggsave(
  filename = figure_path,
  plot = pfpr_map,
  create.dir = TRUE,
  units = "cm",
  width = 21,
  height = 15,
  bg = "white"
)

log_msg(paste0("📊 Figure saved to: ", figure_path))

-------------

#### Plot all available metrics (choropleth maps)

In [None]:
# Distinct metric names present in the MAP data
metrics <- unique(map_data$METRIC_NAME)

# Build one continuous choropleth per metric; the result is a list of ggplot
# objects in the same order as `metrics`.
plots <- map(metrics, function(one_metric) {
  # Rows (with geometry) belonging to this metric only
  metric_rows <- filter(map_data_shapes, METRIC_NAME == one_metric)

  # Larger text sizes applied on top of theme_minimal()
  text_sizes <- theme(
    plot.title = element_text(size = 20, face = "bold"),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14)
  )

  ggplot(metric_rows) +
    geom_sf(aes(fill = VALUE), color = "white") +
    scale_fill_viridis_c(option = "C", na.value = "lightgrey") +
    labs(title = paste0(one_metric), fill = "Valeur") +
    theme_minimal(base_size = 16) +
    text_sizes
})

In [None]:
# Figure size used by the notebook when rendering each plot
options(repr.plot.width = 10, repr.plot.height = 8)

# Render the maps one after the other, with a short pause between two plots
for (plot_idx in seq_along(plots)) {
  print(plots[[plot_idx]])
  Sys.sleep(1)
}