# Estimations de l’incidence brute et ajustée

## 1. Setup

In [None]:
# --- SNT workspace paths ------------------------------------------------------
SNT_ROOT_PATH  <- "~/workspace"
CODE_PATH      <- file.path(SNT_ROOT_PATH, "code")
CONFIG_PATH    <- file.path(SNT_ROOT_PATH, "configuration")
DATA_PATH      <- file.path(SNT_ROOT_PATH, "data", "dhis2")

# Load the shared SNT helper functions
# (presumably defines install_and_load(), log_msg() and
# get_latest_dataset_file_in_memory() used below -- TODO confirm)
source(file.path(CODE_PATH, "snt_utils.r"))

# Packages needed by this notebook. Commented-out entries are currently unused
# and kept only for reference.
required_packages <- c(
    "dplyr",
    "tidyr",
    # "terra",
    "ggplot2",
    "stringr",
    # "lubridate",
    # "viridis",
    # "patchwork",
    # "zoo",
    # "purrr",
    "arrow",
    "sf",
    "reticulate",
    # glue() is called unqualified in the coherence checks further down,
    # so it has to be attached explicitly
    "glue"
    # "leaflet"
    )

# Install (if needed) and attach the packages
install_and_load(required_packages)

# Point reticulate at the conda Python so that openhexa.sdk is importable
Sys.setenv(RETICULATE_PYTHON = "/opt/conda/bin/python")
reticulate::py_config()$python
openhexa <- import("openhexa.sdk")

# Load the SNT configuration; abort the notebook with a readable message on failure
config_json <- tryCatch(
    {
        jsonlite::fromJSON(file.path(CONFIG_PATH, "SNT_config.json"))
    },
    error = function(e) {
        # Separator added so that the cause is not glued to the prefix
        msg <- paste0("Error while loading configuration: ", conditionMessage(e))
        cat(msg)
        stop(msg)
    })

# Environment variables required by the sf package (PROJ / GDAL data folders)
Sys.setenv(PROJ_LIB = "/opt/conda/share/proj")
Sys.setenv(GDAL_DATA = "/opt/conda/share/gdal")

In [None]:
# Identifiers taken from the SNT configuration file
COUNTRY_CODE <- config_json$SNT_CONFIG$COUNTRY_CODE
DATASET_NAME <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_INCIDENCE

# Pyramid columns for administrative level 1:
# the *_NAME column comes from the config, the *_ID column is derived from it
ADMIN_1_NAME <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_1)
ADMIN_1_ID   <- str_replace(string = ADMIN_1_NAME, pattern = "_NAME", replacement = "_ID")

# Same for administrative level 2
ADMIN_2_NAME <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_2)
ADMIN_2_ID   <- str_replace(string = ADMIN_2_NAME, pattern = "_NAME", replacement = "_ID")

In [None]:
# Echo the resolved pyramid column names (auto-printed by the notebook) as a quick sanity check
ADMIN_1_ID
ADMIN_2_ID
ADMIN_1_NAME
ADMIN_2_NAME

In [None]:
# Helper: write the row and column counts of a table to the console.
#   df   : object for which nrow() and ncol() are reported
#   name : label shown in the message; defaults to the expression passed as `df`
printdim <- function(df, name = deparse(substitute(df))) {
  n_rows <- nrow(df)
  n_cols <- ncol(df)
  cat(
    "Dimensions of", name, ":",
    n_rows, "rows x",
    n_cols, "columns\n\n",
    sep = " "
  )
}

## 2. Load data

#### 2.1. Shapes

In [None]:
# Fetch the country's shapes file (GeoJSON) from the latest version of the
# formatted DHIS2 dataset; stop with a readable message if it cannot be loaded
DATASET_DHIS2 <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_FORMATTED
shapes_data <- tryCatch(
    get_latest_dataset_file_in_memory(
        DATASET_DHIS2,
        paste0(COUNTRY_CODE, "_shapes.geojson")
    ),
    error = function(e) {
        msg <- paste("Error while loading DHIS2 Shapes data for: " , COUNTRY_CODE, conditionMessage(e))
        cat(msg)
        stop(msg)
    }
)

In [None]:
# List the columns available in the shapes table (auto-printed by the notebook)
names(shapes_data)

#### 2.2. Pyramid
This is needed to add back the `*_NAME` cols (normally we only output tables with the `*_ID` cols)

In [None]:

# Fetch the country's pyramid table (parquet) from the latest version of the
# formatted DHIS2 dataset; stop with a readable message if it cannot be loaded
pyramid_data <- tryCatch({ get_latest_dataset_file_in_memory(DATASET_DHIS2, paste0(COUNTRY_CODE, "_pyramid.parquet")) }, 
                  error = function(e) {
                      # Message names the pyramid (it used to say "Shapes", copied from the cell above)
                      msg <- paste("Error while loading DHIS2 pyramid data for: " , COUNTRY_CODE, conditionMessage(e))
                      cat(msg)
                      stop(msg)
                      })

In [None]:
# Build an ADM1/ADM2 lookup table: pick the configured admin columns, rename
# them to the generic ADM1_* / ADM2_* names, and drop duplicated rows
pyramid <- distinct(
  select(
    pyramid_data,
    all_of(c(
      ADM1_ID   = ADMIN_1_ID,
      ADM1_NAME = ADMIN_1_NAME,
      ADM2_ID   = ADMIN_2_ID,
      ADM2_NAME = ADMIN_2_NAME
    ))
  )
)

head(pyramid, n = 3)

#### 2.3. Monthly cases
Needed for <b>coherence checks</b>:
* **TPR** at monthly level over time 
    * Explain changes (or lack thereof) between Crude and Adj1
    * Useful to monitor resistance (or testing behaviour ... ?)
* **Reporting Rate**
    * Explain changes (or lack thereof) between Adj1 and Adj2
* **Indicators** coherence:
    * SUSP > TEST
    * TEST > CONF
    * ... (check and add more ...)


⚠️ Note: **Import** from 📁`/data/` folder (not OH Dataset) - TBD if this intermediate file should also be stored in the dataset "SNT_DHIS2_INCIDENCE" ...

In [None]:
# Read the monthly cases table from the local data folder
# (<DATA_PATH>/incidence/<COUNTRY_CODE>_monthly_cases.parquet)
monthly_cases <- arrow::read_parquet(
    file.path(
        DATA_PATH,
        "incidence",
        paste0(COUNTRY_CODE, "_monthly_cases.parquet")
    )
)

# Quick look: table size and first rows
dim(monthly_cases)
head(monthly_cases, n = 3)

In [None]:
# Bring in the pyramid lookup columns (ADM1_NAME / ADM2_NAME), matching on both ID columns
monthly_cases <- monthly_cases %>%
    left_join(pyramid, by = join_by(ADM1_ID, ADM2_ID))

In [None]:
# Preview the first rows after the join with the pyramid (auto-printed by the notebook)
head(monthly_cases, 3)

#### 2.4. Yearly Incidence

### 👇 **This function should be moved to `code/snt_utils.r`**

In [None]:
# Identify the parquet file in the latest version of the incidence dataset
# (resolved dynamically, not hard-coded).

dataset_last_version <- openhexa$workspace$get_dataset(DATASET_NAME)$latest_version

files_iter <- dataset_last_version$files

# Drain the Python iterator into an R list. Any failure to fetch or convert the
# next element is treated as "no more files" (best-effort behaviour kept as is).
files <- list()
repeat {
  next_file <- tryCatch(
    py_to_r(iter_next(files_iter)),
    error = function(e) NULL
  )

  if (is.null(next_file)) break

  files <- append(files, list(next_file))
}

# File names as a character vector (vapply keeps the type stable, also when `files` is empty)
filenames <- vapply(files, function(f) as.character(f$filename), character(1))

# Positions of the names containing the literal text ".parquet"
# (fixed = TRUE: the dot must not act as a regex wildcard)
parquet_index <- which(grepl(".parquet", filenames, fixed = TRUE))

# Fail early with a clear message when nothing can be imported
if (length(parquet_index) == 0) {
  stop(paste0("No .parquet file found in the latest version of dataset: ", DATASET_NAME))
}

# Several candidates: keep the first one but make the ambiguity visible
if (length(parquet_index) > 1) {
  warning(paste0(
    "Several .parquet files found in dataset ", DATASET_NAME, " (",
    paste(filenames[parquet_index], collapse = ", "),
    "); using the first one."
  ))
  parquet_index <- parquet_index[1]
}

# Name of the file to import
filename_to_import <- filenames[[parquet_index]]

print(paste0("Identified incidence file to be imported: ", filename_to_import))

In [None]:
# Load the yearly incidence table identified above from the incidence dataset;
# stop with a readable message if it cannot be loaded
yearly_incidence <- tryCatch({ get_latest_dataset_file_in_memory(DATASET_NAME, filename_to_import) }, 
                  error = function(e) {
                      # Message names the incidence file (it used to say "seasonality", copied from another notebook)
                      msg <- paste("Error while loading yearly incidence file for: " , COUNTRY_CODE, conditionMessage(e))
                      cat(msg)
                      stop(msg)
                      })

# Quick look: table size and first rows
dim(yearly_incidence)
head(yearly_incidence, 3)

#### Define plot size

In [None]:
# Set the rendering size (width x height) used by the notebook for the plots below
options(repr.plot.width = 20, repr.plot.height = 12)

## Coherence checks

See Jira: https://bluesquare.atlassian.net/browse/SNT25-272

#### 1. TPR

In [None]:
# Add yearly totals of CONF and TEST, and the resulting yearly TPR, to every
# monthly row (one value per ADM2 x YEAR, repeated on each month) so that the
# yearly TPR can be overlaid on the monthly TPR plots.
monthly_cases_yearly <- monthly_cases %>%
    group_by(ADM1_NAME, ADM2_ID, ADM2_NAME, YEAR) %>%
    mutate(
        CONF_yearly = sum(CONF, na.rm = TRUE),
        TEST_yearly = sum(TEST, na.rm = TRUE),
        # TPR falls back to 1 when the ratio cannot be computed (missing or zero tests)
        TPR_yearly = ifelse(
            is.na(CONF_yearly) | is.na(TEST_yearly) | (TEST_yearly == 0),
            1,
            CONF_yearly / TEST_yearly
        )
    ) %>%
    ungroup()

head(monthly_cases_yearly)

##### 1.1. TPR (monthly) over time

In [None]:
# Monthly TPR: one line per ADM2, panels by ADM1 (rows) and YEAR (columns)
ggplot(data = monthly_cases_yearly) +
    geom_line(
        mapping = aes(x = MONTH, y = TPR, group = ADM2_NAME),
        alpha = 0.75,
        color = "grey21"
    ) +
    # Baseline at 0
    geom_hline(yintercept = 0, linewidth = 0.5, color = "grey21") +
    scale_x_continuous(breaks = seq(1, 12, 1)) +
    # Y axis shown as percentages and limited to [0, 1]
    scale_y_continuous(
        limits = c(0, 1),
        labels = scales::percent_format(accuracy = 1L)
    ) +
    facet_grid(
        rows = vars(ADM1_NAME),
        cols = vars(YEAR),
        switch = "y"
    ) +
    labs(title = "Taux de Positivit√© des Tests (TPR) pour ADM2 et mois") +
    theme_minimal() +
    theme(
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 0.5),
        panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(fill = "grey21"),
        strip.text = element_text(color = "white"),
        strip.placement = "outside"
    )

# Save the plot built above (ggsave() uses the last plot when `plot` is not given)
ggsave(
    filename = file.path(SNT_ROOT_PATH, "pipelines/snt_dhis2_incidence/reporting/outputs/TPR_monthly.png"),
    width = 21,
    height = 29.7,
    units = "cm",
    dpi = 200
)

##### 1.2. TPR (monthly & yearly) over time

In [None]:

# Monthly TPR with the yearly TPR drawn underneath it.
# TPR_yearly is constant within an ADM2 x YEAR, so it shows as a horizontal line per panel.

ggplot(monthly_cases_yearly) +
# Yearly TPR lines (first layer, i.e. drawn below the monthly lines)
  geom_line(
    aes(x = MONTH, y = TPR_yearly, group = ADM2_NAME), 
    color = "grey21",
    alpha = 0.25,
    linewidth = 0.5) +
# Monthly TPR lines
  geom_line(
    aes(x = MONTH, y = TPR, group = ADM2_NAME),
    color = "grey21",
    alpha = 0.75) +
  facet_grid(
    cols = vars(YEAR), rows = vars(ADM1_NAME),
    switch = "y") +
  scale_x_continuous(breaks = seq(1,12,1)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1L), limits = c(0, 1)) +
# Baseline at 0
  geom_hline(
    yintercept = 0,
    color = "grey21",
    linewidth = 0.5
  ) +
  labs(
    # Typo fixed in the title: "ADM2 at pour" -> "ADM2 et pour"
    title = "Taux de Positivit√© des Tests (TPR) pour ADM2 et pour mois et ann√©e",
    subtitle = "Les valeurs agr√©g√©es par ann√©e sont indiqu√©es comme lignes horizontales.") +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 0.5),
    strip.placement = "outside",
    strip.background = element_rect(fill = "grey21"),
    strip.text = element_text(color = "white"),
    axis.title.y = element_blank()
  )

# Save the plot built above (ggsave() uses the last plot when `plot` is not given)
ggsave(
    file.path(SNT_ROOT_PATH, "pipelines/snt_dhis2_incidence/reporting/outputs/TPR_monthly_yearly.png"),
    units = "cm",
    width = 21,
    height = 29.7,
    dpi = 200)

#### 2. RR
For more details, check the **report** notebook of the reporting rate method that was used. Possible options:
* **Dataset**: pipelines/snt_dhis2_reporting_rate_dataset/reporting/outputs/**snt_dhis2_reporting_rate_dataset_report**\_OUTPUT\_\*.ipynb
* **DataElement**: work in progress ...

⚠️⚠️⚠️ **TO DO**: align code here with report notebook of reporting rate (use "🎨 NEW dynamic colors & breaks" approach) ⚠️⚠️⚠️

In [None]:
# Reporting rate heatmap: one tile per ADM2 (rows, reversed factor order) and
# MONTH (columns), panels by ADM1 (rows) and YEAR (columns).
# A manual, categorical fill scale (fill = REPORTING_RATE_CATEGORY with
# scale_fill_manual(values = palette_to_use, na.value = "white",
# name = "Reporting Rate: ")) is a possible alternative, currently not used.
ggplot(data = monthly_cases) +
    geom_tile(
        mapping = aes(
            x = MONTH,
            y = forcats::fct_rev(ADM2_NAME),
            fill = REPORTING_RATE
        ),
        color = "white",
        na.rm = FALSE,
        show.legend = TRUE
    ) +
    # Continuous viridis scale, reversed; missing values are drawn in white
    scale_fill_viridis_c(
        name = "Reporting Rate:",
        option = "viridis",
        direction = -1,
        na.value = "white"
    ) +
    scale_x_continuous(breaks = seq(1, 12, 1)) +
    facet_grid(
        rows = vars(ADM1_NAME),
        cols = vars(YEAR),
        scales = "free_y",
        space = "free_y",
        switch = "y"
    ) +
    guides(fill = guide_legend(nrow = 1)) +
    theme_minimal() +
    theme(
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 7),
        legend.position = "bottom",
        legend.key.height = unit(0.25, "cm"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.subtitle = element_text(margin = margin(0, 0, 20, 0)),
        strip.background = element_rect(fill = "grey21"),
        strip.text = element_text(color = "white", face = "bold", size = 10),
        strip.placement = "outside"
    )

# Export the plot built above as png (ggsave() uses the last plot when `plot` is not given)
ggsave(
    filename = file.path(SNT_ROOT_PATH, "pipelines/snt_dhis2_incidence/reporting/outputs/ReportingRate_heatmap_monthly.png"),
    width = 21,
    height = 29.7,
    units = "cm",
    dpi = 200
)

In [None]:
# Completeness check on the reporting rate:
# count the missing (NA) values in REPORTING_RATE and log the outcome
na_count <- sum(is.na(monthly_cases$REPORTING_RATE))
if (na_count == 0) {
    log_msg("‚úÖ Reporting Rate data contains no missing values (NA) in 'REPORTING_RATE' column.")
} else {
    log_msg(
        glue("‚ö†Ô∏è Warning: Reporting Rate data contains {na_count} missing values (NA) in 'REPORTING_RATE' column."),
        "warning"
    )
}

#### 3. Coherence checks on Incidence 
Logic: each level of adjustment should produce values that are greater (or equal) to the previous level.<br>

Namely:
* Crude <= Adj1
* Adj1 <= Adj2
* Adj2 <= Adj3

Given that Crude, Adj1, Adj2, and Adj3 are calculated by aggregating `CONF`, `N1`, `N2`, and `N3` at ADM2 x YEAR, we can first verify that the relationship between these values is coherent. Namely, check if
* `CONF` <= `N1`
* `N1` <= `N2`
* `N2` <= `N3` 

##### 3.1. Scatter plots for pairwise comparison of incidence "metrics"
Metrics used to calculate incidence: `CONF`, `N1`, `N2`, (and `N3`)

In [None]:
# CONF vs N1 

# Count the rows where CONF exceeds N1 (rows with NA in either column are ignored)
# and prepare the caption shown under the plot.
conf_greater_n1_count <- sum(monthly_cases$CONF > monthly_cases$N1, na.rm = TRUE)
if (conf_greater_n1_count > 0) {
    # Only the message goes to glue(): extra unnamed arguments are concatenated,
    # which used to append the word "warning" to the caption
    warning_text <- glue("‚úò Warning: There are {conf_greater_n1_count} instances where CONF is greater than N1.")
} else {
    warning_text <- "‚úî All CONF values are less than or equal to N1."
}

# Scatter plot; points above the dashed red identity line have CONF > N1
ggplot(data = monthly_cases) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
  geom_point(
    aes(
      x = N1,
      y = CONF),
    alpha = 0.5) +
  labs(
    title = "CONF vs N1",
    subtitle = "N1 is expected to be greater or equal to CONF",
    caption = warning_text
    ) +
  theme_minimal() +
  theme(
    aspect.ratio = 1,
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 0)
  )

In [None]:
# N1 vs N2

# Count the rows where N1 exceeds N2 (rows with NA in either column are ignored)
# and prepare the caption shown under the plot.
n1_greater_n2_count <- sum(monthly_cases$N1 > monthly_cases$N2, na.rm = TRUE)
if (n1_greater_n2_count > 0) {
    # Only the message goes to glue(): extra unnamed arguments are concatenated,
    # which used to append the word "warning" to the caption
    warning_text <- glue("‚úò Warning: There are {n1_greater_n2_count} instances where N1 is greater than N2.")
} else {
    warning_text <- "‚úî All N1 values are less than or equal to N2."
}

# Scatter plot; points above the dashed red identity line have N1 > N2
ggplot(data = monthly_cases) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
  geom_point(
    aes(
      x = N2,
      y = N1),
    alpha = 0.5) +
  labs(title = "N1 vs N2",
       subtitle = "N2 is expected to be greater or equal to N1.",
       caption = warning_text
       ) +
  theme_minimal() +
  theme(
    aspect.ratio = 1,
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 0)
  )

##### 3.2. Scatter plots for pairwise comparison of Incidence values
Actual (calculated) incidence: Crude, Adj1, Adj2, Adj3

In [None]:
# Crude vs Adj1

# Count the rows where INCIDENCE_CRUDE exceeds INCIDENCE_ADJ_TESTING
# (rows with NA in either column are ignored) and prepare the plot caption.
incidence_crude_greater_adj1_count <- sum(yearly_incidence$INCIDENCE_CRUDE > yearly_incidence$INCIDENCE_ADJ_TESTING, na.rm = TRUE)
if (incidence_crude_greater_adj1_count > 0) {
    # Only the message goes to glue(): extra unnamed arguments are concatenated,
    # which used to append the word "warning" to the caption
    warning_text <- glue("‚úò Attention : Il y a {incidence_crude_greater_adj1_count} cas o√π INCIDENCE_CRUDE est sup√©rieure √† INCIDENCE_ADJ_TESTING.")
} else {
    warning_text <- "‚úî Toutes les valeurs INCIDENCE_CRUDE sont inf√©rieures ou √©gales √† INCIDENCE_ADJ_TESTING."
}

# Scatter plot; points above the dashed red identity line have crude > adjusted (testing)
ggplot(data = yearly_incidence) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
  geom_point(
    aes(
      x = INCIDENCE_ADJ_TESTING,
      y = INCIDENCE_CRUDE),
    alpha = 0.5) +
  labs(
    title = "INCIDENCE_CRUDE vs INCIDENCE_ADJ_TESTING",
    subtitle = "INCIDENCE_ADJ_TESTING devrait √™tre sup√©rieur ou √©gal √† INCIDENCE_CRUDE.",
    caption = warning_text
    ) +
  theme_minimal() +
  theme(
    aspect.ratio = 1,
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 0)
  )

In [None]:
# Write the most recently created plot to png
# (no `plot` argument is given, so ggsave() falls back to the last plot)
ggsave(
    filename = file.path(SNT_ROOT_PATH, "pipelines/snt_dhis2_incidence/reporting/outputs/Incidence_crude_vs_adj_testing.png"),
    width = 21,
    height = 21,
    units = "cm",
    dpi = 200
)

In [None]:
# Adj1 vs Adj2

# Count the rows where INCIDENCE_ADJ_TESTING exceeds INCIDENCE_ADJ_REPORTING
# (rows with NA in either column are ignored) and prepare the plot caption.
testing_greater_than_reporting <- sum(yearly_incidence$INCIDENCE_ADJ_TESTING > yearly_incidence$INCIDENCE_ADJ_REPORTING, na.rm = TRUE)
if (testing_greater_than_reporting > 0) {
    # Only the message goes to glue(): extra unnamed arguments are concatenated,
    # which used to append the word "warning" to the caption
    warning_text <- glue("‚úò Attention : Il y a {testing_greater_than_reporting} cas o√π INCIDENCE_ADJ_TESTING est sup√©rieure √† INCIDENCE_ADJ_REPORTING.")
} else {
  warning_text <- "‚úî Toutes les valeurs INCIDENCE_ADJ_TESTING sont inf√©rieures ou √©gales √† INCIDENCE_ADJ_REPORTING."
}

# Scatter plot; points above the dashed red identity line have Adj1 > Adj2
ggplot(data = yearly_incidence) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
  geom_point(
    aes(
      x = INCIDENCE_ADJ_REPORTING,
      y = INCIDENCE_ADJ_TESTING),
    alpha = 0.5) +
  labs(
    title = "INCIDENCE_ADJ_TESTING vs INCIDENCE_ADJ_REPORTING",
    subtitle = "INCIDENCE_ADJ_REPORTING devrait √™tre sup√©rieur ou √©gal √† INCIDENCE_ADJ_TESTING.",
    caption = warning_text
    ) +
  theme_minimal() +
  theme(
    aspect.ratio = 1,
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 0)
  )

In [None]:
# Write the most recently created plot to png
# (no `plot` argument is given, so ggsave() falls back to the last plot)
ggsave(
    filename = file.path(SNT_ROOT_PATH, "pipelines/snt_dhis2_incidence/reporting/outputs/Incidence_adj_testing_vs_adj_reporting.png"),
    width = 21,
    height = 21,
    units = "cm",
    dpi = 200
)

In [None]:
# Compare the care-seeking adjusted incidence with INCIDENCE_ADJ_TESTING, but only if the
# column yearly_incidence$INCIDENCE_ADJ_CARESEEKING exists and is not entirely NA.
# NOTE(review): this cell was headed "Adj3 vs Adj2", yet the comparison, title and output
# file name all use INCIDENCE_ADJ_TESTING (Adj1) -- confirm whether INCIDENCE_ADJ_REPORTING was meant.

if ("INCIDENCE_ADJ_CARESEEKING" %in% colnames(yearly_incidence) && any(!is.na(yearly_incidence$INCIDENCE_ADJ_CARESEEKING))) {

    # Incoherent rows are those where the lower adjustment level (TESTING) exceeds the higher
    # one (CARESEEKING). The comparison used to be the other way round, which flagged the
    # expected situation. Rows with NA in either column are ignored.
    testing_greater_than_careseeking <- sum(yearly_incidence$INCIDENCE_ADJ_TESTING > yearly_incidence$INCIDENCE_ADJ_CARESEEKING, na.rm = TRUE)
    if (testing_greater_than_careseeking > 0) {
        # Only the message goes to glue(): extra unnamed arguments are concatenated,
        # which used to append the word "warning" to the caption
        warning_text <- glue("‚úò Attention : Il y a {testing_greater_than_careseeking} cas o√π INCIDENCE_ADJ_TESTING est sup√©rieure √† INCIDENCE_ADJ_CARESEEKING.")
    } else {
        warning_text <- "‚úî Toutes les valeurs INCIDENCE_ADJ_TESTING sont inf√©rieures ou √©gales √† INCIDENCE_ADJ_CARESEEKING."
    }

    # Scatter plot; points above the dashed red identity line have TESTING > CARESEEKING
    careseeking_plot <- ggplot(data = yearly_incidence) +
        geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
        geom_point(
            aes(
                x = INCIDENCE_ADJ_CARESEEKING,
                y = INCIDENCE_ADJ_TESTING),
            alpha = 0.5) +
        labs(
            title = "INCIDENCE_ADJ_TESTING vs INCIDENCE_ADJ_CARESEEKING",
            subtitle = "INCIDENCE_ADJ_CARESEEKING devrait √™tre sup√©rieur ou √©gal √† INCIDENCE_ADJ_TESTING.",
            caption = warning_text
        ) +
        theme_minimal() +
        theme(
            aspect.ratio = 1,
            plot.caption.position = "plot",
            plot.caption = element_text(hjust = 0)
        )

    # Inside an `if` block the plot is not auto-printed, so display it explicitly
    print(careseeking_plot)

    # Export plot as png
    ggsave(
        file.path(SNT_ROOT_PATH, "pipelines/snt_dhis2_incidence/reporting/outputs/Incidence_adj_testing_vs_adj_careseeking.png"),
        plot = careseeking_plot,
        units = "cm",
        width = 21,
        height = 21,
        dpi = 200)
}


## Incidence du paludisme par année par district sanitaire

In [None]:
# Step 1: Prepare long-form data (one row per ADM2 x YEAR x incidence type)
incidence_long <- yearly_incidence  %>%
  select(ADM2_ID, YEAR, POPULATION,
         INCIDENCE_CRUDE,
         INCIDENCE_ADJ_TESTING,
         INCIDENCE_ADJ_REPORTING,
         # Optional column: the coherence check above also treats it as possibly
         # absent, so do not fail here when it is missing
         any_of("INCIDENCE_ADJ_CARESEEKING")) %>%
  pivot_longer(
    cols = starts_with("INCIDENCE"),
    names_to = "INCIDENCE_TYPE",
    values_to = "incidence"
  ) %>%
  mutate(
    # Human-readable label for each incidence type (unknown types keep their column name)
    incidence_type_label = case_when(
      INCIDENCE_TYPE == "INCIDENCE_CRUDE"             ~ "Brute",
      INCIDENCE_TYPE == "INCIDENCE_ADJ_TESTING"       ~ "Ajust√©e 1 (Test)",
      INCIDENCE_TYPE == "INCIDENCE_ADJ_REPORTING"     ~ "Ajust√©e 2 (Test + Compl√©tude)",
      INCIDENCE_TYPE == "INCIDENCE_ADJ_CARESEEKING"   ~ "Ajust√©e 3 (Test + Compl√©tude + Soins)",
      TRUE ~ INCIDENCE_TYPE
    )
  )

# Step 2: Join with the shapes (keeps every shape, also those without incidence data)
map_data_long <- shapes_data %>%
  left_join(incidence_long, by = "ADM2_ID")

# Step 3: Categorize incidence for coloring
map_data_long <- map_data_long %>%
  mutate(
    # Upper bounds follow the legend labels. The 250/450 boundary was coded as 500,
    # which put values between 450 and 500 in the class labelled "250 a 450".
    # NOTE(review): confirm that 450 (and not 500) is the intended cut-off.
    incidence_cat = case_when(
      is.na(incidence)       ~ "NA",
      incidence < 100        ~ "0 √† 100",
      incidence < 250        ~ "100 √† 250",
      incidence < 450        ~ "250 √† 450",
      incidence < 1000       ~ "450 √† 1000",
      TRUE                   ~ "> √† 1000"
    ),
    # Fix the legend order
    incidence_cat = factor(incidence_cat, levels = c(
      "0 √† 100", "100 √† 250", "250 √† 450", "450 √† 1000", "> √† 1000", "NA"
    )),
    # Fix the facet row order
    incidence_type_label = factor(incidence_type_label, levels = c(
      "Brute",
      "Ajust√©e 1 (Test)",
      "Ajust√©e 2 (Test + Compl√©tude)",
      "Ajust√©e 3 (Test + Compl√©tude + Soins)"
    ))
  )

# Step 4: Color palette, one color per incidence category
incidence_colors <- c(
  "0 √† 100"     = "#bdd7e7",
  "100 √† 250"   = "#fcae91",
  "250 √† 450"   = "#fb6a4a",
  "450 √† 1000"  = "#cb181d",
  "> √† 1000"    = "#67000d",
  "NA"          = "#000000"
)

# Step 5: Plot one map per incidence type (rows) and year (columns)
options(repr.plot.width = 20, repr.plot.height = 12)
ggplot(map_data_long) +
  # Polygon borders are controlled by `linewidth` (`size` only applies to point geometries)
  geom_sf(aes(fill = incidence_cat), color = "white", linewidth = 0.2) +
  facet_grid(rows = vars(incidence_type_label), cols = vars(YEAR)) +
  scale_fill_manual(values = incidence_colors, name = "Incidence (pour 1000)") +
  labs(
    title = "Incidence annuelle du paludisme par district sanitaire",
    subtitle = "Brute et ajust√©e selon les √©tapes OMS"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    strip.text = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 13),
    legend.position = "right"
  )


In [None]:
# Step 1: Compute, per ADM2 and incidence type, the (unweighted) mean of the
# yearly incidence values across all years; NA values are ignored
mean_incidence <- yearly_incidence %>%
  select(ADM2_ID, POPULATION,
         INCIDENCE_CRUDE,
         INCIDENCE_ADJ_TESTING,
         INCIDENCE_ADJ_REPORTING,
         # Optional column: the coherence check above also treats it as possibly
         # absent, so do not fail here when it is missing
         any_of("INCIDENCE_ADJ_CARESEEKING")) %>%
  pivot_longer(
    cols = starts_with("INCIDENCE"),
    names_to = "INCIDENCE_TYPE",
    values_to = "incidence"
  ) %>%
  group_by(ADM2_ID, INCIDENCE_TYPE) %>%
  summarise(incidence = mean(incidence, na.rm = TRUE), .groups = "drop") %>%
  mutate(
    # Human-readable label for each incidence type (unknown types keep their column name)
    incidence_type_label = case_when(
      INCIDENCE_TYPE == "INCIDENCE_CRUDE"             ~ "Brute",
      INCIDENCE_TYPE == "INCIDENCE_ADJ_TESTING"       ~ "Ajust√©e 1 (Test)",
      INCIDENCE_TYPE == "INCIDENCE_ADJ_REPORTING"     ~ "Ajust√©e 2 (Test + Compl√©tude)",
      INCIDENCE_TYPE == "INCIDENCE_ADJ_CARESEEKING"   ~ "Ajust√©e 3 (Test + Compl√©tude + Soins)",
      TRUE ~ INCIDENCE_TYPE
    )
  )

# Step 2: Join with the shapes (keeps every shape, also those without incidence data)
map_data_mean <- shapes_data %>%
  left_join(mean_incidence, by = "ADM2_ID")

# Step 3: Categorize
map_data_mean <- map_data_mean %>%
  mutate(
    # Upper bounds follow the legend labels. The 250/450 boundary was coded as 500,
    # which put values between 450 and 500 in the class labelled "250 a 450".
    # NOTE(review): confirm that 450 (and not 500) is the intended cut-off.
    # A mean over only-NA values is NaN, which is.na() also sends to "NA".
    incidence_cat = case_when(
      is.na(incidence)       ~ "NA",
      incidence < 100        ~ "0 √† 100",
      incidence < 250        ~ "100 √† 250",
      incidence < 450        ~ "250 √† 450",
      incidence < 1000       ~ "450 √† 1000",
      TRUE                   ~ "> √† 1000"
    ),
    # Fix the legend order
    incidence_cat = factor(incidence_cat, levels = c(
      "0 √† 100", "100 √† 250", "250 √† 450", "450 √† 1000", "> √† 1000", "NA"
    )),
    # Fix the facet order
    incidence_type_label = factor(incidence_type_label, levels = c(
      "Brute",
      "Ajust√©e 1 (Test)",
      "Ajust√©e 2 (Test + Compl√©tude)",
      "Ajust√©e 3 (Test + Compl√©tude + Soins)"
    ))
  )

In [None]:
# Step 4: Plot mean incidence, one map per incidence type
# (colors come from `incidence_colors`, defined with the yearly maps)
ggplot(map_data_mean) +
  # Polygon borders are controlled by `linewidth` (`size` only applies to point geometries)
  geom_sf(aes(fill = incidence_cat), color = "white", linewidth = 0.2) +
  facet_wrap(~ incidence_type_label) +
  scale_fill_manual(values = incidence_colors, name = "Incidence moyenne (pour 1000)") +
  labs(
    title = "Incidence moyenne du paludisme par district sanitaire",
    subtitle = "Moyenne annuelle (toutes ann√©es confondues)",
    x = NULL, y = NULL
  ) +
  theme_minimal(base_size = 16) +
  theme(
    strip.text = element_text(face = "bold", size = 16),
    plot.title = element_text(face = "bold", size = 20),
    plot.subtitle = element_text(size = 16),
    legend.position = "right"
  )