In [None]:
# Fallback values for the notebook parameters. Each one is assigned only when it
# does not already exist, i.e. when papermill has not injected it (development runs).
if (!exists("N1_METHOD")) N1_METHOD <- "PRES"
if (!exists("ROUTINE_DATA_CHOICE")) ROUTINE_DATA_CHOICE <- "imputed"
if (!exists("OUTLIER_DETECTION_METHOD")) OUTLIER_DETECTION_METHOD <- "mean"
if (!exists("USE_CSB_DATA")) USE_CSB_DATA <- FALSE
if (!exists("USE_ADJUSTED_POPULATION")) USE_ADJUSTED_POPULATION <- TRUE
# NULL means "no specific disaggregation selected"
if (!exists("DISAGGREGATION_SELECTION")) DISAGGREGATION_SELECTION <- NULL

In [None]:
# # For development purposes only!
# DISAGGREGATION_SELECTION = "PREGNANT"

# Estimations de l’incidence brute et ajustée

## 1. Setup

In [None]:
# Suffix used in the names of exported figures so that the selected disaggregation
# is preserved in the filename. "TOTAL" is used when no disaggregation is selected (NULL).
# A plain if/else is used because the condition is a single TRUE/FALSE value;
# ifelse() is meant for vectors and would silently truncate its result to length 1.
DISAGGREGATION_SELECTION_SUFFIX <- if (is.null(DISAGGREGATION_SELECTION)) {
    "TOTAL"
} else {
    DISAGGREGATION_SELECTION
}

In [None]:
# Workspace locations used throughout the notebook
SNT_ROOT_PATH <- "~/workspace"
CODE_PATH     <- file.path(SNT_ROOT_PATH, "code")
CONFIG_PATH   <- file.path(SNT_ROOT_PATH, "configuration")
DATA_PATH     <- file.path(SNT_ROOT_PATH, "data", "dhis2")
# Folder where the figures produced by this report are exported
FIGURES_PATH  <- file.path(
    SNT_ROOT_PATH,
    "pipelines", "snt_dhis2_incidence", "reporting", "outputs", "figures"
)

# load util functions
source(file.path(CODE_PATH, "snt_utils.r"))
# Load palettes
source(file.path(CODE_PATH, "snt_palettes.r"))

# List required packages 
required_packages <- c(
    "dplyr", 
    "tidyr", 
    "ggplot2", 
    "stringr", 
    "arrow", 
    "sf", 
    "reticulate" 
    )

# Execute function
install_and_load(required_packages)

In [None]:
# Point reticulate at the conda Python interpreter so that openhexa.sdk is
# imported from the right environment (set before reticulate is first used below).
Sys.setenv(RETICULATE_PYTHON = "/opt/conda/bin/python")
# Display the path of the Python interpreter reticulate resolved (sanity check)
reticulate::py_config()$python
# Handle on the OpenHEXA SDK Python module; used later to list the files of a dataset
openhexa <- import("openhexa.sdk")

# Environment variables pointing the sf package to the PROJ and GDAL data folders
Sys.setenv(PROJ_LIB = "/opt/conda/share/proj")
Sys.setenv(GDAL_DATA = "/opt/conda/share/gdal")

#### Load `SNT_config`

In [None]:
# Load the SNT configuration (JSON) from the configuration folder.
# On failure the message is printed to the cell output and the notebook stops.
config_json <- tryCatch(
    jsonlite::fromJSON(file.path(CONFIG_PATH, "SNT_config.json")),
    error = function(e) {
        # Separator added so the underlying cause is not glued to the prefix
        msg <- paste0("Error while loading configuration: ", conditionMessage(e))
        cat(msg)
        stop(msg)
    }
)

In [None]:
# Values read from the SNT configuration
DATASET_NAME <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_INCIDENCE
COUNTRY_CODE <- config_json$SNT_CONFIG$COUNTRY_CODE

# Pyramid columns holding the names of administrative levels 1 and 2 (upper-cased)
ADMIN_1_NAME <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_1)
ADMIN_2_NAME <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_2)

# Matching identifier columns: same column name with "_NAME" replaced by "_ID"
ADMIN_1_ID <- ADMIN_1_NAME %>% str_replace("_NAME", "_ID")
ADMIN_2_ID <- ADMIN_2_NAME %>% str_replace("_NAME", "_ID")

#### Load `SNT_metadata`
This is needed for the correct use of palettes and categories (breaks, or scale)

In [None]:
# Load the SNT metadata (JSON); it provides the scale/break values used for palettes.
# On failure the message is printed to the cell output and the notebook stops.
metadata_json <- tryCatch(
    jsonlite::fromJSON(file.path(CONFIG_PATH, "SNT_metadata.json")),
    error = function(e) {
        # Separator added so the underlying cause is not glued to the prefix
        msg <- paste0("[ERROR] Error while loading metadata: ", conditionMessage(e))
        cat(msg)
        stop(msg)
    }
)

log_msg(paste0("SNT metadata loaded from : ", file.path(CONFIG_PATH, "SNT_metadata.json")))

In [None]:
# Break values of the crude-incidence scale, stored in the metadata as a JSON string.
# jsonlite::fromJSON() errors on NULL input, so a missing SCALE entry is mapped to
# NULL here instead of stopping the notebook; break_vals can then be checked for
# emptiness by the code that builds the categories.
if (is.null(metadata_json$INCIDENCE_CRUDE$SCALE)) {
    break_vals <- NULL
    log_msg("[WARNING] INCIDENCE_CRUDE$SCALE not found in SNT_metadata.json.", "warning")
} else {
    break_vals <- jsonlite::fromJSON(metadata_json$INCIDENCE_CRUDE$SCALE)
    log_msg(paste0("Incidence (crude) scale break values loaded from SNT_metadata.json : ", paste(break_vals, collapse = ", ")))
}

## 2. Load data

#### 2.1. Shapes

In [None]:
# Formatted DHIS2 dataset: source of the shapes file loaded here
DATASET_DHIS2 <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_FORMATTED

# Load the country shapes (geojson) from the latest version of that dataset;
# on failure, print the message and stop.
shapes_data <- tryCatch(
    get_latest_dataset_file_in_memory(
        DATASET_DHIS2,
        paste0(COUNTRY_CODE, "_shapes.geojson")
    ),
    error = function(e) {
        msg <- paste("Error while loading DHIS2 Shapes data for: " , COUNTRY_CODE, conditionMessage(e))
        cat(msg)
        stop(msg)
    }
)

#### 2.2. Pyramid
This is needed to add back the `*_NAME` cols to the main data <br>
(Because normally we only output tables with the `*_ID` cols)

In [None]:
# Load the organisation-unit pyramid (parquet) from the latest version of the
# formatted DHIS2 dataset; on failure, print the message and stop.
pyramid_data <- tryCatch(
    get_latest_dataset_file_in_memory(DATASET_DHIS2, paste0(COUNTRY_CODE, "_pyramid.parquet")),
    error = function(e) {
        # Message previously said "Shapes" (copy-paste from the shapes cell)
        msg <- paste("Error while loading DHIS2 Pyramid data for: ", COUNTRY_CODE, conditionMessage(e))
        cat(msg)
        stop(msg)
    }
)

In [None]:
# Reduce the pyramid to the ADM1/ADM2 id and name columns, renamed to the
# generic ADM*_ID / ADM*_NAME names, and drop duplicated rows.
pyramid <- distinct(
  select(
    pyramid_data,
    ADM1_ID = all_of(ADMIN_1_ID),
    ADM1_NAME = all_of(ADMIN_1_NAME),
    ADM2_ID = all_of(ADMIN_2_ID),
    ADM2_NAME = all_of(ADMIN_2_NAME)
  )
)

head(pyramid, n = 3)

#### 2.3. Monthly cases
Needed for <b>coherence checks</b>:
* **TPR** at monthly level over time 
    * Explain changes (or lack thereof) between Crude and Adj1
    * Useful to monitor resistance (or testing behaviour ... ?)
* **Reporting Rate**
    * Explain changes (or lack thereof) between Adj1 and Adj2
* **Indicators** coherence:
    * SUSP > TEST
    * TEST > CONF
    * ... (check and add more ...)


⚠️ Note: **imported** from the 📁 `/data/` folder (not from the OH Dataset). <br>
TBD whether this intermediate file should also be stored in the dataset "SNT_DHIS2_INCIDENCE" ...

In [None]:
# Read the monthly cases table from the workspace data folder
file_path <- file.path(
    DATA_PATH,
    "incidence",
    paste0(COUNTRY_CODE, "_monthly_cases.parquet")
)
monthly_cases <- arrow::read_parquet(file = file_path)
log_msg(paste0("Monthly cases data loaded from : ", file_path))

# Quick look at the size and the first rows
dim(monthly_cases)
head(monthly_cases, n = 3)

In [None]:
# Attach ADM1_NAME / ADM2_NAME to the monthly cases, matching on both id columns
monthly_cases <- monthly_cases %>%
    left_join(pyramid, by = c("ADM1_ID", "ADM2_ID"))

In [None]:
# Preview the first rows of monthly_cases
head(monthly_cases, 3)

#### 2.4. Yearly Incidence
Currently, **each execution of the Incidence pipeline adds a new file to the OH Dataset**, where the filename stores the choice of parameters used.<br>
This introduces the issue of having to choose the correct file to import.

For this, we need to **resolve the correct filename**, based on:
1. Pipeline parameters (also injected here): `COUNTRY_CODE`, `ROUTINE_DATA_CHOICE`
2. Context-derived parameter (based on the filenames available in the Dataset): `REPORTING_RATE_METHOD`

In [None]:
# Display the values used to resolve the name of the incidence file to import
COUNTRY_CODE
ROUTINE_DATA_CHOICE

DATA_PATH # note: named in lower case (data_path) in the previous (code) notebook

**Note**: `REPORTING_RATE_METHOD` is NOT a parameter!<br>
The method is derived from the files available in the dataset `config_json$SNT_DATASET_IDENTIFIERS$DHIS2_REPORTING_RATE`

In [None]:
# Reporting rate dataset and the two file names it may contain (one per method)
rr_dataset_name <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_REPORTING_RATE
file_name_de <- paste0(COUNTRY_CODE, "_reporting_rate_dataelement.parquet")
file_name_ds <- paste0(COUNTRY_CODE, "_reporting_rate_dataset.parquet")

# Determine REPORTING_RATE_METHOD from the file names available in the latest
# version of the dataset (the files themselves are not loaded).
dataset_last_version <- openhexa$workspace$get_dataset(rr_dataset_name)$latest_version
files_iter <- dataset_last_version$files

# Drain the Python iterator into an R list. Iteration ends on the first NULL;
# any error raised while fetching or converting an item is also treated as the end.
# NOTE(review): this also hides genuine iteration errors, which would then surface
# as the "Neither reporting rate file found" error below.
files <- list()
repeat {
  file <- tryCatch(
    py_to_r(iter_next(files_iter)),
    error = function(e) NULL
  )
  if (is.null(file)) break
  files <- append(files, list(file))
}

# vapply() always returns a character vector, also when `files` is empty
filenames <- vapply(files, function(f) as.character(f$filename), character(1))

# The data-element file takes precedence when both files are present
if (file_name_de %in% filenames) {
  REPORTING_RATE_METHOD <- "dataelement"
} else if (file_name_ds %in% filenames) {
  REPORTING_RATE_METHOD <- "dataset"
} else {
  # glue is called with its namespace: the package is not in the list of attached packages
  stop(glue::glue("[ERROR] Neither reporting rate file found for: {COUNTRY_CODE}"))
}

log_msg(paste0("Determined REPORTING_RATE_METHOD: ", REPORTING_RATE_METHOD))

In [None]:
# Build the name of the yearly incidence file from the parameters, so that the
# file imported is the one written by the incidence calculation (code) notebook.
filename_to_import <- glue::glue("{COUNTRY_CODE}_incidence_year_routine-data-{ROUTINE_DATA_CHOICE}_rr-method-{REPORTING_RATE_METHOD}.parquet")

log_msg(
    paste0("Importing yearly incidence data from file : ", filename_to_import)
)

In [None]:
# Load the yearly incidence table from the latest version of the incidence dataset;
# on failure, print the message and stop.
yearly_incidence <- tryCatch(
    get_latest_dataset_file_in_memory(DATASET_NAME, filename_to_import),
    error = function(e) {
        # Message previously referred to a "seasonality file" (copy-paste error)
        msg <- paste("Error while loading yearly incidence file", filename_to_import, "for:", COUNTRY_CODE, conditionMessage(e))
        cat(msg)
        stop(msg)
    }
)

# Quick look at the size and the first rows
dim(yearly_incidence)
head(yearly_incidence, 3)

## Plot settings

### 🎨 Dynamic categories and color assignment

##### 1. Define breaks and labels

In [None]:
# Fall back to default break values when none were obtained from the metadata
# (length() is 0 for NULL as well as for an empty vector).
if (length(break_vals) == 0) {
    log_msg(
        "[WARNING] No break values found in SNT_metadata.json for INCIDENCE_CRUDE$SCALE. Using default values.",
        "warning"
    )
    break_vals <- c(100, 250, 450, 1000)
}

In [None]:
# Build the cut points and the category labels from break_vals.
# Note: assumes that the data starts at 0!

# Full set of cut points (0 to Infinity)
full_breaks <- c(0, break_vals, Inf)

# Labels: "< first", "a-b" for each pair of consecutive breaks, "> last".
# The middle labels are only built when there are at least two break values:
# paste0() on zero-length vectors would otherwise return a stray "-" label and
# the number of labels would no longer match the number of intervals.
n_break_vals <- length(break_vals)
middle_labels <- if (n_break_vals > 1) {
  paste0(break_vals[-n_break_vals], "-", break_vals[-1])
} else {
  character(0)
}

labels <- c(
  paste0("< ", break_vals[1]),               # First label
  middle_labels,                             # Middle labels
  paste0("> ", break_vals[n_break_vals])     # Last label
)

# Check
labels

##### 3. Pick appropriate palette

In [None]:
# One colour is needed per category label
nr_of_colors <- length(labels)

# Palette returned by get_range_from_count() for that number of categories
palette_to_use <- nr_of_colors |> get_range_from_count()

# # Need to make palettes as named vectors so that scale_color_manual() and scale_fill_manual() can use them properly
# # Note: need to reverse order of labels to match the palette order "meaning" (red "" should correspond to lowest value)
# names(palette_to_use) <- rev(labels)

# Show the selected palette
print(palette_to_use)


#### Define plot size

In [None]:
# Size of the plots rendered in the notebook output (repr options)
options(repr.plot.width = 20, repr.plot.height = 12)

## Coherence checks

See Jira: https://bluesquare.atlassian.net/browse/SNT25-272

#### 1. TPR

In [None]:
# Add yearly totals of CONF and TEST (per ADM2 and YEAR) to every monthly row,
# then derive the yearly TPR shown on top of the monthly TPR plots.
monthly_cases_yearly <- monthly_cases %>%
    mutate(
        CONF_yearly = sum(CONF, na.rm = TRUE),
        TEST_yearly = sum(TEST, na.rm = TRUE),
        .by = c(ADM1_NAME, ADM2_ID, ADM2_NAME, YEAR)
    ) %>%
    mutate(
        # NOTE(review): TPR_yearly is set to 1 when the yearly TEST total is 0 - confirm this is intended
        TPR_yearly = ifelse(
            !is.na(CONF_yearly) & !is.na(TEST_yearly) & (TEST_yearly != 0),
            CONF_yearly / TEST_yearly,
            1
        )
    )

head(monthly_cases_yearly)

##### 1.1. TPR (monthly) over time

In [None]:
# Monthly TPR, one line per ADM2, faceted by ADM1 (rows) and YEAR (columns)
ggplot(monthly_cases_yearly) +
  geom_line(
    aes(x = MONTH, y = TPR, group = ADM2_NAME),
    color = "grey21",
    alpha = 0.75
  ) +
  # Baseline at 0
  geom_hline(yintercept = 0, color = "grey21", linewidth = 0.5) +
  facet_grid(rows = vars(ADM1_NAME), cols = vars(YEAR), switch = "y") +
  scale_x_continuous(breaks = seq(1, 12, 1)) +
  # y axis shown as percentages and restricted to the 0-1 range
  scale_y_continuous(limits = c(0, 1), labels = scales::percent_format(accuracy = 1L)) +
  labs(title = "Taux de Positivit√© des Tests (TPR) pour ADM2 et mois") +
  theme_minimal() +
  theme(
    axis.title.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "grey21"),
    strip.placement = "outside",
    strip.text = element_text(color = "white")
  )

# Export the plot above (A4 portrait, in cm) to the figures folder
ggsave(
    file.path(FIGURES_PATH, glue::glue("TPR_monthly_{DISAGGREGATION_SELECTION_SUFFIX}.png")),
    width = 21,
    height = 29.7,
    units = "cm",
    dpi = 200,
    bg = "white",
    create.dir = TRUE
)

##### 1.2. TPR (monthly & yearly) over time

In [None]:

# Same plot as the monthly TPR, with the yearly TPR drawn underneath the monthly lines
ggplot(monthly_cases_yearly) +
  # Yearly TPR (drawn first, lighter)
  geom_line(
    aes(x = MONTH, y = TPR_yearly, group = ADM2_NAME),
    color = "grey21",
    alpha = 0.25,
    linewidth = 0.5
  ) +
  # Monthly TPR
  geom_line(
    aes(x = MONTH, y = TPR, group = ADM2_NAME),
    color = "grey21",
    alpha = 0.75
  ) +
  # Baseline at 0
  geom_hline(yintercept = 0, color = "grey21", linewidth = 0.5) +
  facet_grid(rows = vars(ADM1_NAME), cols = vars(YEAR), switch = "y") +
  scale_x_continuous(breaks = seq(1, 12, 1)) +
  # y axis shown as percentages and restricted to the 0-1 range
  scale_y_continuous(limits = c(0, 1), labels = scales::percent_format(accuracy = 1L)) +
  labs(
    title = "Taux de Positivit√© des Tests (TPR) pour ADM2 at pour mois et ann√©e",
    subtitle = "Les valeurs agr√©g√©es par ann√©e sont indiqu√©es comme lignes horizontales."
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "grey21"),
    strip.placement = "outside",
    strip.text = element_text(color = "white")
  )

# Export the plot above (A4 portrait, in cm) to the figures folder
ggsave(
    file.path(FIGURES_PATH, glue::glue("TPR_monthly_yearly_{DISAGGREGATION_SELECTION_SUFFIX}.png")),
    width = 21,
    height = 29.7,
    units = "cm",
    dpi = 200,
    bg = "white",
    create.dir = TRUE
)

#### 2. RR
For more details, check the **report** notebook of the reporting rate method used. Possible options:
* **Dataset**: pipelines/snt_dhis2_reporting_rate_dataset/reporting/outputs/**snt_dhis2_reporting_rate_dataset_report**\_OUTPUT\_\*.ipynb
* **DataElement**: work in progress ...

⚠️⚠️⚠️ **TO DO**: align the code here with the reporting rate report notebook (use the "🎨 NEW dynamic colors & breaks" approach) ⚠️⚠️⚠️

In [None]:
# Heatmap of the monthly reporting rate: one tile per ADM2 (rows) and MONTH (columns),
# faceted by ADM1 (rows) and YEAR (columns).
ggplot(data = monthly_cases) +
  geom_tile(aes(x = MONTH,
                # ADM2 names in reversed factor order on the y axis
                y = forcats::fct_rev(ADM2_NAME),
                # fill = REPORTING_RATE_CATEGORY
                fill = REPORTING_RATE
                ), 
                color = "white",
                show.legend = TRUE,
                # Fill NA values with white
                na.rm = FALSE
                ) +
# Categorical (manual) fill scale, currently disabled in favour of the continuous scale below
#   scale_fill_manual(
#       values = palette_to_use, # üé® NEW dynamic colors & breaks!
#       na.value = "white",
#       name = "Reporting Rate: "
#     ) +
  # Continuous viridis scale with reversed direction; missing values shown in white
  scale_fill_viridis_c(
      option = "viridis",
      na.value = "white",
      name = "Reporting Rate:",
      direction = -1
      # labels = scales::percent_format(accuracy = 1L)
    ) +
  scale_x_continuous(breaks = seq(1, 12, 1)) +
  # Free y scale and space so each ADM1 facet only lists (and is sized for) its own ADM2s
  facet_grid(rows = vars(ADM1_NAME), cols = vars(YEAR), 
    scales = "free_y", space = "free_y",
    switch = "y") +
  theme_minimal() +
  theme(
    plot.subtitle = element_text(margin=margin(0,0,20,0)),
    legend.position = "bottom",
    legend.key.height = unit(0.25, "cm"),
    axis.text.x = element_text(size = 7),
    axis.title.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    strip.placement = "outside",    
    strip.text = element_text(color = "white", face = "bold", size = 10),
    strip.background = element_rect(fill = "grey21")
  ) +
  # NOTE(review): guide_legend() is applied to a continuous fill scale - confirm a
  # discrete-key legend (rather than a colour bar) is the intended rendering
  guides(fill = guide_legend(nrow = 1))

# Export plot as png (A4 portrait, in cm)
ggsave(
    file.path(FIGURES_PATH, glue::glue("ReportingRate_heatmap_monthly_{DISAGGREGATION_SELECTION_SUFFIX}.png")),
    create.dir = TRUE,
    bg = "white",
    units = "cm",
    width = 21,
    height = 29.7,
    dpi = 200)

In [None]:
# Completeness check on the reporting rate data:
# count how many values of REPORTING_RATE are missing (NA) and log the outcome.
na_count <- sum(is.na(monthly_cases$REPORTING_RATE))
if (na_count > 0) {
    # glue is called with its namespace: the package is not in the list of attached packages
    log_msg(glue::glue("‚ö†Ô∏è Warning: Reporting Rate data contains {na_count} missing values (NA) in 'REPORTING_RATE' column."), "warning")
} else {
    log_msg("‚úÖ Reporting Rate data contains no missing values (NA) in 'REPORTING_RATE' column.")
}

### 3. Coherence checks on Incidence: Scatter plots

Logic: each level of adjustment should produce values that are greater (or equal) to the previous level.<br>

Namely:
* Crude <= Adj1
* Adj1 <= Adj2
* Adj2 <= Adj3

Given that Crude, Adj1, Adj2, and Adj3 are calculated by aggregating `CONF`, `N1`, `N2`, and `N3` at ADM2 x YEAR, we can first verify that the relationship between these values is coherent. Namely, check if
* `CONF` <= `N1`
* `N1` <= `N2`
* `N2` <= `N3` 

#### 3.1. Incidence "metrics"
Metrics used to calculate incidence: `CONF`, `N1`, `N2`, (and `N3`)

In [None]:
# CONF vs N1

# Caption text: reports how many rows have CONF greater than N1 (rows with NA are ignored).
# The text is only shown in the plot caption, so glue() receives the template alone:
# any extra unnamed argument (e.g. a log level) would be appended to the caption.
conf_greater_n1_count <- sum(monthly_cases$CONF > monthly_cases$N1, na.rm = TRUE)
if (conf_greater_n1_count > 0) {
    warning_text <- glue::glue("‚úò Warning: There are {conf_greater_n1_count} instances where CONF is greater than N1.")
} else {
    warning_text <- "‚úî All CONF values are less than or equal to N1."
}

# Scatter plot of CONF against N1; the dashed red line is y = x,
# so points above it are rows where CONF exceeds N1.
ggplot(data = monthly_cases) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
  geom_point(
    aes(
      x = N1,
      y = CONF),
    alpha = 0.5) +
  labs(
    title = "CONF vs N1",
    subtitle = "N1 is expected to be greater or equal to CONF",
    caption = warning_text
    ) +
  theme_minimal() +
  theme(
    aspect.ratio = 1,
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 0)
  )

In [None]:
# N1 vs N2

# Caption text: reports how many rows have N1 greater than N2 (rows with NA are ignored).
# The text is only shown in the plot caption, so glue() receives the template alone:
# any extra unnamed argument (e.g. a log level) would be appended to the caption.
n1_greater_n2_count <- sum(monthly_cases$N1 > monthly_cases$N2, na.rm = TRUE)
if (n1_greater_n2_count > 0) {
    warning_text <- glue::glue("‚úò Warning: There are {n1_greater_n2_count} instances where N1 is greater than N2.")
} else {
    warning_text <- "‚úî All N1 values are less than or equal to N2."
}

# Scatter plot of N1 against N2; the dashed red line is y = x,
# so points above it are rows where N1 exceeds N2.
ggplot(data = monthly_cases) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
  geom_point(
    aes(
      x = N2,
      y = N1),
    alpha = 0.5) +
  labs(title = "N1 vs N2",
       subtitle = "N2 is expected to be greater or equal to N1.",
       caption = warning_text
       ) +
  theme_minimal() +
  theme(
    aspect.ratio = 1,
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 0)
  )

#### 3.2. Incidence values
Actual (calculated) incidence: Crude, Adj1, Adj2, Adj3

In [None]:
# Flag the rows that break the expected ordering between adjustment levels,
# so that they can be displayed in red in the scatter plots:
#   FLAG_CRUDE_VS_ADJTEST  : INCIDENCE_ADJ_TESTING   is lower than INCIDENCE_CRUDE
#   FLAG_ADJTEST_VS_ADJREP : INCIDENCE_ADJ_REPORTING is lower than INCIDENCE_ADJ_TESTING
yearly_incidence_plot <- mutate(
  yearly_incidence,
  FLAG_CRUDE_VS_ADJTEST = INCIDENCE_ADJ_TESTING < INCIDENCE_CRUDE,
  FLAG_ADJTEST_VS_ADJREP = INCIDENCE_ADJ_REPORTING < INCIDENCE_ADJ_TESTING
)

# The care-seeking flag is only added when that column exists and is not entirely NA
if ("INCIDENCE_ADJ_CARESEEKING" %in% names(yearly_incidence) && !all(is.na(yearly_incidence$INCIDENCE_ADJ_CARESEEKING))) {
    # TRUE where INCIDENCE_ADJ_TESTING is greater than INCIDENCE_ADJ_CARESEEKING
    yearly_incidence_plot <- mutate(
      yearly_incidence_plot,
      FLAG_ADJTEST_VS_ADJCARE = INCIDENCE_ADJ_TESTING > INCIDENCE_ADJ_CARESEEKING
    )
}

head(yearly_incidence_plot)

##### Crude vs Adj for Testing (Adj1)

In [None]:
# Subtitle text: reports how many rows have INCIDENCE_CRUDE greater than INCIDENCE_ADJ_TESTING.
# The text is only shown in the plot, so glue() receives the template alone:
# any extra unnamed argument (e.g. a log level) would be appended to the subtitle.
incidence_crude_greater_adj1_count <- sum(yearly_incidence_plot$FLAG_CRUDE_VS_ADJTEST, na.rm = TRUE)
if (incidence_crude_greater_adj1_count > 0) {
    warning_text <- glue::glue("‚úò Attention: il y a {incidence_crude_greater_adj1_count} instances o√π INCIDENCE_CRUDE est sup√©rieure √† INCIDENCE_ADJ_TESTING.")
} else {
    warning_text <- "‚úî Toutes les valeurs INCIDENCE_CRUDE sont inf√©rieures ou √©gales √† INCIDENCE_ADJ_TESTING."
}

# Scatter plot faceted by YEAR; flagged points (FLAG_CRUDE_VS_ADJTEST) are drawn in red.
# The dashed line is y = x; axis breaks reuse the incidence scale break values.
ggplot(data = yearly_incidence_plot) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "black") +
  geom_point(
    aes(
      x = INCIDENCE_CRUDE,
      y = INCIDENCE_ADJ_TESTING,
      color = FLAG_CRUDE_VS_ADJTEST),
    alpha = 0.7,
    size = 2) +
  scale_color_manual(
    values = c("FALSE" = "black", "TRUE" = "red")
  ) +
  scale_x_continuous(limits = c(0, NA), breaks = c(0, break_vals)) +
  scale_y_continuous(limits = c(0, NA), breaks = c(0, break_vals)) +
  facet_wrap(vars(YEAR), nrow = 1) +
  labs(
    title = "INCIDENCE_CRUDE vs INCIDENCE_ADJ_TESTING",
    subtitle = warning_text,
    caption = glue::glue("M√©thode de calcul de N1: {N1_METHOD}.\nUtilisation de la population ajust√©e: {USE_ADJUSTED_POPULATION}.\nDonn√©es de routine: {ROUTINE_DATA_CHOICE}.\nM√©thode de d√©tection des valeurs aberrantes: {OUTLIER_DETECTION_METHOD}.\nTaux de d√©claration calcul√© selon la m√©thode : {REPORTING_RATE_METHOD}.\nUtilisation des donn√©es CSB: {USE_CSB_DATA}.")
    ) +
  theme_minimal() +
  theme(
    aspect.ratio = 1,
    legend.position = "none",
    strip.text = element_text(face = "bold", size = 10),
    panel.grid.minor = element_blank(),
    plot.caption = element_text(size = 7, hjust = 0)
  )

# Export plots as png
ggsave(
    file.path(FIGURES_PATH, glue::glue("Incidence_year_crude_vs_adj_testing_{DISAGGREGATION_SELECTION_SUFFIX}.png")),
    create.dir = TRUE,
    bg = "white",
    units = "cm",
    width = 25,
    height = 12.5,
    dpi = 200)

##### Adj for Testing (Adj1) vs Adj for Reporting (Adj2)

In [None]:
# Subtitle text: reports how many rows have INCIDENCE_ADJ_TESTING greater than INCIDENCE_ADJ_REPORTING.
# The text is only shown in the plot, so glue() receives the template alone:
# any extra unnamed argument (e.g. a log level) would be appended to the subtitle.
incidence_adj1_greater_adj2_count <- sum(yearly_incidence_plot$FLAG_ADJTEST_VS_ADJREP, na.rm = TRUE)
if (incidence_adj1_greater_adj2_count > 0) {
    warning_text <- glue::glue("‚úò Attention: il y a {incidence_adj1_greater_adj2_count} instances o√π INCIDENCE_ADJ_TESTING est sup√©rieure √† INCIDENCE_ADJ_REPORTING.")
} else {
    warning_text <- "‚úî Toutes les valeurs INCIDENCE_ADJ_TESTING sont inf√©rieures ou √©gales √† INCIDENCE_ADJ_REPORTING."
}

# Scatter plot faceted by YEAR; flagged points (FLAG_ADJTEST_VS_ADJREP) are drawn in red.
# The dashed line is y = x; axis breaks reuse the incidence scale break values.
ggplot(data = yearly_incidence_plot) +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "black") +
    geom_point(
        aes(
        x = INCIDENCE_ADJ_TESTING,
        y = INCIDENCE_ADJ_REPORTING,
        color = FLAG_ADJTEST_VS_ADJREP),
        alpha = 0.7,
        size = 2) +
    scale_color_manual(
        values = c("FALSE" = "black", "TRUE" = "red")
    ) +
    scale_x_continuous(limits = c(0, NA), breaks = c(0, break_vals)) +
    scale_y_continuous(limits = c(0, NA), breaks = c(0, break_vals)) +
    facet_wrap(vars(YEAR), nrow = 1) +
    labs(
        title = "INCIDENCE_ADJ_TESTING vs INCIDENCE_ADJ_REPORTING",
        subtitle = warning_text,
        caption = glue::glue("M√©thode de calcul de N1: {N1_METHOD}.\nUtilisation de la population ajust√©e: {USE_ADJUSTED_POPULATION}.\nDonn√©es de routine: {ROUTINE_DATA_CHOICE}.\nM√©thode de d√©tection des valeurs aberrantes: {OUTLIER_DETECTION_METHOD}.\nTaux de d√©claration calcul√© selon la m√©thode : {REPORTING_RATE_METHOD}.\nUtilisation des donn√©es CSB: {USE_CSB_DATA}.")
        ) +
    theme_minimal() +
    theme(
        aspect.ratio = 1,
        legend.position = "none",
        strip.text = element_text(face = "bold", size = 10),
        panel.grid.minor = element_blank(),
        plot.caption = element_text(size = 7, hjust = 0)
    )

# Export plots as png
ggsave(
    file.path(FIGURES_PATH, glue::glue("Incidence_year_adj_testing_vs_adj_reporting_{DISAGGREGATION_SELECTION_SUFFIX}.png")),
    create.dir = TRUE,
    bg = "white",
    units = "cm",
    width = 25,
    height = 12.5,
    dpi = 200)

##### Adj for Reporting (Adj2) vs Adj for Care Seeking Behaviour (Adj3)

In [None]:
# Only run when the care-seeking adjusted incidence exists and is not entirely NA
# NOTE(review): this compares INCIDENCE_ADJ_TESTING (not INCIDENCE_ADJ_REPORTING) with
# INCIDENCE_ADJ_CARESEEKING - confirm which pair is intended.
if ("INCIDENCE_ADJ_CARESEEKING" %in% colnames(yearly_incidence) && any(!is.na(yearly_incidence$INCIDENCE_ADJ_CARESEEKING))) {

    # Subtitle text: reports how many rows have INCIDENCE_ADJ_TESTING greater than INCIDENCE_ADJ_CARESEEKING.
    # glue() receives the template alone: any extra unnamed argument would be appended to the text.
    incidence_adj2_greater_adj3_count <- sum(yearly_incidence$INCIDENCE_ADJ_TESTING > yearly_incidence$INCIDENCE_ADJ_CARESEEKING, na.rm = TRUE)
    if (incidence_adj2_greater_adj3_count > 0) {
       warning_text <- glue::glue("‚úò Attention: il y a {incidence_adj2_greater_adj3_count} instances o√π INCIDENCE_ADJ_TESTING est sup√©rieure √† INCIDENCE_ADJ_CARESEEKING.")
    } else {
       warning_text <- "‚úî Toutes les valeurs INCIDENCE_ADJ_TESTING sont inf√©rieures ou √©gales √† INCIDENCE_ADJ_CARESEEKING."
    }

    # Scatter plot faceted by YEAR; flagged points (FLAG_ADJTEST_VS_ADJCARE) are drawn in red.
    # The data is yearly_incidence_plot, the table that carries the FLAG_* columns.
    careseeking_plot <- ggplot(data = yearly_incidence_plot) +
        geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "black") +
        geom_point(
            aes(
            x = INCIDENCE_ADJ_TESTING,
            y = INCIDENCE_ADJ_CARESEEKING,
            color = FLAG_ADJTEST_VS_ADJCARE),
            alpha = 0.7,
            size = 2) +
        scale_color_manual(
            values = c("FALSE" = "black", "TRUE" = "red")
        ) +
        scale_x_continuous(limits = c(0, NA), breaks = c(0, break_vals)) +
        scale_y_continuous(limits = c(0, NA), breaks = c(0, break_vals)) +
        facet_wrap(vars(YEAR), nrow = 1) +
        labs(
            title = "INCIDENCE_ADJ_TESTING vs INCIDENCE_ADJ_CARESEEKING",
            subtitle = warning_text,
            caption = glue::glue("M√©thode de calcul de N1: {N1_METHOD}.\nUtilisation de la population ajust√©e: {USE_ADJUSTED_POPULATION}.\nDonn√©es de routine: {ROUTINE_DATA_CHOICE}.\nM√©thode de d√©tection des valeurs aberrantes: {OUTLIER_DETECTION_METHOD}.\nTaux de d√©claration calcul√© selon la m√©thode : {REPORTING_RATE_METHOD}.\nUtilisation des donn√©es CSB: {USE_CSB_DATA}.")
            ) +
        theme_minimal() +
        theme(
            aspect.ratio = 1,
            legend.position = "none",
            strip.text = element_text(face = "bold", size = 10),
            panel.grid.minor = element_blank(),
            plot.caption = element_text(size = 7, hjust = 0)
        )

    # A plot built inside an if block is not displayed automatically: print it explicitly
    print(careseeking_plot)

    # Export plots as png
    ggsave(
        file.path(FIGURES_PATH, glue::glue("Incidence_year_adj_testing_vs_adj_careseeking_{DISAGGREGATION_SELECTION_SUFFIX}.png")),
        plot = careseeking_plot,
        create.dir = TRUE,
        bg = "white",
        units = "cm",
        width = 25,
        height = 12.5,
        dpi = 200)

}

## Incidence du paludisme par année par district sanitaire

#### Par année et niveau d'ajustement

In [None]:
# Step 1: Prepare long-form data (one row per ADM2, YEAR and incidence type)
incidence_long <- yearly_incidence %>%
  select(
    ADM2_ID, YEAR, POPULATION,
    INCIDENCE_CRUDE,
    INCIDENCE_ADJ_TESTING,
    INCIDENCE_ADJ_REPORTING,
    # Optional column: any_of() keeps it when present and does not fail when absent
    any_of("INCIDENCE_ADJ_CARESEEKING")
  ) %>%
  pivot_longer(
    cols = starts_with("INCIDENCE"),
    names_to = "INCIDENCE_TYPE",
    values_to = "incidence"
  ) %>%
  mutate(
    # Label shown in the facet strips
    incidence_type_label = case_when(
      INCIDENCE_TYPE == "INCIDENCE_CRUDE"             ~ "Brute",
      INCIDENCE_TYPE == "INCIDENCE_ADJ_TESTING"       ~ "Ajust√©e 1\n(Test)",
      INCIDENCE_TYPE == "INCIDENCE_ADJ_REPORTING"     ~ "Ajust√©e 2\n(Test + Compl√©tude)",
      INCIDENCE_TYPE == "INCIDENCE_ADJ_CARESEEKING"   ~ "Ajust√©e 3\n(Test + Compl√©tude + Soins)",
      TRUE ~ INCIDENCE_TYPE
    )
  )

# Fix the order of incidence_type_label for plotting (crude first, then Adj1 to Adj3)
incidence_long$incidence_type_label <- factor(
  incidence_long$incidence_type_label,
  levels = c("Brute", "Ajust√©e 1\n(Test)", "Ajust√©e 2\n(Test + Compl√©tude)", "Ajust√©e 3\n(Test + Compl√©tude + Soins)")
)
# # Remove INCIDENCE_ADJ_CARESEEKING if this is all empty ...
# filter(!is.na(incidence))


# Step 2: Join with shapefile (all shapes are kept)
map_data_long <- shapes_data %>%
  left_join(incidence_long, by = "ADM2_ID")


# Step 3: categorize incidence based on break values from metadata
map_data_long <- map_data_long %>%
  mutate(
    INCIDENCE_CATEGORY = cut(
      incidence,
      breaks = full_breaks,
      labels = labels,
      right = TRUE,          # intervals are closed on the right: a value equal to a break falls in the lower category
      include.lowest = TRUE  # so that 0 is included in the first category
    )
  )

In [None]:
# Size of the plots rendered in the notebook output
options(repr.plot.width = 20, repr.plot.height = 12)

# Subtitle text: when DISAGGREGATION_SELECTION is NULL, state that no specific
# disaggregation is selected instead of interpolating the value.
if (is.null(DISAGGREGATION_SELECTION)) {
    subtitle_text <- "Brute et ajust√©e selon les √©tapes OMS.\nAucune d√©sagr√©gation sp√©cifique s√©lectionn√©e."
} else {
    subtitle_text <- glue::glue("Brute et ajust√©e selon les √©tapes OMS.\nD√©sagr√©gation utilis√©e: {DISAGGREGATION_SELECTION}.")
}

# Maps of the incidence category, faceted by incidence type (rows) and year (columns)
ggplot(map_data_long) +
  geom_sf(aes(fill = INCIDENCE_CATEGORY), color = "white", size = 0.2) +
  facet_grid(
    rows = vars(incidence_type_label),
    cols = vars(YEAR)
    ) +
  scale_fill_manual(values = palette_to_use, name = "Incidence (pour 1000)") +
  labs(
    title = "Incidence annuelle du paludisme par district sanitaire",
    subtitle = subtitle_text,
    caption = glue::glue("M√©thode de calcul de N1: {N1_METHOD}.\nUtilisation de la population ajust√©e: {USE_ADJUSTED_POPULATION}.\nDonn√©es de routine: {ROUTINE_DATA_CHOICE}.\nM√©thode de d√©tection des valeurs aberrantes: {OUTLIER_DETECTION_METHOD}.\nTaux de d√©claration calcul√© selon la m√©thode : {REPORTING_RATE_METHOD}.\nUtilisation des donn√©es CSB: {USE_CSB_DATA}.")
  ) +
  theme_minimal(base_size = 14) +
  # No trailing comma after the last theme element
  theme(
    strip.text = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 7, hjust = 0),
    legend.position = "right",
    legend.justification = "top",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

# Export the map as png; white background, like the other figures of this report
ggsave(
  file.path(FIGURES_PATH, 
  glue::glue("Incidence_faceted_year_adjustment_{DISAGGREGATION_SELECTION_SUFFIX}.png")),
  create.dir = TRUE,
  bg = "white",
  units = "cm",
  width = 31,
  height = 31,
  dpi = 200
  )

#### Moyenne annuelle (toutes années confondues)

In [None]:
# Mean incidence per ADM2 and incidence type across all years (missing values ignored).
# Known issue noted by the authors: when reporting rate data is missing, the yearly
# sum of N2 can be smaller than the sum of N1.
incidence_long_mean <- incidence_long %>%
  select(-POPULATION) %>%
  group_by(ADM2_ID, INCIDENCE_TYPE, incidence_type_label) %>%
  summarise(
    incidence = mean(incidence, na.rm = TRUE),
    .groups = "drop"
  )

# Join the means to the shapes (all shapes are kept), then assign each mean to a
# category using the break values from the metadata.
map_data_long_mean <- left_join(shapes_data, incidence_long_mean, by = "ADM2_ID") %>%
  mutate(
    INCIDENCE_CATEGORY = cut(
      incidence,
      breaks = full_breaks,
      labels = labels,
      right = TRUE,
      include.lowest = TRUE
    )
  )

# head(map_data_long_mean)

In [None]:
# Subtitle text: mention the selected disaggregation, or state that none is selected
if (is.null(DISAGGREGATION_SELECTION)) {
    subtitle_text_mean <- "Moyenne annuelle (toutes ann√©es confondues).\nAucune d√©sagr√©gation sp√©cifique s√©lectionn√©e."
} else {
    subtitle_text_mean <- glue::glue("Moyenne annuelle (toutes ann√©es confondues).\nD√©sagr√©gation utilis√©e: {DISAGGREGATION_SELECTION}.")
}

# First and last year covered by the yearly incidence data (used in the exported filename)
YEAR_RANGE <- paste0(min(yearly_incidence$YEAR), "-", max(yearly_incidence$YEAR))

# Maps of the mean incidence category, one panel per incidence type on a single row
ggplot(map_data_long_mean) +
  geom_sf(aes(fill = INCIDENCE_CATEGORY), color = "white", size = 0.2) +
  facet_wrap(vars(incidence_type_label), nrow = 1) +
  scale_fill_manual(name = "Incidence (pour 1000)", values = palette_to_use) +
  labs(
    title = "Incidence moyenne du paludisme par district sanitaire",
    subtitle = subtitle_text_mean,
    caption = glue::glue("M√©thode de calcul de N1: {N1_METHOD}.\nUtilisation de la population ajust√©e: {USE_ADJUSTED_POPULATION}.\nDonn√©es de routine: {ROUTINE_DATA_CHOICE}.\nM√©thode de d√©tection des valeurs aberrantes: {OUTLIER_DETECTION_METHOD}.\nTaux de d√©claration calcul√© selon la m√©thode : {REPORTING_RATE_METHOD}.\nUtilisation des donn√©es CSB: {USE_CSB_DATA}.")
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    legend.justification = "top",
    legend.position = "right",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.caption = element_text(size = 7, hjust = 0),
    plot.subtitle = element_text(size = 13),
    plot.title = element_text(face = "bold", size = 16),
    strip.text = element_text(face = "bold", size = 12)
  )

# Export the map as png
ggsave(
    file.path(
        FIGURES_PATH,
        glue::glue("Incidence_faceted_adjustment_{DISAGGREGATION_SELECTION_SUFFIX}_mean-{YEAR_RANGE}.png")
    ),
    width = 41,
    height = 21,
    units = "cm",
    dpi = 200,
    bg = "white",
    create.dir = TRUE
)