### 1. Setup

In [None]:
# ---- Workspace locations ----
SNT_ROOT_PATH <- "/home/hexa/workspace"
REPORTING_NB_OUTPUTS_PATH <- file.path(
  SNT_ROOT_PATH,
  "pipelines/snt_dhis2_reporting_rate_dataset/reporting/outputs"
)
CODE_PATH <- file.path(SNT_ROOT_PATH, "code")            # holds snt_utils.r and snt_palettes.r
CONFIG_PATH <- file.path(SNT_ROOT_PATH, "configuration") # holds the .json configuration files
DATA_PATH <- file.path(SNT_ROOT_PATH, "data", "dhis2")

# ---- Helper scripts ----
# snt_utils.r must be sourced before install_and_load() is called below.
invisible(lapply(
  file.path(CODE_PATH, c("snt_utils.r", "snt_palettes.r")),
  source
))

# ---- Packages ----
required_packages <- c(
  "arrow", "tidyverse", "stringi", "jsonlite",
  "httr", "reticulate", "glue"
)
install_and_load(required_packages)

# ---- Environment variables ----
# Set in one call, before the Python module is imported below.
Sys.setenv(
  PROJ_LIB = "/opt/conda/share/proj",
  GDAL_DATA = "/opt/conda/share/gdal",
  RETICULATE_PYTHON = "/opt/conda/bin/python"
)

# ---- OpenHEXA SDK (Python module) ----
openhexa <- import("openhexa.sdk")

#### 1.1. Load and check `snt config` file

In [None]:
# Load the SNT configuration file.
# Any read/parse failure is printed and then re-raised, which stops the notebook.
snt_config_file <- file.path(CONFIG_PATH, "SNT_config.json")

config_json <- tryCatch(
  jsonlite::fromJSON(snt_config_file),
  error = function(e) {
    # ": " keeps the underlying cause readable instead of glued to the prefix
    msg <- paste0("[ERROR] Error while loading configuration: ", conditionMessage(e))
    cat(msg)
    stop(msg)
  }
)

log_msg(paste0("SNT configuration loaded from : ", snt_config_file))

In [None]:
# Country-level settings read from the SNT_CONFIG section
# (the two DHIS2_ADMINISTRATION_* values are upper-cased)
snt_settings <- config_json$SNT_CONFIG
COUNTRY_CODE <- snt_settings$COUNTRY_CODE
ADMIN_1 <- toupper(snt_settings$DHIS2_ADMINISTRATION_1)
ADMIN_2 <- toupper(snt_settings$DHIS2_ADMINISTRATION_2)

# Dataset identifiers read from the SNT_DATASET_IDENTIFIERS section
snt_dataset_ids <- config_json$SNT_DATASET_IDENTIFIERS
REPORTING_RATE_DATASET_NAME <- snt_dataset_ids$DHIS2_REPORTING_RATE
DHIS2_FORMATTED_DATASET_NAME <- snt_dataset_ids$DHIS2_DATASET_FORMATTED

#### 1.2. Load and check `snt metadata` file
This is needed for the correct use of palettes and categories (breaks, or scale)

In [None]:
# Load the SNT metadata file.
# Any read/parse failure is printed and then re-raised, which stops the notebook.
snt_metadata_file <- file.path(CONFIG_PATH, "SNT_metadata.json")

metadata_json <- tryCatch(
  jsonlite::fromJSON(snt_metadata_file),
  error = function(e) {
    # ": " keeps the underlying cause readable instead of glued to the prefix
    msg <- paste0("[ERROR] Error while loading metadata: ", conditionMessage(e))
    cat(msg)
    stop(msg)
  }
)

log_msg(paste0("SNT metadata loaded from : ", snt_metadata_file))

In [None]:
# Read the reporting-rate scale (category break values) from the metadata.
# jsonlite::fromJSON() only accepts a JSON string, so calling it on a missing (NULL)
# entry raises an error; that would stop the notebook before the default-value
# fallback further down could run. Handle the three possible shapes explicitly:
#   - missing entry       -> NULL (the downstream fallback then supplies defaults)
#   - JSON-encoded string -> parsed with fromJSON() (original behaviour)
#   - already parsed      -> used as is
scale_raw <- metadata_json$REPORTING_RATE$SCALE

break_vals <- if (is.null(scale_raw)) {
  NULL
} else if (is.character(scale_raw)) {
  jsonlite::fromJSON(scale_raw)
} else {
  scale_raw
}

# Only report loaded values when there are any
if (length(break_vals) > 0) {
  log_msg(paste0("Reporting Rate scale break values loaded from SNT_metadata.json : ", paste(break_vals, collapse = ", ")))
}

### 2. Load Data

#### 2.1. Output of pipeline notebook
Import file named `{COUNTRY_CODE}_reporting_rate_dataset.parquet` from **OH Dataset** "SNT_DHIS2_REPORTING_RATE" (as in `config_json$SNT_DATASET_IDENTIFIERS$DHIS2_REPORTING_RATE`)

In [None]:

# Fetch the reporting-rate file from the latest version of the OH dataset.
# On failure the message is printed and the error is re-raised (notebook stops).
reporting_rate_dataset <- tryCatch(
  get_latest_dataset_file_in_memory(
    REPORTING_RATE_DATASET_NAME,
    glue::glue("{COUNTRY_CODE}_reporting_rate_dataset.parquet")
  ),
  error = function(e) {
    msg <- paste("Error while loading Reporting Rate (Dataset) data file for: ", COUNTRY_CODE, conditionMessage(e))
    cat(msg)
    stop(msg)
  }
)

# Log the dataset name and the dimensions of what was loaded
log_msg(
  glue::glue("Data file loaded from dataset : {REPORTING_RATE_DATASET_NAME} dataframe dimensions: {paste(dim(reporting_rate_dataset), collapse=', ')}")
)

# Quick visual check: dimensions and the first two rows
dim(reporting_rate_dataset)
head(reporting_rate_dataset, 2)

#### 2.2. Shapes
Needed to draw the choropleth map.

In [None]:
# Load the shapes (geometries) from the latest version of the formatted DHIS2 dataset.
# A failure is not fatal here: a warning is logged and `shapes` is set to NULL.
shapes <- tryCatch(
  get_latest_dataset_file_in_memory(DHIS2_FORMATTED_DATASET_NAME, paste0(COUNTRY_CODE, "_shapes.geojson")),
  error = function(e) {
    msg <- paste0(COUNTRY_CODE , " Shapes data is not available in dataset: '" , DHIS2_FORMATTED_DATASET_NAME, "' last version.")
    log_msg(msg, "warning")
    # The handler's return value becomes the value of tryCatch(), i.e. of `shapes`
    NULL
  }
)

# Only report success when something was actually loaded; previously the
# "Shapes loaded" message was also emitted right after the failure warning.
if (!is.null(shapes)) {
  log_msg(glue::glue("Shapes loaded from dataset: '{DHIS2_FORMATTED_DATASET_NAME}'. \nDataframe with dimensions: {paste(dim(shapes), collapse=', ')}"))
}

# Quick visual check of the available columns (prints NULL when shapes are missing)
names(shapes)

### 3. Plots

#### 3.1. 🎨 Dynamic category and color assignment

##### 1. Define breaks and labels

In [None]:
# Safety net: fall back to default break values when nothing was found in the
# metadata. length() is 0 for NULL as well as for empty vectors, so one test
# covers both cases.
default_break_vals <- c(0.5, 0.8, 0.9, 0.95, 1.00)

if (length(break_vals) == 0L) {
    log_msg(
        "[WARNING] No break values found in SNT_metadata.json for REPORTING_RATE$SCALE. Using default values.",
        "warning"
    )
    break_vals <- default_break_vals
}

In [None]:
# Build the cut points and the human-readable labels of the reporting-rate categories.
# Note: assumes that the data starts at 0!
# `break_vals` itself is defined upstream (from SNT_metadata.json, or the defaults).
n_breaks <- length(break_vals)

# 1. Full set of cut points (0 to Infinity)
full_breaks <- c(0, break_vals, Inf)

# 2. "a - b" labels for each pair of consecutive breaks.
#    Guarded because paste0() recycles zero-length vectors to "": with a single
#    break the unguarded call returns one spurious " - " label, which makes
#    `labels` longer than the number of intervals and breaks cut() downstream.
middle_labels <- if (n_breaks > 1) {
  paste0(break_vals[-n_breaks], " - ", break_vals[-1])
} else {
  character(0)
}

# 3. Assemble: below the first break, between breaks, above the last break
labels <- c(
  paste0("< ", break_vals[1]),
  middle_labels,
  paste0("> ", break_vals[n_breaks])
)

# Check
labels

##### 2. Create category col

In [None]:
# Bin REPORTING_RATE into the labelled categories.
# Intervals are closed on the right, so a value equal to a break falls in the
# category that ends at that break; include.lowest also keeps the lowest cut
# point (0) inside the first category.
reporting_rate_dataset <- mutate(
  reporting_rate_dataset,
  REPORTING_RATE_CATEGORY = cut(
    x = REPORTING_RATE,
    breaks = full_breaks,
    labels = labels,
    include.lowest = TRUE,
    right = TRUE
  )
)

##### 3. Pick appropriate palette

In [None]:
# One colour is needed per category label
nr_of_colors <- length(labels)

# Fetch a palette of that size and name its entries with the labels in reverse
# order, so that scale_color_manual() / scale_fill_manual() match colours to
# categories by name.
# NOTE(review): the reversal is meant to align the palette order with its
# "meaning" (red should correspond to the lowest value) - confirm against the
# order returned by get_range_from_count().
palette_to_use <- setNames(get_range_from_count(nr_of_colors), rev(labels))

# Check
palette_to_use


#### 3.2. Line-point plot of reporting rate over time (by ADM2)
This should help identify the ADM2 units and the periods in which the reporting rate is low.

In [None]:
# Line + point plot of the reporting rate by month, one panel per YEAR.

# Aesthetics shared by both layers: one series per ADM2_ID, coloured by category
rr_mapping <- aes(
  x = MONTH,
  y = REPORTING_RATE,
  group = ADM2_ID,
  color = REPORTING_RATE_CATEGORY
)

# Upper y limit follows the data (values > 1 remain visible), plus some headroom
rr_y_upper <- max(reporting_rate_dataset$REPORTING_RATE, na.rm = TRUE) + 0.1

ggplot(data = reporting_rate_dataset) +
  geom_line(mapping = rr_mapping, alpha = 0.3, show.legend = FALSE) +
  geom_point(mapping = rr_mapping) +
  facet_grid(~YEAR) +
  # Colours come from the dynamically built, label-named palette
  scale_color_manual(
    name = "Reporting Rate Categories",
    values = palette_to_use,
    na.value = "white"
  ) +
  scale_x_continuous(breaks = seq(1, 12, 1)) +
  # Axis ticks sit on the category break values
  scale_y_continuous(
    breaks = c(0, break_vals),
    limits = c(0, rr_y_upper)
  ) +
  labs(
    title = "Reporting Rate (Dataset)",
    x = "Month",
    y = "Reporting Rate\n(Dataset)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(margin = margin(0, 0, 20, 0)),
    legend.position = "none",
    legend.title = element_blank(),
    legend.key.width = unit(3, "cm"),
    legend.key.height = unit(0.25, "cm"),
    axis.title.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    strip.placement = "outside",
    strip.text = element_text(face = "bold", size = 10)
  )

In [None]:
# Export the line-point plot as PNG.
# ggsave() is called without `plot`, so it writes the last ggplot that was displayed.
linepoint_file <- paste0(COUNTRY_CODE, "_reporting_rate_dataset_adm2_linepoint.png")

# Make sure the output folder exists, so the export does not fail on a fresh workspace
dir.create(REPORTING_NB_OUTPUTS_PATH, recursive = TRUE, showWarnings = FALSE)

ggsave(
  filename = linepoint_file,
  path = REPORTING_NB_OUTPUTS_PATH,
  height = 15,
  width = 45,
  units = "cm",
  bg = "white",
  dpi = 200
)

# Log where the file was written. The bar-chart emoji is written as a Unicode
# escape so it cannot be garbled by a wrong file encoding.
log_msg(glue::glue("\U0001F4CA Plot (linepoint) saved to: {file.path(REPORTING_NB_OUTPUTS_PATH, linepoint_file)}"))

#### 3.3. MAP of Reporting Rate with method Dataset

In [None]:
# Attach the shapes to every reporting-rate row, matching on ADM2_ID.
# Left join: all reporting-rate rows are kept, with or without a matching shape.
data_to_plot <- left_join(
  x = reporting_rate_dataset,
  y = shapes,
  by = "ADM2_ID"
)

In [None]:
# Choropleth map of the reporting-rate category by ADM2,
# laid out as a grid with one row per YEAR and one column per MONTH.
ggplot(data = data_to_plot) +
  geom_sf(
    aes(
      fill = REPORTING_RATE_CATEGORY,
      geometry = geometry
    ),
    color = "white",
    # NOTE(review): in recent ggplot2 versions polygon outline width is set via
    # `linewidth`, not `size` - confirm which one is intended here.
    size = 0.01
  ) +
  # Colours come from the dynamically built, label-named palette.
  # (A stray trailing comma that passed an empty argument was removed here.)
  scale_fill_manual(
    values = palette_to_use,
    na.value = "white"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(margin = margin(0, 0, 20, 0)),
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.key.height = unit(0.25, "cm")
  ) +
  labs(title = "Reporting Rate (Dataset)") +
  facet_grid(
    rows = vars(YEAR),
    cols = vars(MONTH),
    switch = "both"
  )

In [None]:
# Export the map as PNG.
# ggsave() is called without `plot`, so it writes the last ggplot that was displayed.
map_file <- paste0(COUNTRY_CODE, "_reporting_rate_dataset_adm2_map.png")

# Make sure the output folder exists, so the export does not fail on a fresh workspace
dir.create(REPORTING_NB_OUTPUTS_PATH, recursive = TRUE, showWarnings = FALSE)

ggsave(
  filename = map_file,
  path = REPORTING_NB_OUTPUTS_PATH,
  width = 50,
  height = 20,
  units = "cm",
  # theme_void() has no plot background, so without an explicit `bg` the PNG is
  # transparent; use white, consistent with the line-point export.
  bg = "white",
  dpi = 200
)

# Log where the file was written. The bar-chart emoji is written as a Unicode
# escape so it cannot be garbled by a wrong file encoding.
log_msg(glue::glue("\U0001F4CA Plot (map) saved to: {file.path(REPORTING_NB_OUTPUTS_PATH, map_file)}"))

#### The End :)

In [None]:
# Final log entry. The party-popper emoji is written as a Unicode escape so it
# cannot be garbled by a wrong file encoding.
log_msg("\U0001F389 Reporting Rate (Dataset) report notebook completed successfully!")