### 1. Setup

In [None]:
# Workspace layout: every other path is derived from the workspace root
SNT_ROOT_PATH <- "/home/hexa/workspace"
REPORTING_NB_OUTPUTS_PATH <- file.path(
  SNT_ROOT_PATH,
  "pipelines", "snt_dhis2_reporting_rate_dataelement", "reporting", "outputs"
)
CODE_PATH <- file.path(SNT_ROOT_PATH, "code") # folder holding snt_utils.r
CONFIG_PATH <- file.path(SNT_ROOT_PATH, "configuration") # folder holding the .json config file
DATA_PATH <- file.path(SNT_ROOT_PATH, "data", "dhis2")

# Shared helper functions and colour palettes
source(file.path(CODE_PATH, "snt_utils.r"))
source(file.path(CODE_PATH, "snt_palettes.r"))

# Packages required by this notebook
required_packages <- c("arrow", "tidyverse", "stringi", "jsonlite", "httr", "reticulate", "glue")
install_and_load(required_packages)

# Environment variables (PROJ / GDAL data folders and the Python binary), set in a single call
Sys.setenv(
  PROJ_LIB = "/opt/conda/share/proj",
  GDAL_DATA = "/opt/conda/share/gdal",
  RETICULATE_PYTHON = "/opt/conda/bin/python"
)

# Handle on the OpenHEXA SDK Python module
openhexa <- import("openhexa.sdk")

#### 1.1. Load and check `snt config` file

In [None]:
# Load the SNT configuration file.
# Any failure while reading/parsing the JSON is printed and then re-raised,
# so the notebook stops here instead of running with a missing configuration.
config_json <- tryCatch(
  jsonlite::fromJSON(file.path(CONFIG_PATH, "SNT_config.json")),
  error = function(e) {
    # ": " keeps the fixed prefix and the underlying error text apart
    msg <- paste0("[ERROR] Error while loading configuration: ", conditionMessage(e))
    cat(msg)
    stop(msg)
  }
)

log_msg(paste0("SNT configuration loaded from : ", file.path(CONFIG_PATH, "SNT_config.json")))

In [None]:
# OpenHEXA dataset identifiers.
# Reporting rate outputs live in one dataset, whichever method (DataSet or DataElement) produced them.
REPORTING_RATE_DATASET_NAME <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_REPORTING_RATE
DHIS2_FORMATTED_DATASET_NAME <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_FORMATTED

# Country code and DHIS2 administrative level names (upper-cased) from the SNT_CONFIG section
COUNTRY_CODE <- config_json$SNT_CONFIG$COUNTRY_CODE
ADMIN_1 <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_1)
ADMIN_2 <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_2)

In [None]:
# Display the Reporting Rate dataset identifier read from the configuration (visual check)
REPORTING_RATE_DATASET_NAME

### 2. Load Data

#### 2.1. Output of pipeline notebook
Import file named `{COUNTRY_CODE}_reporting_rate_dataelement.parquet` from **OH Dataset** "SNT_DHIS2_REPORTING_RATE" (as in `config_json$SNT_DATASET_IDENTIFIERS$DHIS2_REPORTING_RATE`)

In [None]:

# Name of the parquet file expected in the Reporting Rate dataset
rr_filename <- glue::glue("{COUNTRY_CODE}_reporting_rate_dataelement.parquet")

# Load the reporting rate table; a failure is printed and re-raised because
# the rest of the notebook cannot run without this data.
reporting_rate <- tryCatch(
  get_latest_dataset_file_in_memory(REPORTING_RATE_DATASET_NAME, rr_filename),
  error = function(e) {
    # Keep the underlying error text so the actual cause is visible next to the hint
    msg <- paste0(
      "Error while loading Reporting Rate (DataElement) data file: ", rr_filename, "\n",
      conditionMessage(e), "\n",
      "Make sure you have run the `snt_dhis2_reporting_rate_dataelement` (Data Element Reporting rate) pipeline to generate the required data file."
    )
    cat(msg)
    stop(msg)
  }
)

# log
log_msg(glue::glue("Data file loaded from dataset : {REPORTING_RATE_DATASET_NAME} dataframe dimensions: {paste(dim(reporting_rate), collapse=', ')}"))

# Quick look at the loaded table
dim(reporting_rate)
head(reporting_rate, 2)

#### 2.2. Shapes
Used to draw the choropleth (map).

In [None]:
# Load the shapes file (best effort): on failure a warning is logged and
# `shapes` is set to NULL instead of stopping the notebook.
shapes <- tryCatch(
  get_latest_dataset_file_in_memory(DHIS2_FORMATTED_DATASET_NAME, paste0(COUNTRY_CODE, "_shapes.geojson")),
  error = function(e) {
    msg <- paste0(COUNTRY_CODE, " Shapes data is not available in dataset: '", DHIS2_FORMATTED_DATASET_NAME, "' last version.")
    log_msg(msg, "warning")
    # The handler's value is what tryCatch() returns, i.e. what gets assigned to `shapes`
    NULL
  }
)

# Only report success when the file was actually loaded
if (!is.null(shapes)) {
  log_msg(glue::glue("Shapes loaded from dataset: '{DHIS2_FORMATTED_DATASET_NAME}'. \nDataframe with dimensions: {paste(dim(shapes), collapse=', ')}"))
}
names(shapes)

### 3. Plots

In [None]:
# TODO: replace with dynamic bins (scale, based on SNT_metadata.json) and palettes (snt_palettes.r)
# Named colour vector: names are the reporting rate category labels, values the hex colours
# (red, orange, yellow, light green, green, dark green).
reporting_rate_palette <- setNames(
  c("#d73027", "#fc8d59", "#fee08b", "#d9ef8b", "#91cf60", "#1a9850"),
  c("0-0.5", "0.5-0.8", "0.8-0.9", "0.9-0.95", "0.95-1.00", ">1.00")
)

In [None]:
# TODO: replace with dynamic bins (scale, based on SNT_metadata.json) and palettes (snt_palettes.r)
# Label each row with its reporting rate bin.
# case_when() keeps the first matching condition, so each upper bound implicitly
# carries the lower bound of the previous bin; rows matching nothing
# (e.g. missing REPORTING_RATE) get NA.
reporting_rate <- mutate(
  reporting_rate,
  REPORTING_RATE_CATEGORY = case_when(
    REPORTING_RATE < 0.5 ~ "0-0.5",
    REPORTING_RATE < 0.8 ~ "0.5-0.8",
    REPORTING_RATE < 0.9 ~ "0.8-0.9",
    REPORTING_RATE < 0.95 ~ "0.9-0.95",
    REPORTING_RATE <= 1.00 ~ "0.95-1.00",
    REPORTING_RATE > 1.00 ~ ">1.00",
    TRUE ~ NA_character_
  )
)

#### 3.1. Scatter plot of RR over time (by ADM2)
This should facilitate the identification of ADM2 and periods in which reporting rate is low ...

In [None]:
# Monthly reporting rate per ADM2 (one line per ADM2_ID, one panel per year),
# coloured by reporting rate category. The aesthetics are declared once in
# ggplot() and inherited by both layers.
ggplot(
  data = reporting_rate,
  mapping = aes(
    x = MONTH,
    y = REPORTING_RATE,
    group = ADM2_ID,
    color = REPORTING_RATE_CATEGORY
  )
) +
  geom_line(alpha = 0.3, show.legend = FALSE) +
  geom_point() +
  facet_grid(cols = vars(YEAR)) +
  scale_color_manual(
    values = reporting_rate_palette,
    na.value = "white",
    name = "Reporting Rate Categories"
  ) +
  scale_x_continuous(breaks = seq(1, 12, 1)) +
  # Breaks follow the palette bins. TODO: derive them automatically from the palette.
  scale_y_continuous(
    breaks = c(0, 0.5, 0.8, 0.9, 0.95, 1.0),
    limits = c(0, 1.1)
  ) +
  labs(
    title = "Reporting Rate (Data Element)",
    x = "Month",
    y = "Reporting Rate\n(Data Element)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(margin = margin(0, 0, 20, 0)),
    # The legend is hidden; the legend.* settings below are kept as in the original layout
    legend.position = "none",
    legend.title = element_blank(),
    legend.key.width = unit(3, "cm"),
    legend.key.height = unit(0.25, "cm"),
    axis.title.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    strip.placement = "outside",
    strip.text = element_text(face = "bold", size = 10)
  )

In [None]:
# Save the line/point plot as PNG under <outputs>/figures.
# No `plot` argument is given, so ggsave() writes the last plot displayed.
output_file <- paste0(COUNTRY_CODE, "_reporting_rate_dataelement_adm2_linepoint.png")
output_location <- file.path(REPORTING_NB_OUTPUTS_PATH, "figures")

ggsave(
  filename = output_file,
  path = output_location,
  create.dir = TRUE, # create the figures folder when it does not exist yet
  height = 15,
  width = 45,
  units = "cm",
  bg = "white",
  dpi = 200
)

# Log message (emoji restored from its mis-decoded form)
log_msg(glue::glue("📊 Plot (linepoint) saved to: {file.path(output_location, output_file)}"))

#### 3.2. Map of Reporting Rate (Data Element method)

**Format data: join shapes to reporting rate data**

In [None]:
# The map needs the shapes table; the shapes load above is best effort and
# leaves `shapes` as NULL on failure, so fail here with an explicit message
# rather than with an opaque join error.
if (is.null(shapes)) {
  stop(
    "Shapes data is not available (dataset: '", DHIS2_FORMATTED_DATASET_NAME,
    "'): cannot build the Reporting Rate map.",
    call. = FALSE
  )
}

# Attach the shapes columns to every reporting rate row, matching on ADM2_ID
data_to_plot <- reporting_rate %>%
  left_join(shapes, by = "ADM2_ID")

In [None]:
# Choropleth of the reporting rate category per ADM2: one panel per
# YEAR (rows) x MONTH (columns).
ggplot(data = data_to_plot) +
  geom_sf(
    aes(
      fill = REPORTING_RATE_CATEGORY,
      geometry = geometry
    ),
    color = "white",
    # Polygon outlines are controlled by `linewidth` (ggplot2 >= 3.4);
    # `size` only applies to point geometries.
    linewidth = 0.01
  ) +
  scale_fill_manual(
    values = reporting_rate_palette,
    na.value = "white",
    name = "Reporting Rate Categories" # same legend title as the line/point plot
  ) +
  theme_void() +
  theme(
    plot.title = element_text(margin = margin(0, 0, 20, 0)),
    legend.position = "bottom",
    legend.title.position = "top",
    legend.key.height = unit(0.25, "cm")
  ) +
  labs(title = "Reporting Rate (Data Element)") +
  facet_grid(
    rows = vars(YEAR),
    cols = vars(MONTH),
    switch = "both"
  )

In [None]:
# Save the map as PNG under <outputs>/figures.
# No `plot` argument is given, so ggsave() writes the last plot displayed.
output_file <- paste0(COUNTRY_CODE, "_reporting_rate_dataelement_adm2_map.png")
output_location <- file.path(REPORTING_NB_OUTPUTS_PATH, "figures")

ggsave(
  filename = output_file,
  path = output_location,
  create.dir = TRUE, # create the figures folder when it does not exist yet
  width = 50,
  height = 20,
  units = "cm",
  # Explicit white background, as for the line/point figure
  # (the map is drawn with theme_void())
  bg = "white",
  dpi = 200
)

# Log message (emoji restored from its mis-decoded form)
log_msg(glue::glue("📊 Plot (map) saved to: {file.path(output_location, output_file)}"))

#### The End :)

In [None]:
# Final log entry marking the end of the notebook (emoji restored from its mis-decoded form)
log_msg("🎉 Reporting Rate (Data Element) report notebook completed successfully!")