# Dataset reporting rate: based on reporting rates extracted from DHIS2

The reporting rate measures how well facilities submit data compared to what is expected, calculated for each administrative level 2 unit and for each period.

Specifically:  

* **Numerator**: the number of facilities that _actually_ reported. This value comes from the selected dataset _ACTUAL_REPORTS_, which is extracted from DHIS2.
    <br>
* **Denominator**: the number of facilities _expected_ to report. This value comes from the _EXPECTED_REPORTS_ metric of the selected dataset, which is extracted from DHIS2.
    <br>
* **Output**: Reporting rate table aggregated at administrative level 2, saved in csv and parquet formats to dataset **SNT_DHIS2_REPORTING_RATE**:
    * cols: YEAR, MONTH, ADM2_ID, REPORTING_RATE
    * Filename: `<COUNTRY_CODE>_reporting_rate_dataset.<extension>`

In [None]:
# Parameters
# SNT_ROOT_PATH <- "/home/hexa/workspace" 

## 1. Setup

In [None]:
# Project paths: every folder used by this notebook hangs off the workspace root
SNT_ROOT_PATH <- "/home/hexa/workspace"
CODE_PATH <- file.path(SNT_ROOT_PATH, "code")            # folder where snt_utils.r is stored
CONFIG_PATH <- file.path(SNT_ROOT_PATH, "configuration") # folder holding the .json config file
DATA_PATH <- file.path(SNT_ROOT_PATH, "data", "dhis2")

# Shared helper functions (must be sourced before the helpers below are called)
source(file.path(CODE_PATH, "snt_utils.r"))

# Packages needed by this notebook
required_packages <- c(
    "arrow", "tidyverse", "stringi", "jsonlite",
    "httr", "reticulate", "glue"
)
install_and_load(required_packages)

# Environment variables, set before the Python module is imported below
Sys.setenv(
    PROJ_LIB = "/opt/conda/share/proj",
    GDAL_DATA = "/opt/conda/share/gdal",
    RETICULATE_PYTHON = "/opt/conda/bin/python"
)

# OpenHEXA SDK (Python module imported from R)
openhexa <- import("openhexa.sdk")

#### 1.1. Load and check `snt config` file

In [None]:
# Load SNT config
# Reads `SNT_config.json` from CONFIG_PATH. If the file cannot be read or parsed,
# the error is printed and re-raised so the notebook stops here.
config_json <- tryCatch(
    {
        jsonlite::fromJSON(file.path(CONFIG_PATH, "SNT_config.json"))
    },
    error = function(e) {
        # Keep a ": " separator between the fixed prefix and the underlying message,
        # otherwise both run together and the message is hard to read.
        msg <- paste0("[ERROR] Error while loading configuration: ", conditionMessage(e))
        cat(msg)
        stop(msg)
    }
)

log_msg(paste0("SNT configuration loaded from : ", file.path(CONFIG_PATH, "SNT_config.json")))

In [None]:
# Configuration settings read from the SNT config
COUNTRY_CODE <- config_json$SNT_CONFIG$COUNTRY_CODE
ADMIN_1 <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_1)
ADMIN_2 <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_2)

# Which reporting rate PRODUCT_UID to use
REPORTING_RATE_PRODUCT_ID <- config_json$SNT_CONFIG$REPORTING_RATE_PRODUCT_UID

# How to treat 0 values (in this case: "SET_0_TO_NA" converts 0 to NAs)
NA_TREATMENT <- config_json$SNT_CONFIG$NA_TREATMENT

# Which indicators to use to evaluate "activity" of an HF
DHIS2_INDICATORS <- names(config_json$DHIS2_DATA_DEFINITIONS$DHIS2_INDICATOR_DEFINITIONS)

# Identifier columns of the routine data
fixed_cols <- c("PERIOD", "YEAR", "MONTH", "ADM1_ID", "ADM2_ID", "OU_ID")
# Columns kept when exporting reporting rate tables
fixed_cols_rr <- c("YEAR", "MONTH", "ADM2_ID", "REPORTING_RATE")

#### 1.2. Validate parameters

In [None]:
# Routine files that can be selected through ROUTINE_FILE (if present in the dataset):
#   "COUNTRY_CODE_routine.parquet"                          (RAW data)
#   "COUNTRY_CODE_routine_outliers-mean_removed.parquet"
#   "COUNTRY_CODE_routine_outliers-mean_imputed.parquet"
#   "COUNTRY_CODE_routine_outliers-median_removed.parquet"
#   "COUNTRY_CODE_routine_outliers-median_imputed.parquet"
#   "COUNTRY_CODE_routine_outliers-iqr_removed.parquet"
#   "COUNTRY_CODE_routine_outliers-iqr_imputed.parquet"
#   "COUNTRY_CODE_routine_outliers-trend_removed.parquet"
#   "COUNTRY_CODE_routine_outliers-trend_imputed.parquet"

# When ROUTINE_FILE was not defined beforehand, default to the raw routine file
if (!exists("ROUTINE_FILE")) {
    ROUTINE_FILE <- glue("{COUNTRY_CODE}_routine.parquet")
}

#### 1.3. 🔍 Check REPORTING_RATE_PRODUCT_ID is configured

In [None]:
# Stop right away when the config provides no reporting rate product UID
if (is.null(REPORTING_RATE_PRODUCT_ID)) stop("[ERROR] The REPORTING_RATE_PRODUCT_ID is not defined in `SNT_config.json` file.")

## 2. Load Data

### 2.1. Load routine data (DHIS2) 
Already formatted routine data, we use this as the master table

In [None]:
# Choose the dataset to read from: the raw routine file comes from the formatted
# DHIS2 dataset, any other routine file from the outliers imputation dataset
rountine_dataset_name <- if (ROUTINE_FILE == glue("{COUNTRY_CODE}_routine.parquet")) {
    config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_FORMATTED
} else {
    config_json$SNT_DATASET_IDENTIFIERS$DHIS2_OUTLIERS_IMPUTATION
}

# Fetch the routine file from the dataset; print and re-raise any loading error
dhis2_routine <- tryCatch(
    {
        get_latest_dataset_file_in_memory(rountine_dataset_name, ROUTINE_FILE)
    },
    error = function(err) {
        msg <- paste("Error while loading DHIS2 routine data file for: ", COUNTRY_CODE, conditionMessage(err))
        cat(msg)
        stop(msg)
    }
)

# Period columns are converted to numeric
dhis2_routine <- mutate(
    dhis2_routine,
    across(c(PERIOD, YEAR, MONTH), as.numeric)
)

# Log what was loaded, then show the dimensions and the first rows
log_msg(
    glue("DHIS2 routine file {ROUTINE_FILE} loaded from dataset : {rountine_dataset_name} dataframe dimensions: {paste(dim(dhis2_routine), collapse=', ')}")
)
dim(dhis2_routine)
head(dhis2_routine, 2)

### 2.2. Load reporting rate data (DHIS2)

In [None]:
# The reporting file is read from the formatted DHIS2 dataset
dataset_name <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_FORMATTED
file_name <- paste0(COUNTRY_CODE, "_reporting.parquet")

# Fetch the reporting file from the dataset; print and re-raise any loading error
dhis2_reporting <- tryCatch(
    {
        get_latest_dataset_file_in_memory(dataset_name, file_name)
    },
    error = function(err) {
        msg <- paste("[ERROR] Error while loading DHIS2 dataset reporting rates file for: ", COUNTRY_CODE, conditionMessage(err))
        cat(msg)
        stop(msg)
    }
)

# Period columns and the reported VALUE are converted to numeric
dhis2_reporting <- mutate(
    dhis2_reporting,
    across(c(PERIOD, YEAR, MONTH, VALUE), as.numeric)
)

# Log what was loaded, then show the first rows
msg <- paste0(
    "DHIS2 pre-computed REPORTING data loaded from file `", file_name,
    "` (from dataset : `", dataset_name, "`). Dataframe dimensions: ",
    paste(dim(dhis2_reporting), collapse = ", ")
)
log_msg(msg)
head(dhis2_reporting, 3)

## 3. Compute reporting rates

#### 3.1. Build table with `actual` and `expected` reports

Use `dhis2_reporting_wide$ACTUAL_REPORTS` as the numerator for REPORTING_RATE calculations.  
Use `dhis2_reporting_wide$EXPECTED_REPORTS` as the denominator for REPORTING_RATE calculations.

In [None]:
# Keep only the rows of the reporting table that belong to the configured product
dhis2_reporting_selected <- dhis2_reporting %>%
    filter(PRODUCT_UID %in% REPORTING_RATE_PRODUCT_ID)

# Fail with an explicit message when the configured product is absent from the data,
# instead of failing later with a "column not found" error
if (nrow(dhis2_reporting_selected) == 0) {
    stop(paste0(
        "[ERROR] No reporting data found for REPORTING_RATE_PRODUCT_ID `",
        paste(REPORTING_RATE_PRODUCT_ID, collapse = ", "),
        "` in the DHIS2 reporting file."
    ))
}

# DHIS2 product name(s) matching the configured product id (used for logging only)
product_name <- dhis2_reporting_selected %>%
    pull(PRODUCT_NAME) %>%
    unique()

# One column per PRODUCT_METRIC, filled with the corresponding VALUE
dhis2_reporting_wide <- dhis2_reporting_selected %>%
    select(-PRODUCT_UID, -PRODUCT_NAME) %>% # remove cols
    pivot_wider(
        names_from = PRODUCT_METRIC,
        values_from = VALUE
    )

# Both metrics are required to compute the reporting rate
missing_metrics <- setdiff(c("ACTUAL_REPORTS", "EXPECTED_REPORTS"), names(dhis2_reporting_wide))
if (length(missing_metrics) > 0) {
    stop(paste0(
        "[ERROR] Missing metric(s) in the DHIS2 reporting data: ",
        paste(missing_metrics, collapse = ", ")
    ))
}

# Collapse ids / names so that a single message is logged even if several values are present
log_msg(glue::glue(
    "Using REPORTING_RATE_PRODUCT_ID == `{paste(REPORTING_RATE_PRODUCT_ID, collapse = ', ')}`, corresponding to DHIS2 Product name : `{paste(product_name, collapse = ', ')}`."
))
print(dim(dhis2_reporting_wide))
head(dhis2_reporting_wide, 3)

#### 3.2. Compute reporting rate with **DHIS2** dataset data

Extracted from DHIS2 and formatted.  
- Straightforward computation: `ACTUAL_REPORTS` / `EXPECTED_REPORTS`  
- Format final table (column selection)

In [None]:
# Reporting rate = ACTUAL_REPORTS / EXPECTED_REPORTS.
# When no report is expected (EXPECTED_REPORTS is 0 or negative) the plain ratio
# would be Inf or NaN, so the rate is set to NA instead. A missing EXPECTED_REPORTS
# also results in NA.
reporting_rate_results <- dhis2_reporting_wide %>%
    mutate(
        REPORTING_RATE = if_else(
            EXPECTED_REPORTS > 0,
            ACTUAL_REPORTS / EXPECTED_REPORTS,
            NA_real_
        )
    )

print(dim(reporting_rate_results))
head(reporting_rate_results, 3)

#### 3.3. Left join reporting indicators with DHIS2 routine data.

Make sure we have a consistent reporting rates table matching periods x org units (safety measure only).

In [None]:
# Start from every distinct YEAR x MONTH x ADM2_ID combination found in the routine
# data and attach the reporting rate; combinations without a match keep an NA rate
reporting_rate_dataset <- left_join(
    distinct(select(dhis2_routine, YEAR, MONTH, ADM2_ID)),
    select(reporting_rate_results, all_of(fixed_cols_rr)),
    by = c("YEAR", "MONTH", "ADM2_ID")
)

print(dim(reporting_rate_dataset))
head(reporting_rate_dataset, 3)

## 4. 📁 Export to `data/` folder

#### 4.2. Write files

In [None]:
# Output folder for the reporting rate tables
output_data_path <- file.path(DATA_PATH, "reporting_rate")

# Create the folder when it does not exist yet (no-op otherwise), so that the
# writes below do not fail on a fresh workspace
dir.create(output_data_path, recursive = TRUE, showWarnings = FALSE)

# parquet
file_path <- file.path(output_data_path, paste0(COUNTRY_CODE, "_reporting_rate_dataset.parquet"))
write_parquet(reporting_rate_dataset, file_path)
log_msg(glue("Exported : {file_path}"))

# csv
file_path <- file.path(output_data_path, paste0(COUNTRY_CODE, "_reporting_rate_dataset.csv"))
write.csv(reporting_rate_dataset, file_path, row.names = FALSE)
log_msg(glue("Exported : {file_path}"))