# Plots for under-5 mortality (DHS data)

## Preliminary steps

In [None]:
# Notebook setup: environment variables, project paths, packages, the
# OpenHEXA SDK handle and the SNT configuration used by every later cell.

# Start from an empty global environment
rm(list = ls())

# Print numbers in fixed rather than scientific notation
options(scipen = 999)

# Global paths
Sys.setenv(PROJ_LIB = "/opt/conda/share/proj")
Sys.setenv(GDAL_DATA = "/opt/conda/share/gdal")

# Paths
ROOT_PATH <- "~/workspace"
CONFIG_PATH <- file.path(ROOT_PATH, "configuration")
CODE_PATH <- file.path(ROOT_PATH, "code")
DATA_PATH <- file.path(ROOT_PATH, "data")
DHS_DATA_PATH <- file.path(DATA_PATH, "dhs", "raw")
OUTPUT_DATA_PATH <- file.path(DATA_PATH, "dhs", "indicators", "mortality")
OUTPUT_PLOTS_PATH <- file.path(
  ROOT_PATH, "pipelines", "snt_dhs_indicators", "reporting", "outputs"
)

# Load utils
source(file.path(CODE_PATH, "snt_utils.r"))

# List required pcks
required_packages <- c(
  "sf", "glue", "data.table", "ggplot2",
  "stringi", "jsonlite", "httr", "reticulate"
)

# Execute function
install_and_load(required_packages)

# Select the Python interpreter for reticulate, then import the OpenHEXA SDK
Sys.setenv(RETICULATE_PYTHON = "/opt/conda/bin/python")
reticulate::py_config()$python
openhexa <- import("openhexa.sdk")

# Load SNT config
CONFIG_FILE_NAME <- "SNT_config.json"
config_json <- tryCatch(
  {
    fromJSON(file.path(CONFIG_PATH, CONFIG_FILE_NAME))
  },
  error = function(e) {
    # Separator added so the underlying cause does not run into the prefix
    msg <- paste0("Error while loading configuration: ", conditionMessage(e))
    cat(msg)
    stop(msg)
  }
)

msg <- paste0("SNT configuration loaded from  : ", file.path(CONFIG_PATH, CONFIG_FILE_NAME)) 
log_msg(msg)

# Set config variables
COUNTRY_CODE <- config_json$SNT_CONFIG$COUNTRY_CODE

# Fail here with a clear message instead of later, when the missing value
# would break the `COUNTRY_CODE == "COD"` condition and the file names.
if (is.null(COUNTRY_CODE) || length(COUNTRY_CODE) != 1L ||
      is.na(COUNTRY_CODE) || !nzchar(COUNTRY_CODE)) {
  stop(
    "SNT_CONFIG$COUNTRY_CODE is missing or empty in ", CONFIG_FILE_NAME,
    call. = FALSE
  )
}

# COUNTRY_CODE <- "BFA"
print(paste("Country code: ", COUNTRY_CODE))

data_source <- "DHS"
# dhs_dataset <- config_json$SNT_DATASET_IDENTIFIERS$DHS_DATASET

In [None]:
# Geo data ----------------------------------------------------------------

# Administrative level to plot and the matching ID / NAME column names
admin_level <- "ADM1"
admin_id_col <- glue(admin_level, "ID", .sep = "_")
admin_name_col <- glue(admin_level, "NAME", .sep = "_")
admin_cols <- c(admin_id_col, admin_name_col)

# Fetch the country shapes file from the formatted DHIS2 dataset
dhis2_dataset <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_FORMATTED

spatial_data_filename <- paste(COUNTRY_CODE, "shapes.geojson", sep = "_")
# spatial_data <- read_sf(file.path(DATA_PATH, 'dhis2', 'formatted', spatial_data_filename))
spatial_data <- get_latest_dataset_file_in_memory(
  dhis2_dataset,
  spatial_data_filename
)
log_msg(
  glue("File {spatial_data_filename} successfully loaded from dataset version: {dhis2_dataset}")
)

spatial_data <- st_as_sf(spatial_data)

# Aggregate geometries by the admin columns
spatial_data <- aggregate_geometry(
  sf_data = spatial_data,
  admin_id_colname = admin_id_col,
  admin_name_colname = admin_name_col
)

# Keep the sf class after aggregation
spatial_data <- st_as_sf(spatial_data)

# COD only: pass the admin names through the project helper
# (presumably normalises them to match the DHS table; confirm in snt_utils.r)
if (COUNTRY_CODE == "COD") {
  spatial_data[[admin_name_col]] <- clean_admin_names(
    spatial_data[[admin_name_col]]
  )
}

## Map

In [None]:
# Indicator code and the derived column names used by the plots below
indicator_u5mr <- "U5MR_PERMIL"
lower_bound_col <- glue("{toupper(indicator_u5mr)}_CI_LOWER_BOUND")
upper_bound_col <- glue("{toupper(indicator_u5mr)}_CI_UPPER_BOUND")
sample_avg_col <- glue("{toupper(indicator_u5mr)}_SAMPLE_AVERAGE")

# Read the indicator table: <country>_<source>_<admin level>_<indicator>.csv
filename_without_extension <- glue(
  "{COUNTRY_CODE}_{data_source}_{admin_level}_{toupper(indicator_u5mr)}"
)
u5mort_table <- fread(
  file.path(OUTPUT_DATA_PATH, paste0(filename_without_extension, ".csv"))
)

In [None]:
# Full join of the shapes and the indicator table on the admin ID / NAME
# columns; all = TRUE keeps rows that are unmatched on either side.
plot_data <- merge(
  spatial_data,
  u5mort_table,
  by = admin_cols,
  all = TRUE
)

In [None]:
# Map of the sample-average under-5 mortality, on a fixed 0-200 scale
u5_mort_permil_plot <- make_dhs_map(
  plot_dt = plot_data,
  plot_colname = sample_avg_col,
  title_name = "Under-5 mortality (\u2030)",
  legend_title = "\u2030",
  scale_limits = c(0, 200)
)

In [None]:
# u5_mort_permil_plot
# Save the map as a PNG in the reporting outputs folder.
plot_filename <- glue("{COUNTRY_CODE}_{data_source}_{admin_level}_{toupper(indicator_u5mr)}_plot.png")

# Arguments are named in full: the previous `file =` only worked through
# partial matching against `filename`. No width/height is given, so ggsave
# uses the size of the current graphics device.
ggsave(
  filename = file.path(OUTPUT_PLOTS_PATH, plot_filename),
  plot = u5_mort_permil_plot,
  dpi = 500
)

## Confidence interval plot

In [None]:
# Labels and output file name for the confidence-interval plot
ci_plot_title <- glue(
  "{COUNTRY_CODE} {data_source} {indicator_u5mr} (95% Confidence Intervals)"
)
ci_plot_xlab <- admin_name_col
ci_plot_ylab <- glue("Under-5 mortality (\u2030)")
ci_plot_filename <- glue(
  "{COUNTRY_CODE}_{data_source}_{admin_level}_{toupper(indicator_u5mr)}_CI_plot.png"
)

# Point estimate with lower / upper CI bounds for each admin unit
ci_u5_mort_permil_plot <- make_ci_plot(
  df_to_plot = plot_data,
  admin_colname = admin_name_col,
  point_estimation_colname = sample_avg_col,
  ci_lower_colname = lower_bound_col,
  ci_upper_colname = upper_bound_col,
  title_name = ci_plot_title,
  x_title = ci_plot_xlab,
  y_title = ci_plot_ylab
)

# Write the plot to the reporting outputs folder
ggsave(
  filename = file.path(OUTPUT_PLOTS_PATH, ci_plot_filename),
  plot = ci_u5_mort_permil_plot,
  width = 8,
  height = 6,
  dpi = 300
)