# Plots of care-seeking behavior for child fever (DHS data)

## Preliminary steps

In [None]:
# Remove every (non-hidden) object from the current environment so that
# nothing left over from an earlier run is picked up by the cells below.
rm(list = ls())

# A large `scipen` penalty makes R print numbers in fixed notation
# instead of scientific notation.
options(scipen = 999)

In [None]:
# Environment variables for the PROJ and GDAL data directories
# (both point inside the conda installation).
Sys.setenv(
  PROJ_LIB = "/opt/conda/share/proj",
  GDAL_DATA = "/opt/conda/share/gdal"
)

# Workspace layout: every location below is derived from ROOT_PATH.
ROOT_PATH <- "~/workspace"
CONFIG_PATH <- file.path(ROOT_PATH, "configuration")
CODE_PATH <- file.path(ROOT_PATH, "code")
DATA_PATH <- file.path(ROOT_PATH, "data")

# DHS folders: raw data, and the careseeking indicator tables that the
# plotting cells read with fread().
DHS_DATA_PATH <- file.path(DATA_PATH, "dhs", "raw")
OUTPUT_DATA_PATH <- file.path(DATA_PATH, "dhs", "indicators", "careseeking")

# Folder the PNG files are written to by ggsave().
OUTPUT_PLOTS_PATH <- file.path(
  ROOT_PATH, "pipelines", "snt_dhs_indicators", "reporting", "outputs"
)

In [None]:
# Shared project helpers; the functions used below that are not defined in
# this notebook (e.g. install_and_load(), log_msg()) are expected to come
# from this file.
source(file.path(CODE_PATH, "snt_utils.r"))

# Packages this notebook relies on
required_packages <- c(
  "sf", "glue", "data.table", "ggplot2",
  "stringi", "jsonlite", "httr", "reticulate"
)

# Hand the list to the project helper
# (presumably installs what is missing and attaches everything -- defined
# outside this file, confirm there).
install_and_load(required_packages)

In [None]:
# Select the Python interpreter for reticulate, display the interpreter that
# was actually resolved, then import the OpenHEXA SDK module.
Sys.setenv(RETICULATE_PYTHON = "/opt/conda/bin/python")
reticulate::py_config()$python
openhexa <- import("openhexa.sdk")

# Load SNT config
CONFIG_FILE_NAME <- "SNT_config.json"
config_json <- tryCatch(
  fromJSON(file.path(CONFIG_PATH, CONFIG_FILE_NAME)),
  error = function(e) {
    # ": " separates the fixed text from the underlying cause; without it the
    # two run together and the message is hard to read.
    msg <- paste0("Error while loading configuration: ", conditionMessage(e))
    # Echo to stdout (newline-terminated) before re-raising so the cause is
    # visible in the cell output as well as in the error itself.
    cat(msg, "\n", sep = "")
    stop(msg)
  }
)

msg <- paste0("SNT configuration loaded from  : ", file.path(CONFIG_PATH, CONFIG_FILE_NAME))
log_msg(msg)

# Set config variables
COUNTRY_CODE <- config_json$SNT_CONFIG$COUNTRY_CODE

# Fail fast when the key is absent or empty: `$` on a missing key yields NULL,
# which would otherwise only surface later as confusing file-name or
# comparison errors.
if (!is.character(COUNTRY_CODE) || length(COUNTRY_CODE) != 1L ||
    is.na(COUNTRY_CODE) || !nzchar(COUNTRY_CODE)) {
  stop(
    "SNT_CONFIG$COUNTRY_CODE is missing or empty in ", CONFIG_FILE_NAME,
    call. = FALSE
  )
}
print(paste("Country code: ", COUNTRY_CODE))

## Geo data

In [None]:
# Administrative level used for the geometries, tables and file names
admin_level <- "ADM1"

# Column names are built as "<level>_ID" and "<level>_NAME"
admin_id_col <- glue("{admin_level}_ID")
admin_name_col <- glue("{admin_level}_NAME")

# Both columns together; used as the join key and to tell admin columns
# apart from indicator columns
admin_cols <- c(admin_id_col, admin_name_col)

In [None]:
# Identifier of the formatted DHIS2 dataset, taken from the SNT configuration
dhis2_dataset <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_FORMATTED

# Shapes file name: "<COUNTRY_CODE>_shapes.geojson"
spatial_data_filename <- paste(COUNTRY_CODE, "shapes.geojson", sep = "_")

# Fetch the shapes file through the project helper.
# Alternative that reads a local copy instead:
#   spatial_data <- read_sf(file.path(DATA_PATH, 'dhis2', 'formatted', spatial_data_filename))
spatial_data <- get_latest_dataset_file_in_memory(dhis2_dataset, spatial_data_filename)
log_msg(glue("File {spatial_data_filename} successfully loaded from dataset version: {dhis2_dataset}"))

# Convert to sf, aggregate the geometries by the admin columns, then convert
# the helper's result to sf once more so the object keeps its sf class.
spatial_data <- st_as_sf(
  aggregate_geometry(
    sf_data = st_as_sf(spatial_data),
    admin_id_colname = admin_id_col,
    admin_name_colname = admin_name_col
  )
)

# Admin names go through clean_admin_names() for COD only
if (COUNTRY_CODE == "COD") {
  spatial_data[[admin_name_col]] <- clean_admin_names(
    spatial_data[[admin_name_col]]
  )
}

## DHS tables/names

In [None]:
# Data source token; it is interpolated into the input and output file names
data_source <- "DHS"

# Hard-coded indicator names, currently disabled:
# indicator_public_care <- 'PUBLIC_CARE'
# indicator_private_care <- 'PRIVATE_CARE'
# indicator_no_care <- 'NO_CARE'

In [None]:
# Read the careseeking sample-average table for this country / source / level
filename_without_extension <- glue(
  "{COUNTRY_CODE}_{data_source}_{admin_level}_PCT_CARESEEKING_SAMPLE_AVERAGE"
)
careseeking_table <- fread(
  file.path(OUTPUT_DATA_PATH, paste0(filename_without_extension, ".csv"))
)

# Every column that is not an admin column is treated as an indicator
all_indicators <- setdiff(names(careseeking_table), admin_cols)

## Maps

In [None]:
# Join the indicator table onto the geometries by the admin columns.
# all = TRUE keeps unmatched rows from both sides.
plot_data <- merge(
  x = spatial_data,
  y = careseeking_table,
  by = admin_cols,
  all = TRUE
)

In [None]:
# Draw one map per indicator column and save each as a PNG file in
# OUTPUT_PLOTS_PATH.
for (indicator_name in all_indicators) {

  # Label for the title: underscores become spaces and the text "PCT " is removed
  plot_label <- gsub("PCT ", "", gsub("_", " ", indicator_name))

  # Map built by the project helper; the fill scale is fixed to 0-100
  indicator_plot <- make_dhs_map(
    plot_dt = plot_data,
    plot_colname = indicator_name,
    title_name = glue("Percentage children: {plot_label}"),
    legend_title = glue("%"),
    scale_limits = c(0, 100)
  )

  map_filename <- glue(
    "{COUNTRY_CODE}_{data_source}_{admin_level}_{toupper(indicator_name)}_plot.png"
  )

  # `plot` and `filename` are named in full: the previous `file = ...` only
  # reached ggsave()'s `filename` through partial argument matching.
  ggsave(
    filename = file.path(OUTPUT_PLOTS_PATH, map_filename),
    plot = indicator_plot,
    dpi = 500
  )
}

## Confidence interval plots

In [None]:
# For each indicator: read its estimates table and save a confidence-interval
# plot as a PNG file in OUTPUT_PLOTS_PATH.
for (indicator_name in all_indicators) {

  # Label used in the plot title and on the y axis
  indicator_label <- gsub("_", " ", indicator_name)

  # Names of the estimate and bound columns expected in the indicator's table
  sample_avg_col <- glue("{indicator_name}_SAMPLE_AVERAGE")
  lower_bound_col <- glue("{indicator_name}_CI_LOWER_BOUND")
  upper_bound_col <- glue("{indicator_name}_CI_UPPER_BOUND")

  # Title, axis labels and output file name
  ci_plot_title <- glue("{COUNTRY_CODE} {data_source} {indicator_label} CI")
  ci_plot_xlab <- admin_name_col
  ci_plot_ylab <- glue("{indicator_label} (%)")
  ci_plot_filename <- glue("{COUNTRY_CODE}_{data_source}_{admin_level}_{toupper(indicator_name)}_CI_plot.png")

  # Table for this indicator
  ci_data <- fread(
    file.path(
      OUTPUT_DATA_PATH,
      glue("{COUNTRY_CODE}_{data_source}_{admin_level}_{indicator_name}.csv")
    )
  )

  # Plot built by the project helper
  ci_plot <- make_ci_plot(
    df_to_plot = ci_data,
    admin_colname = admin_name_col,
    point_estimation_colname = sample_avg_col,
    ci_lower_colname = lower_bound_col,
    ci_upper_colname = upper_bound_col,
    title_name = ci_plot_title,
    x_title = ci_plot_xlab,
    y_title = ci_plot_ylab
  )

  ggsave(
    filename = file.path(OUTPUT_PLOTS_PATH, ci_plot_filename),
    plot = ci_plot,
    width = 8,
    height = 6,
    dpi = 300
  )
}