# Plots for DTP Vaccination and attrition rates (DHS data)

## Preliminaries

In [None]:
# Bias number printing strongly towards fixed (non-scientific) notation
options(scipen = 999)

# Clear every object from the current workspace before the notebook runs
rm(list = ls())

In [None]:
# Environment variables pointing at the conda PROJ and GDAL share folders
Sys.setenv(
  PROJ_LIB = "/opt/conda/share/proj",
  GDAL_DATA = "/opt/conda/share/gdal"
)

In [None]:
# Workspace layout: every location below is derived from ROOT_PATH
ROOT_PATH <- "~/workspace"

# Configuration, code and data roots
CONFIG_PATH <- file.path(ROOT_PATH, "configuration")
CODE_PATH <- file.path(ROOT_PATH, "code")
DATA_PATH <- file.path(ROOT_PATH, "data")

# DHS inputs and the vaccination indicator tables read by this notebook
DHS_DATA_PATH <- file.path(DATA_PATH, "dhs", "raw")
OUTPUT_DATA_PATH <- file.path(DATA_PATH, "dhs", "indicators", "vaccination")

# Folder that receives the generated plots
OUTPUT_PLOTS_PATH <- file.path(
  ROOT_PATH, "pipelines", "snt_dhs_indicators", "reporting", "outputs"
)

In [None]:
# Shared helper functions used throughout this notebook
source(file.path(CODE_PATH, "snt_utils.r"))

# Packages this notebook depends on
required_packages <- c(
  "haven", "glue", "survey", "data.table", "sf",
  "ggplot2", "stringi", "reticulate", "jsonlite", "arrow"
)

# Helper from the sourced utils; presumably installs what is missing and attaches everything
install_and_load(required_packages)

In [None]:
# Select the conda Python interpreter before the first reticulate call,
# then show which interpreter was actually picked up.
Sys.setenv(RETICULATE_PYTHON = "/opt/conda/bin/python")
reticulate::py_config()$python
openhexa <- import("openhexa.sdk")

# Load SNT config
CONFIG_FILE_NAME <- "SNT_config.json"
config_json <- tryCatch(
  {
    fromJSON(file.path(CONFIG_PATH, CONFIG_FILE_NAME))
  },
  error = function(e) {
    # Separate the fixed text from the underlying error so the message is
    # readable (previously the two were glued together without a delimiter).
    msg <- paste0("Error while loading configuration: ", conditionMessage(e))
    cat(msg)
    stop(msg)
  }
)

msg <- paste0("SNT configuration loaded from  : ", file.path(CONFIG_PATH, CONFIG_FILE_NAME))
log_msg(msg)

# Set config variables
COUNTRY_CODE <- config_json$SNT_CONFIG$COUNTRY_CODE
print(paste("Country code: ", COUNTRY_CODE))

## Geo data

In [None]:
# Administrative level used for all tables and plots in this notebook
admin_level <- "ADM1"

# Column names derived from the level: "<level>_ID" and "<level>_NAME"
admin_id_col <- glue("{admin_level}_ID")
admin_name_col <- glue("{admin_level}_NAME")

# Key columns used when joining tables to the geometries
admin_cols <- c(admin_id_col, admin_name_col)

In [None]:
# Fetch the country shapes file from the formatted DHIS2 dataset named in the config
dhis2_dataset <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_FORMATTED
spatial_data_filename <- paste(COUNTRY_CODE, "shapes.geojson", sep = "_")

# Local-file alternative, kept for reference:
# spatial_data <- read_sf(file.path(DATA_PATH, 'dhis2', 'formatted', spatial_data_filename))
spatial_data <- get_latest_dataset_file_in_memory(dhis2_dataset, spatial_data_filename)
log_msg(glue("File {spatial_data_filename} successfully loaded from dataset version: {dhis2_dataset}"))

# Make sure we are working with an sf object before aggregating
spatial_data <- st_as_sf(spatial_data)

# Aggregate geometries by the admin columns, then restore the sf class
spatial_data <- st_as_sf(
  aggregate_geometry(
    sf_data = spatial_data,
    admin_id_colname = admin_id_col,
    admin_name_colname = admin_name_col
  )
)

# DRC province names go through an extra cleaning step
if (COUNTRY_CODE == "COD") {
  spatial_data[[admin_name_col]] <- clean_admin_names(
    spatial_data[[admin_name_col]]
  )
}

## Import DHS data

In [None]:
# Survey source label used in file names and plot titles
data_source <- "DHS"

# DTP doses to report on
vaccination_doses <- c(1, 2, 3)

# Indicator name prefixes: coverage (access) and dropout (attrition)
indicator_access <- "PCT_DTP"
indicator_attrition <- "PCT_DROPOUT_DTP"

For each vaccine dose, do everything :D
- add the admin units and save to .csv and parquet
- add the spatial data
- make percentage maps (sample average) and save them
- make confidence interval plots for the regions and save them

## Maps and CI plots

In [None]:
# Make sure the output folder exists so ggsave() does not fail on a fresh workspace
dir.create(OUTPUT_PLOTS_PATH, recursive = TRUE, showWarnings = FALSE)

# For every DTP dose: read the indicator table for the chosen admin level,
# join it to the admin geometries, then save a coverage map and a
# confidence-interval plot as PNG files in OUTPUT_PLOTS_PATH.
for (dose_number in vaccination_doses) {
  # The indicator name (e.g. PCT_DTP1) is both the table name and the column prefix
  table_name <- glue("{toupper(indicator_access)}{dose_number}")
  vaccine_colname <- table_name
  filename_without_extension <- glue("{COUNTRY_CODE}_{data_source}_{admin_level}_{table_name}")
  df <- fread(file.path(OUTPUT_DATA_PATH, paste0(filename_without_extension, ".csv")))

  # Names of the point-estimate and confidence-bound columns passed to the plot helpers
  sample_avg_col <- paste(vaccine_colname, "SAMPLE_AVERAGE", sep = "_")
  lower_bound_col <- paste(vaccine_colname, "CI_LOWER_BOUND", sep = "_")
  upper_bound_col <- paste(vaccine_colname, "CI_UPPER_BOUND", sep = "_")

  # Full join on the admin columns: units present on only one side are kept
  plot_data <- merge(spatial_data, df, by = admin_cols, all = TRUE)

  print(glue("Processing data for {vaccine_colname}"))

  # Coverage map
  plot_filename <- glue("{filename_without_extension}_plot.png")
  save_path <- file.path(OUTPUT_PLOTS_PATH, plot_filename)
  map_title <- glue("{COUNTRY_CODE} DTP{dose_number} vaccine coverage (%)")
  dose_plot <- make_dhs_map(
    plot_dt = plot_data,
    plot_colname = sample_avg_col,
    title_name = map_title,
    legend_title = "%",
    scale_limits = c(0, 100)
  )
  ggsave(filename = save_path, plot = dose_plot, width = 8, height = 6, dpi = 300)

  # Confidence-interval plot; the file name reuses the same upper-cased stem
  # as the CSV and the map so all outputs for a dose share one naming scheme
  ci_plot_title <- glue("{COUNTRY_CODE} {data_source} DTP{dose_number} CI")
  ci_plot_xlab <- admin_name_col
  ci_plot_ylab <- glue("DTP{dose_number} vaccinated (%)")
  ci_plot_filename <- glue("{filename_without_extension}_CI_plot.png")
  ci_dtp_plot <- make_ci_plot(
    df_to_plot = plot_data,
    admin_colname = admin_name_col,
    point_estimation_colname = sample_avg_col,
    ci_lower_colname = lower_bound_col,
    ci_upper_colname = upper_bound_col,
    title_name = ci_plot_title,
    x_title = ci_plot_xlab,
    y_title = ci_plot_ylab
  )
  ggsave(
    filename = file.path(OUTPUT_PLOTS_PATH, ci_plot_filename),
    plot = ci_dtp_plot,
    width = 8,
    height = 6,
    dpi = 300
  )
}

## Dropout rates plots, for each vaccine dose

In [None]:
# Read the dropout (attrition) indicator table for the chosen admin level
dtp_dropout_filename_without_extension <- glue(
  "{COUNTRY_CODE}_{data_source}_{admin_level}_{indicator_attrition}"
)
DTP_DROPOUT <- fread(
  file.path(
    OUTPUT_DATA_PATH,
    paste0(dtp_dropout_filename_without_extension, ".csv")
  )
)

In [None]:
# Make sure the output folder exists so ggsave() does not fail on a fresh workspace
dir.create(OUTPUT_PLOTS_PATH, recursive = TRUE, showWarnings = FALSE)

# The join does not depend on the dose pair, so compute it once.
# NOTE(review): this is an inner join, so admin units without dropout data are
# dropped from the map, whereas the coverage maps use all = TRUE. Confirm this
# difference is intended.
dropout_plot_data <- merge(spatial_data, DTP_DROPOUT, by = admin_cols)

# Draw one dropout map for every ordered pair (reference_dose < current_dose)
for (current_dose in vaccination_doses) {
  # seq_len() is empty when there is no earlier dose, unlike 1:(n - 1),
  # which counts backwards (1, 0) for the first dose and needed a guard
  for (reference_dose in seq_len(max(current_dose - 1, 0))) {
    dropout_colname <- glue("{indicator_attrition}_{reference_dose}_{current_dose}")
    print(glue("Plotting attrition for {dropout_colname}"))

    dropout_plot_title <- glue(
      "{COUNTRY_CODE} DTP vaccine dropout doses {reference_dose} to {current_dose} (%)"
    )
    dropout_plot <- make_dhs_map(
      plot_dt = dropout_plot_data,
      plot_colname = dropout_colname,
      title_name = dropout_plot_title,
      legend_title = "%",
      scale_limits = c(0, 100)
    )

    dropout_plot_filename <- glue(
      "{COUNTRY_CODE}_{data_source}_{admin_level}_{toupper(dropout_colname)}_plot.png"
    )
    ggsave(
      filename = file.path(OUTPUT_PLOTS_PATH, dropout_plot_filename),
      plot = dropout_plot,
      width = 8,
      height = 6,
      dpi = 300
    )
  }
}