## Setup start 

In [None]:
# Parameters

# Root folder of the SNT workspace; the project paths are built from it.
SNT_ROOT_PATH <- "~/workspace"

In [None]:
# Project folders, all derived from the SNT root
CODE_PATH <- file.path(SNT_ROOT_PATH, "code")
CONFIG_PATH <- file.path(SNT_ROOT_PATH, "configuration")
FORMATTED_DATA_PATH <- file.path(
  SNT_ROOT_PATH,
  "data", "dhis2", "extracts_formatted"
)

**Load functions**

In [None]:
# Load the shared SNT utilities from the code folder.
# NOTE(review): presumably this script defines the helpers called further down
# (install_and_load, log_msg, format_names, ...) - confirm.
source(file = file.path(CODE_PATH, "snt_utils.r"))
# source(file.path(CODE_PATH, "snt_functions.r"))

**Check and load required libraries**  

In [None]:
# Packages needed by this notebook.
# TODO: check which of these libraries are really required.
required_packages <- c(
  "lubridate",
  "zoo",
  "arrow",
  "dplyr",
  "tidyr",
  "stringr",
  "stringi",
  "jsonlite",
  "httr",
  "reticulate"
)

# Hand the list over to the install/load helper
install_and_load(required_packages)

In [None]:
# Point reticulate at the conda Python interpreter so that openhexa.sdk is
# loaded from the right environment.
Sys.setenv(
  RETICULATE_PYTHON = "/opt/conda/bin/python"
)

In [None]:
# Show which Python interpreter reticulate is bound to, then import the
# openhexa.sdk Python module through it.
reticulate::py_config()$python
openhexa <- reticulate::import("openhexa.sdk")

### Load SNT configuration


In [None]:
# Load the SNT configuration from SNT_config.json in CONFIG_PATH.
# The notebook stops with an explicit message when the file cannot be read
# or parsed.
config_file <- file.path(CONFIG_PATH, "SNT_config.json")
config_json <- tryCatch(
  fromJSON(config_file),
  error = function(e) {
    # ": " keeps the underlying error from being glued to the prefix
    msg <- paste0("Error while loading configuration: ", conditionMessage(e))
    cat(msg)
    stop(msg)
  }
)

# print(config_json$SNT_CONFIG)
msg <- paste0("SNT configuration loaded from  : ", config_file)
log_msg(msg)

**Checks for SNT mandatory configuration fields**

In [None]:
# Validate the SNT configuration: every mandatory field must be present.
# Each field is printed before it is checked; the first missing one stops the run.
snt_config_mandatory <- c(
  "COUNTRY_CODE",
  "DHIS2_ADMINISTRATION_1",
  "DHIS2_ADMINISTRATION_2"
) # , "ORG_UNITS_LEVELS_SELECTION")

for (conf in snt_config_mandatory) {
  print(paste(conf, ":", config_json$SNT_CONFIG[conf]))
  if (!is.null(config_json$SNT_CONFIG[[conf]])) {
    next
  }
  msg <- paste("Missing configuration input:", conf)
  cat(msg)
  stop(msg)
}

# Keep the country code and the upper-cased admin level 2 setting in variables
COUNTRY_CODE <- config_json$SNT_CONFIG$COUNTRY_CODE
ADMIN_2 <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_2)

### Load DHIS2 pyramid data

- Load the DHIS2 pyramid from the latest dataset version


In [None]:
# Identifier of the dataset that holds the DHIS2 extracts (from the SNT config)
dataset_name <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_EXTRACTS

# Read the raw pyramid file through get_latest_dataset_file_in_memory();
# any failure is reported and stops the notebook.
dhis2_data <- tryCatch(
  get_latest_dataset_file_in_memory(
    dataset_name,
    paste0(COUNTRY_CODE, "_dhis2_raw_pyramid.parquet")
  ),
  error = function(e) {
    msg <- paste("Error while loading DHIS2 pyramid file for: ", COUNTRY_CODE, conditionMessage(e))
    cat(msg)
    stop(msg)
  }
)

# Report the source dataset and the dimensions of the loaded table
msg <- paste0(
  "DHIS2 pyramid data loaded from dataset : ", dataset_name,
  " dataframe dimensions: ", paste(dim(dhis2_data), collapse = ", ")
)
log_msg(msg)

In [None]:
# Preview the first three rows of the raw pyramid
head(dhis2_data, n = 3)

## SNT pyramid formatting

In [None]:
# Work on a copy so the raw extract (dhis2_data) stays untouched
pyramid_data <- dhis2_data

# Remove columns that contain only NA values.
# drop = FALSE keeps a data frame even when a single column survives.
has_values <- colSums(!is.na(pyramid_data)) > 0
pyramid_data <- pyramid_data[, has_values, drop = FALSE]

# Clean the strings of every name column. The match ignores case because the
# column names are only converted to upper case further down, so lower-case
# "*_name" columns would otherwise be skipped.
is_name_column <- grepl("_NAME", colnames(pyramid_data), ignore.case = TRUE)
name_columns <- colnames(pyramid_data)[is_name_column]
for (column in name_columns) {
  print(paste0("Format : ", column))
  # Clean strings
  pyramid_data[[column]] <- format_names(pyramid_data[[column]])
}

# Column names to upper case
colnames(pyramid_data) <- clean_column_names(pyramid_data)

In [None]:
# Select columns to keep (in case we need until level of adm2)
# admin_col_selection <- c()
# for (i in 1:as.numeric(str_extract(ADMIN_2, "\\d+"))) {
#   admin_col_selection <-  c(admin_col_selection, paste0("LEVEL_", i, "_ID"), paste0("LEVEL_", i, "_NAME"))
# }
# print(admin_col_selection)

# pyramid_data <- pyramid_data[ , admin_col_selection]
# pyramid_data <- pyramid_data[!duplicated(pyramid_data), ]
# head(pyramid_data)

In [None]:
# Preview the first three rows of the formatted pyramid
head(pyramid_data, n = 3)

### Extract longitude/latitude from geometry column (GeoJSON)

In [None]:
# Convert the GeoJSON text in GEOMETRY into LONGITUDE / LATITUDE columns.

# Return c(longitude, latitude) for one GeoJSON string.
# Missing or blank input and any geometry that is not a "Point" yield
# c(NA, NA): indexing the coordinates of other geometry types would not give
# a longitude/latitude pair. Malformed JSON still raises the fromJSON error.
extract_point_lon_lat <- function(geometry) {
  no_coordinates <- c(NA_real_, NA_real_)
  if (length(geometry) != 1 || is.na(geometry) || !nzchar(trimws(geometry))) {
    return(no_coordinates)
  }
  parsed <- fromJSON(geometry)
  if (!is.list(parsed) || !identical(parsed[["type"]], "Point")) {
    return(no_coordinates)
  }
  coordinates <- parsed[["coordinates"]]
  if (!is.numeric(coordinates) || length(coordinates) < 2) {
    return(no_coordinates)
  }
  as.numeric(coordinates[1:2])
}

# As before, the result is a tibble with the new columns appended at the end
pyramid_data <- as_tibble(pyramid_data)

if ("GEOMETRY" %in% colnames(pyramid_data)) {
  # 2 x nrow matrix: row 1 = longitude, row 2 = latitude
  lon_lat <- vapply(
    pyramid_data$GEOMETRY,
    extract_point_lon_lat,
    numeric(2),
    USE.NAMES = FALSE
  )
  pyramid_data$LONGITUDE <- lon_lat[1, ]
  pyramid_data$LATITUDE <- lon_lat[2, ]
  pyramid_data$GEOMETRY <- NULL
} else {
  # GEOMETRY is absent when the extract has no geometry at all (an all-NA
  # column is removed during formatting); keep the output schema stable.
  pyramid_data$LONGITUDE <- rep(NA_real_, nrow(pyramid_data))
  pyramid_data$LATITUDE <- rep(NA_real_, nrow(pyramid_data))
}

head(pyramid_data, 3)

### Output formatted pyramid data

In [None]:
# Output files share the base name <COUNTRY_CODE>_pyramid and differ only in
# their extension.
pyramid_parquet_path <- file.path(FORMATTED_DATA_PATH, paste0(COUNTRY_CODE, "_pyramid.parquet"))
pyramid_csv_path <- file.path(FORMATTED_DATA_PATH, paste0(COUNTRY_CODE, "_pyramid.csv"))

# Message logged once the files are written; it reports the parquet location
out_msg <- paste0("Pyramid data saved under: ", pyramid_parquet_path)

# Parquet output
write_parquet(pyramid_data, pyramid_parquet_path)

# CSV output, without row names
write.csv(pyramid_data, pyramid_csv_path, row.names = FALSE)

In [None]:
# Log the location of the saved pyramid file (out_msg is built in the cell that writes the outputs)
log_msg(out_msg)

### Data Summary 

In [None]:
# Print a per-column summary of the formatted pyramid
pyramid_summary <- summary(pyramid_data)
print(pyramid_summary)