## Setup start 

In [None]:
# Parameters
# Root folder of the SNT workspace; the project paths below are built from it.
SNT_ROOT_PATH <- "~/workspace"

In [None]:
# Project folders, all resolved relative to SNT_ROOT_PATH:
#   CODE_PATH           - shared R helper scripts
#   CONFIG_PATH         - SNT configuration file(s)
#   FORMATTED_DATA_PATH - destination of the formatted DHIS2 extracts
CODE_PATH <- file.path(SNT_ROOT_PATH, "code")
CONFIG_PATH <- file.path(SNT_ROOT_PATH, "configuration")
FORMATTED_DATA_PATH <- file.path(
  SNT_ROOT_PATH, "data", "dhis2", "extracts_formatted"
)

**Load functions**

In [None]:
# Load the shared SNT helpers used below (e.g. format_names()).
snt_utils_file <- file.path(CODE_PATH, "snt_utils.r")
source(snt_utils_file)
# source(file.path(CODE_PATH, "snt_functions.r"))

**Check and load required libraries**  

In [None]:
# Packages required by this notebook.
# TODO: review which of these are really needed.
# "geojsonsf" is listed explicitly because the geometry conversion step calls
# geojsonsf::geojson_sfc(); without it every geometry would fail to parse.
required_packages <- c(
  "lubridate", "zoo", "arrow", "dplyr", "tidyr", "stringr", "stringi",
  "jsonlite", "httr", "reticulate", "sf", "rmapshaper", "geojsonsf"
)

# Execute function (install_and_load() is defined outside this file;
# presumably it installs missing packages and attaches them - confirm).
install_and_load(required_packages)

In [None]:
# Environment variables needed to load openhexa.sdk from the right environment
Sys.setenv(
  PROJ_LIB = "/opt/conda/share/proj",
  GDAL_DATA = "/opt/conda/share/gdal",
  RETICULATE_PYTHON = "/opt/conda/bin/python"
)

# Show which Python binary reticulate resolved, then import openhexa.sdk
reticulate::py_config()$python
openhexa <- reticulate::import("openhexa.sdk")

### Load SNT configuration


In [None]:
# Load SNT config
# The configuration is read from SNT_config.json under CONFIG_PATH; any read or
# parse error is printed and re-raised so the notebook stops here.
config_file <- file.path(CONFIG_PATH, "SNT_config.json")
config_json <- tryCatch(
    {
        fromJSON(config_file)
    },
    error = function(e) {
        # ": " separates the prefix from the underlying error text
        msg <- paste0("Error while loading configuration: ", conditionMessage(e))
        cat(msg)
        stop(msg)
    }
)

# print(config_json$SNT_CONFIG)
msg <- paste0("SNT configuration loaded from  : ", config_file)
log_msg(msg)

**Checks for SNT mandatory configuration fields**

In [None]:
# CHECK SNT configuration
# Every mandatory field must be present AND carry a usable value: a field that
# is absent (NULL), zero-length, NA or blank stops the notebook with an
# explicit message instead of producing broken file/column names later on.
snt_config_mandatory <- c("COUNTRY_CODE", "DHIS2_ADMINISTRATION_1", "DHIS2_ADMINISTRATION_2") #, "ORG_UNITS_LEVELS_SELECTION")
for (conf in snt_config_mandatory) {
    conf_value <- config_json$SNT_CONFIG[[conf]]
    print(paste(conf, ":", config_json$SNT_CONFIG[conf]))
    is_missing <- is.null(conf_value) ||
        length(conf_value) == 0 ||
        all(is.na(conf_value)) ||
        all(!nzchar(trimws(as.character(conf_value))))
    if (is_missing) {
        msg <- paste("Missing configuration input:", conf)
        cat(msg)
        stop(msg)
    }
}

# Save the country code and the (upper-cased) administrative level columns
COUNTRY_CODE <- config_json$SNT_CONFIG$COUNTRY_CODE
ADMIN_1 <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_1)
ADMIN_2 <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_2)

### Load DHIS2 shapes data

- Load DHIS2 shapes from the latest dataset version


In [None]:
# Identifier of the dataset that holds the DHIS2 extracts
dataset_name <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_EXTRACTS

# Read "<COUNTRY_CODE>_dhis2_raw_shapes.parquet" from that dataset; on failure
# the error is printed and re-raised.
dhis2_data <- tryCatch(
    {
        get_latest_dataset_file_in_memory(
            dataset_name,
            paste0(COUNTRY_CODE, "_dhis2_raw_shapes.parquet")
        )
    },
    error = function(e) {
        msg <- paste("Error while loading DHIS2 shapes file for: " , COUNTRY_CODE, conditionMessage(e))
        cat(msg)
        stop(msg)
    }
)

# Log where the data came from together with its dimensions
dims_txt <- paste(dim(dhis2_data), collapse=", ")
msg <- paste0("DHIS2 shapes data loaded from dataset : ", dataset_name, " dataframe dimensions: ", dims_txt)
log_msg(msg)

## SNT Shapes formatting

In [None]:
# log
# msg <- paste0("SNT Shapes Formatting.")
# log_msg(msg)

In [None]:
# Select administrative levels
# The configured ADMIN_1 / ADMIN_2 values are the name columns; the matching ID
# columns are derived by replacing "_NAME" with "_ID".
adm_1_name_col <- ADMIN_1
adm_1_id_col <- gsub("_NAME", "_ID", adm_1_name_col)
adm_2_name_col <- ADMIN_2
adm_2_id_col <- gsub("_NAME", "_ID", adm_2_name_col)

# Administrative columns list
admin_columns <- c(
    adm_1_id_col,
    adm_1_name_col,
    adm_2_id_col,
    adm_2_name_col
)

# Fail early, with an explicit message, when an expected column is absent
# (otherwise the subset below raises a cryptic "undefined columns selected").
missing_columns <- setdiff(c(admin_columns, "GEOMETRY"), names(dhis2_data))
if (length(missing_columns) > 0) {
    msg <- paste(
        "Missing columns in DHIS2 shapes data:",
        paste(missing_columns, collapse = ", ")
    )
    cat(msg)
    stop(msg)
}

# # Rename to lower (for transformation?)
# if ("GEOMETRY" %in% names(dhis2_data)) {
#   names(dhis2_data)[names(dhis2_data) == "NAME"] <- "geometry"
# }

shapes_data <- dhis2_data[, c(admin_columns, "GEOMETRY")]

# Clean strings for admin 1 and admin 2 (format_names() in snt_utils.r)
shapes_data[[adm_1_name_col]] <- format_names(shapes_data[[adm_1_name_col]])
shapes_data[[adm_2_name_col]] <- format_names(shapes_data[[adm_2_name_col]])

# Select and rename columns to the standard ADM1_* / ADM2_* names
shapes_data <- shapes_data %>%
    select(
        ADM1_NAME = !!sym(adm_1_name_col),
        ADM1_ID = !!sym(adm_1_id_col),
        ADM2_NAME = !!sym(adm_2_name_col),
        ADM2_ID = !!sym(adm_2_id_col),
        GEOMETRY
    )

# Column names to upper case
colnames(shapes_data) <- clean_column_names(shapes_data)
head(shapes_data[, c("ADM1_ID", "ADM1_NAME", "ADM2_ID", "ADM2_NAME")])

### Transform shapes data to valid GeoJSON

In [None]:
# Safely convert the GEOMETRY column from GeoJSON text to 'sfc'
# (simple feature geometry column).
# Missing or unparsable geometries are replaced by an empty (but valid)
# geometry collection, and the number of replacements is logged.

# Without this guard a missing 'geojsonsf' package would be swallowed by the
# tryCatch below and every geometry would silently come out empty.
if (!requireNamespace("geojsonsf", quietly = TRUE)) {
  stop("Package 'geojsonsf' is required to parse the GEOMETRY column but is not installed.")
}

# Parse one GeoJSON value; returns the sfg on success and NULL otherwise.
parse_geometry <- function(g) {
  # NULL is tested first and the length is checked so that `||` only ever
  # sees scalar conditions.
  if (is.null(g) || length(g) != 1 || is.na(g)) {
    return(NULL)
  }
  tryCatch(
    geojsonsf::geojson_sfc(g)[[1]],  # extract sfg
    error = function(e) NULL
  )
}

parsed_geometries <- lapply(shapes_data$GEOMETRY, parse_geometry)
failed_geometries <- vapply(parsed_geometries, is.null, logical(1))

if (any(failed_geometries)) {
  log_msg(paste0(
    sum(failed_geometries), " of ", length(failed_geometries),
    " geometries were missing or invalid and were replaced by empty geometries."
  ))
}

# Empty but valid placeholder for every row that could not be parsed
parsed_geometries[failed_geometries] <- list(sf::st_geometrycollection())

# Convert to sfc safely
geometry_sfc <- sf::st_sfc(parsed_geometries)

# Bind geometry as a real 'sfc' column to the data (CRS set to EPSG:4326)
shapes_data_sf <- sf::st_sf(shapes_data[, !names(shapes_data) %in% "GEOMETRY"],
                        GEOMETRY = geometry_sfc,
                        crs = 4326)

In [None]:
# Simplify the MULTIPOLYGON geometries and store the result in a temporary
# column; rows of any other geometry type keep their original geometry.

# Create a temporary column for simplified geometry (starts as a plain copy)
shapes_data_sf$GEOMETRY_TEMP <- shapes_data_sf$GEOMETRY

# Identify only MULTIPOLYGON geometries
# NOTE(review): rows typed POLYGON (or anything else) are not simplified -
# confirm this is intended and not only meant to skip the empty placeholders.
valid_idx <- which(sf::st_geometry_type(shapes_data_sf$GEOMETRY) == "MULTIPOLYGON")

if (length(valid_idx) > 0) {
  # Subset only valid rows and keep only the geometry
  valid_sf <- sf::st_sf(GEOMETRY = shapes_data_sf$GEOMETRY[valid_idx])
  
  # Simplify and validate
  # keep = 0.05 / keep_shapes = TRUE: presumably retains about 5% of the
  # vertices without dropping any feature - see the rmapshaper documentation.
  simplified <- rmapshaper::ms_simplify(valid_sf, keep = 0.05, keep_shapes = TRUE)
  simplified$GEOMETRY <- sf::st_make_valid(simplified$GEOMETRY)
  
  # Insert back the simplified geometry
  # (assumes ms_simplify() returns one feature per input row, in the same
  # order - TODO confirm)
  shapes_data_sf$GEOMETRY_TEMP[valid_idx] <- simplified$GEOMETRY
}

In [None]:
# Replace the original geometry with the simplified one, then drop the
# temporary column.
shapes_data_sf$GEOMETRY <- shapes_data_sf$GEOMETRY_TEMP
shapes_data_sf$GEOMETRY_TEMP <- NULL

In [None]:
# Report the table size, then preview the first rows as a plain data frame
n_rows <- nrow(shapes_data_sf)
n_cols <- ncol(shapes_data_sf)
cat("Dimensions:", n_rows, "rows x", n_cols, "columns\n")
head(as.data.frame(shapes_data_sf))

In [None]:
# ADM1 shapes union
# Step 1: Validate or fix invalid geometries
# shapes_geo_simple <- shapes_geo_simple %>%
#   mutate(geometry = st_make_valid(geometry))

# # Step 2: Then proceed with the union
# aggregated_provinces <- shapes_geo_simple %>%
#   group_by(level_2_id, level_2_name) %>%
#   summarise(geometry = st_union(geometry), .groups = "drop") %>%
#   st_sf()

# Province level
# plot(shapes_geo_simple["level_2_name"], key.pos = NULL)

In [None]:
# ADM 2 level: map of the shapes coloured by ADM2_ID
plot(shapes_data_sf["ADM2_ID"])

### Output formatted shapes data

In [None]:
# Destination of the formatted shapes: <FORMATTED_DATA_PATH>/<COUNTRY_CODE>_shapes.geojson
fp <- file.path(FORMATTED_DATA_PATH, paste0(COUNTRY_CODE, "_shapes.geojson"))
out_msg <- paste0("Shapes data saved under: ", fp)

# Make sure the output folder exists before writing
if (!dir.exists(FORMATTED_DATA_PATH)) {
  dir.create(FORMATTED_DATA_PATH, recursive = TRUE)
}

# Check if the file exists and delete it
if (file.exists(fp)) {
  file.remove(fp)
}

# save geojson
# The layer is named after the file (without folder or extension) rather than
# the full path, which is what st_write() would derive from dsn by default.
sf::st_write(
  shapes_data_sf,
  dsn = fp,
  layer = tools::file_path_sans_ext(basename(fp)),
  delete_dsn = TRUE
)

# log
log_msg(out_msg)

### Data Summary 

In [None]:
# Summary of the formatted shapes table
shapes_summary <- summary(shapes_data_sf)
print(shapes_summary)