In [0]:
pacman::p_load(SparkR, dplyr, labelled, haven, sf, stringr)

#### Import GADM gpkg

In [0]:
# Path of the GADM GeoPackage on the DHS data volume.
read_path <- "/Volumes/idm_dhs/dhs_data/dhs_data_shp/gadm_410.gpkg"

# Load the whole layer as an sf object and print its structure for inspection.
gadm <- st_read(dsn = read_path)
str(gadm)

In [0]:
# Build the Nigeria-only boundary table used to attach admin names to clusters.
gadm1 <- gadm %>%
  # Strip embedded spaces from the GID codes so they compare cleanly.
  dplyr::mutate(across(c(GID_0, GID_1, GID_2, GID_3, GID_4, GID_5),
                       ~ str_remove_all(.x, pattern = fixed(" ")))) %>%
  # Keep Nigeria only. Doing this right after GID_0 is cleaned means the
  # all-column conversions below run on the national subset rather than on
  # every row of the full table; the retained rows are the same either way
  # because each later step works row by row and never alters "NGA".
  dplyr::filter(GID_0 == "NGA") %>%
  # Every attribute column (everything except the geometry) becomes character.
  dplyr::mutate(across(c(-geom), as.character)) %>%
  # Empty strings become NA. A lambda is used because passing extra arguments
  # through across(...) is deprecated in dplyr >= 1.1.
  dplyr::mutate(across(c(-geom), ~ na_if(.x, ""))) %>%
  # GID_n: start from GID_5 (or GID_4 when GID_5 is missing), then step up one
  # level for each missing level, so it ends as the code just above the
  # shallowest missing level (GID_5 when nothing is missing).
  dplyr::mutate(GID_n = ifelse(is.na(GID_5), GID_4, GID_5),
                GID_n = ifelse(is.na(GID_4), GID_3, GID_n),
                GID_n = ifelse(is.na(GID_3), GID_2, GID_n),
                GID_n = ifelse(is.na(GID_2), GID_1, GID_n),
                GID_n = ifelse(is.na(GID_1), GID_0, GID_n))

str(gadm1)

#### Import DHS GPS data

In [0]:
# DHS GPS (GE) shapefile folders to load; each folder holds a .shp of the same name.
shp_files <- c("NGGE23FL", "NGGE4BFL", "NGGE52FL", "NGGE61FL", "NGGE6AFL", "NGGE71FL", "NGGE7BFL", "NGGE81FL")

# READ IN GEOSPATIAL FILES AND BIND TOGETHER TO CREATE A SINGLE FILE
# Each file is read into its own list slot and everything is bound once at the
# end, instead of growing the result with rbind() on every iteration starting
# from an empty (non-sf) data.frame.
pts_list <- lapply(shp_files, function(gf) {
  shp_path <- paste0("/Volumes/idm_dhs/dhs_data/dhs_data_shp/", gf, "/", gf, ".shp")
  pts <- st_read(shp_path, quiet = TRUE) %>%
    st_make_valid() %>%
    # geo_file keeps the source folder name; survey is the first two characters
    # plus the last four characters of that name (e.g. "NGGE23FL" -> "NG23FL").
    dplyr::mutate(geo_file = gf,
                  survey = paste0(substr(gf, 1, 2), substr(gf, nchar(gf) - 3, nchar(gf)))) %>%
    # The literal string "NULL" in any attribute column is treated as missing.
    dplyr::mutate(across(c(-geometry), ~ case_when(.x == "NULL" ~ NA,
                                                   TRUE ~ .x)))
  print(gf)  # progress indicator
  pts
})

pts_comb <- do.call(rbind, pts_list)

# Two survey codes derived from the GPS file names are replaced with different
# codes; every other code is left untouched.
survey_renames <- c("NG23FL" = "NG21FL", "NG52FL" = "NG53FL")

pts_comb <- pts_comb %>%
  # Look each code up in the rename table; codes without an entry come back as
  # NA from the lookup and therefore fall through to the original value.
  dplyr::mutate(survey = dplyr::coalesce(unname(survey_renames[survey]), survey))

# Attach the GADM attributes of the polygon each cluster point lies within.
clusters_joined <- st_join(pts_comb, gadm1, join = st_within)

# Coordinate matrix of the joined points: column 1 is used as the longitude,
# column 2 as the latitude.
cluster_xy <- sf::st_coordinates(clusters_joined)

# Plain (non-spatial) cluster lookup: one distinct row per combination of the
# selected columns, renamed to the names used by the survey data.
pts_comb1 <- clusters_joined %>%
  st_drop_geometry() %>%
  dplyr::mutate(cluster_lon = cluster_xy[, 1],
                cluster_lat = cluster_xy[, 2]) %>%
  dplyr::select(survey,
                geo_file,
                dhs_cc = DHSCC,
                survey_year = DHSYEAR,
                v001 = DHSCLUST,
                iso_code = GID_0,
                admin1 = NAME_1,
                cluster_lon,
                cluster_lat) %>%
  distinct()

str(pts_comb1)

#### Import DHS KR data

In [0]:
# Schema (under the idm_dhs catalog) whose tables are listed.
db <- "nga"

# Collect the SHOW TABLES result into a local data frame.
df_tables <- as.data.frame(SparkR::sql(paste0("SHOW TABLES IN idm_dhs.", db)))

# Keep the tables whose name contains "kr" and build the qualified name.
kr_rows <- dplyr::filter(df_tables, grepl('kr', tableName))
df_tables_kr <- dplyr::mutate(kr_rows, table_name = paste0(database, ".", tableName))

# print(df_tables_kr$table_name)
display(df_tables_kr)

In [0]:
# Folder holding the raw DHS Stata (.dta) files.
raw_dta_folder <- "/Volumes/idm_dhs/dhs_data/dhs_data_dta"

# Names of every entry in the folder, gathered in one pass rather than by
# growing a list with append() inside a loop.
file_list <- lapply(dbutils.fs.ls(raw_dta_folder), function(entry) entry$name)

# as.character() keeps the column present (with zero rows) when the folder is
# empty; unlist() alone would return NULL and yield a zero-column data frame,
# which breaks the filter below.
df_file_list <- data.frame(file_name = as.character(unlist(file_list)))

# Keep only the files whose name starts with "NGKR", ignoring case.
df_file_list <- df_file_list %>%
  dplyr::filter(startsWith(toupper(file_name), "NGKR"))

display(df_file_list)

In [0]:
# Compute the child-vaccination indicators for every KR file listed in
# df_file_list and stack the per-survey results into `ri_indicators`.
# The indicator logic follows the CH_VAC.R program credited below.
kr_files <- as.character(unique(df_file_list$file_name))

# One slot per survey file; everything is bound together once after the loop
# instead of re-binding the growing result on every iteration.
kr_results <- setNames(vector("list", length(kr_files)), kr_files)

for (dhs_file in kr_files) {
  print(dhs_file)

  # Read from the same folder that was listed to build df_file_list.
  file_path <- file.path(raw_dta_folder, dhs_file)

  # Survey code: first two characters plus the last four characters of the
  # file name without its extension, upper-cased (e.g. "NGKR53FL" -> "NG53FL").
  file_stem <- word(dhs_file, 1, sep = "\\.")
  survey_id <- toupper(paste0(substr(file_stem, 1, 2),
                              substr(file_stem, nchar(file_stem) - 3, nchar(file_stem))))

  KRdata <- read_dta(file_path) %>%
    dplyr::mutate(survey = survey_id)


  # /*****************************************************************************************************
  # Program:        CH_VAC.R
  # Purpose:        Code vaccination variables.
  # Data inputs:    KR dataset
  # Data outputs:   coded variables
  # Author:         Shireen Assaf
  # Date last modified: August 16, 2022 by Shireen Assaf
  # Notes:          The original program reports for two age groups (12-23) and (24-35)
  #                 and subsets the KR file to the age group of interest.
  #
  #                 !! In this adaptation the age-group filter is commented out (see
  #                 "Selecting children" below): KRvac keeps every child selected by the
  #                 b5 condition, and agegroup is only carried along as a column.
  #
  #                 Vaccination indicators are country specific. However, most common vaccines are coded below and the same logic can be applied to others.
  #                 When the vaccine is a single dose, the logic for single dose vaccines can be used (ex: bcg).
  #                 When the vaccine has 3 doses, the logic for multiple dose vaccines can be used (ex: dpt)
  # *****************************************************************************************************/
  # /*----------------------------------------------------------------------------
  # Variables created in this file:
  # ch_bcg_card         "BCG vaccination according to card"
  # ch_bcg_moth         "BCG vaccination according to mother"
  # ch_bcg_either       "BCG vaccination according to either source"
  #
  # ch_pent1_card       "Pentavalent 1st dose vaccination according to card"
  # ch_pent1_moth       "Pentavalent 1st dose vaccination according to mother"
  # ch_pent1_either     "Pentavalent 1st dose vaccination according to either source"
  # ch_pent2_card       "Pentavalent 2nd dose vaccination according to card"
  # ch_pent2_moth       "Pentavalent 2nd dose vaccination according to mother"
  # ch_pent2_either     "Pentavalent 2nd dose vaccination according to either source"
  # ch_pent3_card       "Pentavalent 3rd dose vaccination according to card"
  # ch_pent3_moth       "Pentavalent 3rd dose vaccination according to mother"
  # ch_pent3_either     "Pentavalent 3rd dose vaccination according to either source"
  #
  # ch_polio0_card      "Polio at birth vaccination according to card"
  # ch_polio0_moth      "Polio at birth vaccination according to mother"
  # ch_polio0_either    "Polio at birth vaccination according to either source"
  # ch_polio1_card      "Polio 1st dose vaccination according to card"
  # ch_polio1_moth      "Polio 1st dose vaccination according to mother"
  # ch_polio1_either    "Polio 1st dose vaccination according to either source"
  # ch_polio2_card      "Polio 2nd dose vaccination according to card"
  # ch_polio2_moth      "Polio 2nd dose vaccination according to mother"
  # ch_polio2_either    "Polio 2nd dose vaccination according to either source"
  # ch_polio3_card      "Polio 3rd dose vaccination according to card"
  # ch_polio3_moth      "Polio 3rd dose vaccination according to mother"
  # ch_polio3_either    "Polio 3rd dose vaccination according to either source"
  #
  # ch_pneumo1_card     "Pneumococcal 1st dose vaccination according to card"
  # ch_pneumo1_moth     "Pneumococcal 1st dose vaccination according to mother"
  # ch_pneumo1_either   "Pneumococcal 1st dose vaccination according to either source"
  # ch_pneumo2_card     "Pneumococcal 2nd dose vaccination according to card"
  # ch_pneumo2_moth     "Pneumococcal 2nd dose vaccination according to mother"
  # ch_pneumo2_either   "Pneumococcal 2nd dose vaccination according to either source"
  # ch_pneumo3_card     "Pneumococcal 3rd dose vaccination according to card"
  # ch_pneumo3_moth     "Pneumococcal 3rd dose vaccination according to mother"
  # ch_pneumo3_either   "Pneumococcal 3rd dose vaccination according to either source"
  #
  # ch_rotav1_card      "Rotavirus 1st dose vaccination according to card"
  # ch_rotav1_moth      "Rotavirus 1st dose vaccination according to mother"
  # ch_rotav1_either    "Rotavirus 1st dose vaccination according to either source"
  # ch_rotav2_card      "Rotavirus 2nd dose vaccination according to card"
  # ch_rotav2_moth      "Rotavirus 2nd dose vaccination according to mother"
  # ch_rotav2_either    "Rotavirus 2nd dose vaccination according to either source"
  # ch_rotav3_card      "Rotavirus 3rd dose vaccination according to card"
  # ch_rotav3_moth      "Rotavirus 3rd dose vaccination according to mother"
  # ch_rotav3_either    "Rotavirus 3rd dose vaccination according to either source"
  #
  # ch_meas_card        "Measles vaccination according to card"
  # ch_meas_moth        "Measles vaccination according to mother"
  # ch_meas_either      "Measles vaccination according to either source"
  #
  # ch_allvac_card      "All basic vaccinations according to card"
  # ch_allvac_moth      "All basic vaccinations according to mother"
  # ch_allvac_either    "All basic vaccinations according to either source"
  #
  # ch_novac_card       "No vaccinations according to card"
  # ch_novac_moth       "No vaccinations according to mother"
  # ch_novac_either     "No vaccinations according to either source"
  #
  # ch_card_ever_had    "Ever had a vaccination card"
  # ch_card_seen        "Vaccination card seen"
  # ----------------------------------------------------------------------------*/

  # weight variable
  KRdata <- KRdata %>%
    mutate(wt = v005/1000000)

  # age of child. If b19 is not available in the data use v008 - b3
  # A missing b19 column is added as all-NA so the check below covers both
  # "column absent" and "column present but empty".
  if (!"b19" %in% names(KRdata)) {
    KRdata$b19 <- NA
  }
  b19_included <- if (all(is.na(KRdata$b19))) 0 else 1

  if (b19_included == 1) {
    KRdata <- KRdata %>%
      mutate(age = b19)
  } else {
    KRdata <- KRdata %>%
      mutate(age = v008 - b3)
  }

  # *** Age groups used for reporting (12-month bands up to 59).
  KRdata <- KRdata %>%
    mutate(agegroup =
            case_when(
              age <= 11 ~ 0,
              age >= 12 & age <= 23 ~ 1,
              age >= 24 & age <= 35 ~ 2,
              age >= 36 & age <= 47 ~ 3,
              age >= 48 & age <= 59 ~ 4)) %>%
    set_value_labels(agegroup = c("0-11" = 0, "12-23" = 1, "24-35" = 2, "36-47" = 3, "48-59" = 4)) %>%
    set_variable_labels(agegroup = "age group of child for vaccination")

  # Selecting children
  # Create subset of KRfile to select for children for VAC indicators
  # The age-group restriction of the original program is disabled here; only
  # the b5 condition is applied.
  KRvac <- KRdata %>%
    # subset(agegroup==1 & b5==1) # select age group and live children
    # subset(agegroup == 1 & b5 %in% c("yes", "Yes", 1))
    subset(b5 %in% c("yes", "Yes", 1))

  # *******************************************************************************

  # Source of vaccination information. We need this variable to code vaccination indicators by source.
  # Throughout this loop, any h* variable missing from a survey is added as
  # all-NA so the same code runs on every file.
  if (!"h1" %in% names(KRvac)){KRvac$h1 <- NA}

  KRvac <- KRvac %>%
    mutate(source =
            case_when(h1==1 ~ 1, h1==0 | h1==2 | h1==3 ~ 2  )) %>%
    set_value_labels(source = c("card" = 1, "mother"=2)) %>%
    set_variable_labels(source = "source of vaccination information")

  # *** BCG ***
  # //BCG either source
  if (!"h2" %in% names(KRvac)){KRvac$h2 <- NA}

  KRvac <- KRvac %>%
    mutate(ch_bcg_either =
            case_when(h2%in%c(1,2,3) ~ 1, h2%in%c(0,8)   ~ 0  )) %>%
    set_value_labels(ch_bcg_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_bcg_either = "BCG vaccination according to either source")

  # //BCG mother's report
  KRvac <- KRvac %>%
    mutate(ch_bcg_moth =
            case_when(h2%in%c(1,2,3) & source==2 ~ 1, TRUE ~ 0)) %>%
    set_value_labels(ch_bcg_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_bcg_moth = "BCG vaccination according to mother")

  # //BCG by card
  KRvac <- KRvac %>%
    mutate(ch_bcg_card =
            case_when(h2%in%c(1,2,3) & source==1 ~ 1, TRUE ~ 0)) %>%
    set_value_labels(ch_bcg_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_bcg_card = "BCG vaccination according to card")

  # *** Pentavalent ***
  # //DPT 1, 2, 3 either source
  if (!"h3" %in% names(KRvac)){KRvac$h3 <- NA}
  if (!"h5" %in% names(KRvac)){KRvac$h5 <- NA}
  if (!"h7" %in% names(KRvac)){KRvac$h7 <- NA}

  KRvac <- KRvac %>%
    mutate(dpt1 = case_when(h3%in%c(1,2,3) ~ 1, h3%in%c(0,8) ~ 0  )) %>%
    mutate(dpt2 = case_when(h5%in%c(1,2,3) ~ 1, h5%in%c(0,8) ~ 0  )) %>%
    mutate(dpt3 = case_when(h7%in%c(1,2,3) ~ 1, h7%in%c(0,8) ~ 0  )) %>%
    mutate(dptsum = dpt1 + dpt2 + dpt3)
  # This step is performed for multi-dose vaccines to take care of any gaps in the vaccination history.
  # See DHS guide to statistics for further explanation
  KRvac <- KRvac %>%
    mutate(ch_pent1_either = case_when(dptsum >=1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pent1_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pent1_either = "Pentavalent 1st dose vaccination according to either source") %>%
    mutate(ch_pent2_either = case_when(dptsum >=2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pent2_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pent2_either = "Pentavalent 2nd dose vaccination according to either source") %>%
    mutate(ch_pent3_either = case_when(dptsum >=3 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pent3_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pent3_either = "Pentavalent 3rd dose vaccination according to either source")

  # //DPT 1, 2, 3 mother's report
  KRvac <- KRvac %>%
    mutate(ch_pent1_moth = case_when(dptsum >=1 & source==2~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pent1_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pent1_moth = "Pentavalent 1st dose vaccination according to mother") %>%
    mutate(ch_pent2_moth = case_when(dptsum >=2 & source==2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pent2_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pent2_moth = "Pentavalent 2nd dose vaccination according to mother") %>%
    mutate(ch_pent3_moth = case_when(dptsum >=3 & source==2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pent3_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pent3_moth = "Pentavalent 3rd dose vaccination according to mother")

  # //DPT 1, 2, 3 by card
  KRvac <- KRvac %>%
    mutate(ch_pent1_card = case_when(dptsum >=1 & source==1~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pent1_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pent1_card = "Pentavalent 1st dose vaccination according to card") %>%
    mutate(ch_pent2_card = case_when(dptsum >=2 & source==1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pent2_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pent2_card = "Pentavalent 2nd dose vaccination according to card") %>%
    mutate(ch_pent3_card = case_when(dptsum >=3 & source==1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pent3_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pent3_card = "Pentavalent 3rd dose vaccination according to card")

  # *** Polio ***
  # //polio 0, 1, 2, 3 either source
  if (!"h4" %in% names(KRvac)){KRvac$h4 <- NA}
  if (!"h6" %in% names(KRvac)){KRvac$h6 <- NA}
  if (!"h8" %in% names(KRvac)){KRvac$h8 <- NA}

  KRvac <- KRvac %>%
    mutate(polio1 = case_when(h4%in%c(1,2,3) ~ 1, h4%in%c(0,8) ~ 0  )) %>%
    mutate(polio2 = case_when(h6%in%c(1,2,3) ~ 1, h6%in%c(0,8) ~ 0  )) %>%
    mutate(polio3 = case_when(h8%in%c(1,2,3) ~ 1, h8%in%c(0,8) ~ 0  )) %>%
    mutate(poliosum=polio1 + polio2 + polio3)
  # This step is performed for multi-dose vaccines to take care of any gaps in the vaccination history.
  # See DHS guide to statistics for further explanation
  if (!"h0" %in% names(KRvac)){KRvac$h0 <- NA}

  KRvac <- KRvac %>%
    mutate(ch_polio0_either = case_when(h0%in%c(1,2,3) ~ 1, h0%in%c(0,8) ~ 0 )) %>%
    set_value_labels(ch_polio0_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio0_either = "Polio at birth vaccination according to either source") %>%
    mutate(ch_polio1_either = case_when(poliosum >=1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_polio1_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio1_either = "Polio 1st dose vaccination according to either source") %>%
    mutate(ch_polio2_either = case_when(poliosum >=2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_polio2_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio2_either = "Polio 2nd dose vaccination according to either source") %>%
    mutate(ch_polio3_either = case_when(poliosum >=3 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_polio3_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio3_either = "Polio 3rd dose vaccination according to either source")

  # //polio 0, 1, 2, 3 mother's report
  KRvac <- KRvac %>%
    mutate(ch_polio0_moth = case_when(h0%in%c(1,2,3) & source==2 ~ 1, TRUE ~ 0 )) %>%
    set_value_labels(ch_polio0_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio0_moth = "Polio at birth vaccination according to mother") %>%
    mutate(ch_polio1_moth = case_when(poliosum >=1 & source==2~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_polio1_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio1_moth = "Polio 1st dose vaccination according to mother") %>%
    mutate(ch_polio2_moth = case_when(poliosum >=2 & source==2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_polio2_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio2_moth = "Polio 2nd dose vaccination according to mother") %>%
    mutate(ch_polio3_moth = case_when(poliosum >=3 & source==2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_polio3_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio3_moth = "Polio 3rd dose vaccination according to mother")

  # //polio 0, 1, 2, 3 by card
  KRvac <- KRvac %>%
    mutate(ch_polio0_card = case_when(h0%in%c(1,2,3) & source==1 ~ 1, TRUE ~ 0 )) %>%
    set_value_labels(ch_polio0_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio0_card = "Polio at birth vaccination according to card") %>%
    mutate(ch_polio1_card = case_when(poliosum >=1 & source==1~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_polio1_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio1_card = "Polio 1st dose vaccination according to card") %>%
    mutate(ch_polio2_card = case_when(poliosum >=2 & source==1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_polio2_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio2_card = "Polio 2nd dose vaccination according to card") %>%
    mutate(ch_polio3_card = case_when(poliosum >=3 & source==1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_polio3_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_polio3_card = "Polio 3rd dose vaccination according to card")

  # *** Pneumococcal  ***
  # //Pneumococcal 1, 2, 3 either source
  # Some surveys do not have information on this vaccine.
  # h57-h59 are guarded here as well; they are used by the Rotavirus section.
  if (!"h54" %in% names(KRvac)){KRvac$h54 <- NA}
  if (!"h55" %in% names(KRvac)){KRvac$h55 <- NA}
  if (!"h56" %in% names(KRvac)){KRvac$h56 <- NA}
  if (!"h57" %in% names(KRvac)){KRvac$h57 <- NA}
  if (!"h58" %in% names(KRvac)){KRvac$h58 <- NA}
  if (!"h59" %in% names(KRvac)){KRvac$h59 <- NA}

  KRvac <- KRvac %>%
    mutate(Pneumo1 = case_when(h54%in%c(1,2,3) ~ 1, h54%in%c(0,8) ~ 0  )) %>%
    mutate(Pneumo2 = case_when(h55%in%c(1,2,3) ~ 1, h55%in%c(0,8) ~ 0  )) %>%
    mutate(Pneumo3 = case_when(h56%in%c(1,2,3) ~ 1, h56%in%c(0,8) ~ 0  )) %>%
    mutate(Pneumosum= Pneumo1+Pneumo2+Pneumo3)
  # This step is performed for multi-dose vaccines to take care of any gaps in the vaccination history.
  # See DHS guide to statistics for further explanation
  KRvac <- KRvac %>%
    mutate(ch_pneumo1_either = case_when(Pneumosum >=1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pneumo1_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pneumo1_either = "Pneumococcal 1st dose vaccination according to either source") %>%
    mutate(ch_pneumo2_either = case_when(Pneumosum >=2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pneumo2_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pneumo2_either = "Pneumococcal 2nd dose vaccination according to either source") %>%
    mutate(ch_pneumo3_either = case_when(Pneumosum >=3 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pneumo3_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pneumo3_either = "Pneumococcal 3rd dose vaccination according to either source")

  # //Pneumococcal 1, 2, 3 mother's report
  KRvac <- KRvac %>%
    mutate(ch_pneumo1_moth = case_when(Pneumosum >=1 & source==2~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pneumo1_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pneumo1_moth = "Pneumococcal 1st dose vaccination according to mother") %>%
    mutate(ch_pneumo2_moth = case_when(Pneumosum >=2 & source==2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pneumo2_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pneumo2_moth = "Pneumococcal 2nd dose vaccination according to mother") %>%
    mutate(ch_pneumo3_moth = case_when(Pneumosum >=3 & source==2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pneumo3_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pneumo3_moth = "Pneumococcal 3rd dose vaccination according to mother")

  # //Pneumococcal 1, 2, 3 by card
  KRvac <- KRvac %>%
    mutate(ch_pneumo1_card = case_when(Pneumosum >=1 & source==1~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pneumo1_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pneumo1_card = "Pneumococcal 1st dose vaccination according to card") %>%
    mutate(ch_pneumo2_card = case_when(Pneumosum >=2 & source==1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pneumo2_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pneumo2_card = "Pneumococcal 2nd dose vaccination according to card") %>%
    mutate(ch_pneumo3_card = case_when(Pneumosum >=3 & source==1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_pneumo3_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_pneumo3_card = "Pneumococcal 3rd dose vaccination according to card")

  # *** Rotavirus  ****
  # //Rotavirus 1, 2, 3 either source
  # Some surveys do not have information on this vaccine.
  KRvac <- KRvac %>%
    mutate(rotav1 = case_when(h57%in%c(1,2,3) ~ 1, h57%in%c(0,8) ~ 0  )) %>%
    mutate(rotav2 = case_when(h58%in%c(1,2,3) ~ 1, h58%in%c(0,8) ~ 0  )) %>%
    mutate(rotav3 = case_when(h59%in%c(1,2,3) ~ 1, h59%in%c(0,8) ~ 0  )) %>%
    mutate(rotavsum= rotav1+rotav2+rotav3)
  # This step is performed for multi-dose vaccines to take care of any gaps in the vaccination history.
  # See DHS guide to statistics for further explanation
  KRvac <- KRvac %>%
    mutate(ch_rotav1_either = case_when(rotavsum >=1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_rotav1_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_rotav1_either = "Rotavirus 1st dose vaccination according to either source") %>%
    mutate(ch_rotav2_either = case_when(rotavsum >=2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_rotav2_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_rotav2_either = "Rotavirus 2nd dose vaccination according to either source") %>%
    mutate(ch_rotav3_either = case_when(rotavsum >=3 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_rotav3_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_rotav3_either = "Rotavirus 3rd dose vaccination according to either source")

  # //Rotavirus 1, 2, 3 mother's report
  KRvac <- KRvac %>%
    mutate(ch_rotav1_moth = case_when(rotavsum >=1 & source==2~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_rotav1_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_rotav1_moth = "Rotavirus 1st dose vaccination according to mother") %>%
    mutate(ch_rotav2_moth = case_when(rotavsum >=2 & source==2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_rotav2_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_rotav2_moth = "Rotavirus 2nd dose vaccination according to mother") %>%
    mutate(ch_rotav3_moth = case_when(rotavsum >=3 & source==2 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_rotav3_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_rotav3_moth = "Rotavirus 3rd dose vaccination according to mother")

  # //Rotavirus 1, 2, 3 by card
  KRvac <- KRvac %>%
    mutate(ch_rotav1_card = case_when(rotavsum >=1 & source==1~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_rotav1_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_rotav1_card = "Rotavirus 1st dose vaccination according to card") %>%
    mutate(ch_rotav2_card = case_when(rotavsum >=2 & source==1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_rotav2_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_rotav2_card = "Rotavirus 2nd dose vaccination according to card") %>%
    mutate(ch_rotav3_card = case_when(rotavsum >=3 & source==1 ~ 1, TRUE ~ 0  )) %>%
    set_value_labels(ch_rotav3_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_rotav3_card = "Rotavirus 3rd dose vaccination according to card")

  # *** Measles ***
  # //Measles either source
  if (!"h9" %in% names(KRvac)){KRvac$h9 <- NA}

  KRvac <- KRvac %>%
    mutate(ch_meas_either =
            case_when(h9%in%c(1,2,3) ~ 1, h9%in%c(0,8)   ~ 0  )) %>%
    set_value_labels(ch_meas_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_meas_either = "Measles vaccination according to either source")

  # //Measles mother's report
  KRvac <- KRvac %>%
    mutate(ch_meas_moth =
            case_when(h9%in%c(1,2,3) & source==2 ~ 1, TRUE ~ 0)) %>%
    set_value_labels(ch_meas_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_meas_moth = "Measles vaccination according to mother")

  # //Measles by card
  KRvac <- KRvac %>%
    mutate(ch_meas_card =
            case_when(h9%in%c(1,2,3) & source==1 ~ 1, TRUE ~ 0)) %>%
    set_value_labels(ch_meas_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_meas_card = "Measles vaccination according to card")

  # *** All vaccinations ***
  # "All basic" here means BCG, 3rd pentavalent dose, 3rd polio dose and measles.
  KRvac <- KRvac %>%
    mutate(ch_allvac_either =
            case_when(ch_bcg_either==1&ch_pent3_either==1&ch_polio3_either==1&ch_meas_either==1 ~ 1, TRUE ~ 0)) %>%
    set_value_labels(ch_allvac_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_allvac_either = "All basic vaccinations according to either source")

  KRvac <- KRvac %>%
    mutate(ch_allvac_moth =
            case_when(ch_bcg_either==1&ch_pent3_either==1&ch_polio3_either==1&ch_meas_either==1 & source==2 ~ 1, TRUE ~ 0)) %>%
    set_value_labels(ch_allvac_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_allvac_moth = "All basic vaccinations according to mother")

  KRvac <- KRvac %>%
    mutate(ch_allvac_card =
            case_when(ch_bcg_either==1&ch_pent3_either==1&ch_polio3_either==1&ch_meas_either==1 & source==1 ~ 1, TRUE ~ 0)) %>%
    set_value_labels(ch_allvac_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_allvac_card = "All basic vaccinations according to card")

  # *** No vaccinations ***
  KRvac <- KRvac %>%
    mutate(ch_novac_either =
            case_when(ch_bcg_either==0&ch_pent1_either==0&ch_pent2_either==0&ch_pent3_either==0&
                      ch_polio0_either==0&ch_polio1_either==0&ch_polio2_either==0&ch_polio3_either==0&
                      ch_meas_either==0 ~ 1,
                      TRUE ~ 0)) %>%
    set_value_labels(ch_novac_either = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_novac_either = "No vaccinations according to either source")

  KRvac <- KRvac %>%
    mutate(ch_novac_moth =
            case_when(ch_bcg_either==0&ch_pent1_either==0&ch_pent2_either==0&ch_pent3_either==0&
                        ch_polio0_either==0&ch_polio1_either==0&ch_polio2_either==0&ch_polio3_either==0&
                        ch_meas_either==0& source==2 ~ 1,
                        TRUE ~ 0)) %>%
    set_value_labels(ch_novac_moth = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_novac_moth = "No vaccinations according to mother")

  KRvac <- KRvac %>%
    mutate(ch_novac_card =
            case_when(ch_bcg_either==0&ch_pent1_either==0&ch_pent2_either==0&ch_pent3_either==0&
                        ch_polio0_either==0&ch_polio1_either==0&ch_polio2_either==0&ch_polio3_either==0&
                        ch_meas_either==0& source==1 ~ 1,
                        TRUE ~ 0)) %>%
    set_value_labels(ch_novac_card = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_novac_card = "No vaccinations according to card")
  #
  # *** vaccination card possession ***
  KRvac <- KRvac %>%
    mutate(ch_card_ever_had =
            case_when(h1%in%c(1,2,3) ~ 1, TRUE  ~ 0  )) %>%
    set_value_labels(ch_card_ever_had = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_card_ever_had = "Ever had a vaccination card")

  KRvac <- KRvac %>%
    mutate(ch_card_seen =
            case_when(h1==1 ~ 1, TRUE  ~ 0  )) %>%
    set_value_labels(ch_card_seen = c("Yes" = 1, "No"=0)) %>%
    set_variable_labels(ch_card_seen = "Vaccination card seen")

  # KRvac <- KRvac %>%
  #   dplyr::mutate_all(as.character)

  kr_results[[dhs_file]] <- KRvac

}

# Stack all surveys in one pass. The empty data.frame() is kept as the first
# input so the result has the same class as when the frame was grown from
# data.frame() inside the loop.
ri_indicators <- bind_rows(data.frame(), kr_results)

In [0]:
# SUBSET AND BIND TOGETHER
# Keep the identifiers, survey-design variables, raw h* inputs and derived
# indicators, then convert labelled columns to plain text.
KRvac1 <- ri_indicators %>%
  dplyr::select(
    # identifiers, weight and survey-design variables
    survey, caseid, midx, v001, wt, v021, v023, v024, v025,
    # age
    b19, v008, b3, age, agegroup,
    # source of vaccination information
    h1, source,
    # BCG
    h2, ch_bcg_either, ch_bcg_moth, ch_bcg_card,
    # pentavalent / DPT
    h3, dpt1, h5, dpt2, h7, dpt3, dptsum,
    ch_pent1_either, ch_pent2_either, ch_pent3_either,
    ch_pent1_moth, ch_pent2_moth, ch_pent3_moth,
    ch_pent1_card, ch_pent2_card, ch_pent3_card,
    # polio
    h4, polio1, h6, polio2, h8, polio3, poliosum,
    ch_polio0_either, ch_polio1_either, ch_polio2_either, ch_polio3_either,
    ch_polio0_moth, ch_polio1_moth, ch_polio2_moth, ch_polio3_moth,
    h0, ch_polio0_card, ch_polio1_card, ch_polio2_card, ch_polio3_card,
    # measles
    h9, ch_meas_either, ch_meas_moth, ch_meas_card,
    # summary indicators
    ch_allvac_either, ch_allvac_moth, ch_allvac_card,
    ch_novac_either, ch_novac_moth, ch_novac_card,
    ch_card_ever_had, ch_card_seen
  ) %>%
  # Labelled columns -> factors (value labels become levels) -> character.
  # across(where(...)) replaces the superseded mutate_if().
  dplyr::mutate(across(where(is.labelled), as_factor)) %>%
  dplyr::mutate(across(where(is.factor), as.character)) %>%
  distinct()

# Attach the cluster-level location columns by survey and cluster number;
# all.x = TRUE keeps children whose cluster has no match in pts_comb1.
KRvac1 <- KRvac1 %>%
  base::merge(pts_comb1, by = c("survey", "v001"), all.x = TRUE) %>%
  distinct()

In [0]:
# Records without an admin1 match, counted per survey.
# THESE HAVE BOGUS GPS COORDINATES, CAN FIX MOST OF THEM BY HAND USING THE SURVEY STRATIFICATION VARIABLES
missing_admin1 <- dplyr::filter(KRvac1, is.na(admin1))

display(
  dplyr::summarize(dplyr::group_by(missing_admin1, survey), count = dplyr::n())
)

In [0]:
# Quick check: number of records for each value of ch_meas_either.
dplyr::summarize(dplyr::group_by(KRvac1, ch_meas_either), count = dplyr::n())

#### Summarize to state level

In [0]:
# KRvac_cntry <- KRvac1 %>%
#   group_by(survey, survey_year) %>%
#   dplyr::summarize(ch_meas_either = sum(ifelse(ch_meas_either=="Yes", wt, 0), na.rm=TRUE),
#                    ch_meas_moth = sum(ifelse(ch_meas_moth=="Yes", wt, 0), na.rm=TRUE),
#                    ch_meas_card = sum(ifelse(ch_meas_card=="Yes", wt, 0), na.rm=TRUE),
#                    record_count = sum(wt[!is.na(ch_meas_either)], na.rm=TRUE),
#                    prop_meas_either = round(ch_meas_either/record_count, 3),
#                    prop_ch_meas_moth = round(ch_meas_moth/record_count, 3),
#                    prop_ch_meas_card = round(ch_meas_card/record_count, 3)) %>%
#   dplyr::mutate(admin1 = "All")

# KRvac_admin1 <- KRvac1 %>%
#   group_by(survey, survey_year, admin1) %>%
#   dplyr::summarize(ch_meas_either = sum(ifelse(ch_meas_either=="Yes", wt, 0), na.rm=TRUE),
#                   ch_meas_moth = sum(ifelse(ch_meas_moth=="Yes", wt, 0), na.rm=TRUE),
#                   ch_meas_card = sum(ifelse(ch_meas_card=="Yes", wt, 0), na.rm=TRUE),
#                   record_count = sum(wt[!is.na(ch_meas_either)], na.rm=TRUE),
#                   prop_meas_either = round(ch_meas_either/record_count, 3),
#                   prop_ch_meas_moth = round(ch_meas_moth/record_count, 3),
#                   prop_ch_meas_card = round(ch_meas_card/record_count, 3))

# KRvac_admin1 <- rbind(KRvac_admin1, KRvac_cntry) 

# display(KRvac_cntry)

In [0]:
# display(KRvac_admin1 %>% dplyr::filter(prop_meas_either != 0))

In [0]:
# KRvac_admin1_sdf <- KRvac_admin1 %>%
#   as.DataFrame()

# Convert the local child-level table into a Spark DataFrame for writing.
KRvac_sdf <- as.DataFrame(KRvac1)

In [0]:
# delta_path = "abfss://dhs-projects@idmdpdls01.dfs.core.windows.net/equity_dive_2024/unity_catalog/ri_summary_indicators.delta"
# write.df(KRvac_admin1_sdf, source = 'delta', path = delta_path, mode = 'overwrite')

# catalog = "idm_dhs_projects"
# schema = "equity_dive_2024"
# table_name = "ri_admin1_summary"
# file_path = delta_path

# # SparkR::sql(paste0("CREATE CATALOG IF NOT EXISTS ", catalog))
# # SparkR::sql(paste0("CREATE SCHEMA IF NOT EXISTS ", catalog, ".", schema))
# SparkR::sql(paste0("DROP TABLE IF EXISTS ", catalog, ".", schema, ".", table_name))
# SparkR::createTable(paste0(catalog, ".", schema, ".", table_name), path=file_path, source="delta")
# print(paste0(catalog, ".", schema, ".", table_name))

In [0]:
# Write the child-level indicator table to Delta storage and register it as a
# catalog table.
delta_path <- "abfss://dhs-data@idmdpdls01.dfs.core.windows.net/dhs_indicators/dhs_ch_vac.delta"

# Remove any previous output before writing.
# NOTE(review): the second argument is passed as the string "true" -
# presumably the recurse flag; confirm it is interpreted as intended.
dbutils.fs.rm(delta_path, "true")
write.df(KRvac_sdf, path = delta_path, source = 'delta', mode = 'overwrite')

catalog <- "idm_dhs"
schema <- "dhs_indicators"
table_name <- "dhs_ch_vac"
file_path <- delta_path

# Qualified names, built once and reused by the statements below.
schema_ref <- paste0(catalog, ".", schema)
table_ref <- paste0(schema_ref, ".", table_name)

# SparkR::sql(paste0("CREATE CATALOG IF NOT EXISTS ", catalog))
SparkR::sql(paste0("CREATE SCHEMA IF NOT EXISTS ", schema_ref))
# Drop and re-create the table so it points at the freshly written files.
SparkR::sql(paste0("DROP TABLE IF EXISTS ", table_ref))
SparkR::createTable(table_ref, path = file_path, source = "delta")
print(table_ref)