Big-Life-Lab · kittychenn · Sep 6, 2023 · Sep 18, 2023 · Nov 22, 2023 · Nov 22, 2023
diff --git a/R/recode-with-table.R b/R/recode-with-table.R
@@ -364,6 +364,8 @@ recode_call <-
 #' @param variable_being_checked the name of the recoded variable
 #'
 #' @return the data equivalent of variable_being_checked
+#' 
+#' @keywords internal
 get_data_variable_name <-
   function(data_name,
            data,
@@ -424,6 +426,8 @@ get_data_variable_name <-
 #' @param else_default default else value to use if no else is present
 #'
 #' @return Returns recoded and labeled data
+#' 
+#' @keywords internal
 recode_columns <-
   function(data,
            variables_to_process,
@@ -689,7 +693,7 @@ recode_columns <-
 #' Compare Value Based On Interval
 #'
 #' Compare values on the scientific notation interval
-#'
+#' 
 #' @param left_boundary the min value
 #' @param right_boundary the max value
 #' @param data the data that contains values being compared
@@ -698,6 +702,7 @@ recode_columns <-
 #'
 #' @return a boolean vector containing true for rows where the
 #' comparison is true
+#' @keywords internal
 compare_value_based_on_interval <-
   function(left_boundary,
            right_boundary,
@@ -797,6 +802,8 @@ update_variable_details_based_on_variable_sheet <-
 #' @param var_type the toType of a variable
 #' 
 #' @return an appropriately coded tagged NA
+#' 
+#' @keywords internal
 recode_variable_NA_formating <- function(cell_value, var_type) {
   recode_value <- NULL
   if (grepl("NA", cell_value)) {

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -38,6 +38,8 @@ navbar:
         href: articles/tagged_na_usage.html
       - text: CCHS datasets that overlap each other
         href: articles/duplicate_datasets.html
+      - text: Harmonizing variables using the full dataset
+        href: articles/how_to_harmonize.html
     reference:
       text: Reference
       href: reference/index.html
@@ -141,3 +143,5 @@ reference:
     desc: Utility functions that are used in other cchsflow functions
     contents:
     - if_else2
+    - is_equal
+    - label_data
diff --git a/vignettes/how_to_harmonize.Rmd b/vignettes/how_to_harmonize.Rmd
@@ -0,0 +1,120 @@
+---
+title: "How to harmonize across survey cycles"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{How to harmonize across survey cycles}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include = FALSE}
+# Chunk defaults for the whole vignette: show the source code (echo), fold the
+# printed output into the same block as the code (collapse), and prefix output
+# lines with "#>" (comment).
+knitr::opts_chunk$set(
+  echo = TRUE,
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+## Introduction 
+
+This vignette explains how you can transform variables across multiple CCHS datasets using the full datasets to the _cchsflow_ package. The full PUMF datasets can be found [here](https://odesi.ca/). A full harmonized dataset of all _cchsflow_ variables
-This vignette explains how you can transform variables across multiple CCHS datasets using the full datasets to the _cchsflow_ package. The full PUMF datasets can be found [here](https://odesi.ca/). A full harmonized dataset of all _cchsflow_ variables
+This vignette explains how you can transform variables across multiple Canadian Community Health Survey (CCHS) cycles using complete datasets with the _cchsflow_ package. The Public Use Microdata Files (PUMF) containing the complete data can be found [here](https://odesi.ca/). A full harmonized dataset of all _cchsflow_ variables
+
-This vignette explains how you can transform variables across multiple CCHS datasets using the full datasets to the _cchsflow_ package. The full PUMF datasets can be found [here](https://odesi.ca/). A full harmonized dataset of all _cchsflow_ variables
+This vignette explains how you can transform variables across multiple Canadian Community Health Survey (CCHS) cycles using complete datasets with the _cchsflow_ package. The Public Use Microdata Files (PUMF) containing the complete data can be found [here](https://odesi.ca/). A full harmonized dataset of all _cchsflow_ variables
+
+can be found [here](https://osf.io/j5wgu). When working with the original PUMF datasets, each data file should be renamed so that its name specifies the survey and cycle year, following the naming format of the `_p` sample data (e.g., `cchs2001_p`, `cchs2013_2014_p`).
+
+To harmonize the data files, the `rec_with_table()` function is used to transform the indicated variables. 
-To harmonize the data files, the `rec_with_table()` function is used to transform the indicated variables. 
+To harmonize the data files, the `cchsflow::rec_with_table()` function is used to transform the indicated variables. 
+
-To harmonize the data files, the `rec_with_table()` function is used to transform the indicated variables. 
+To harmonize the data files, the `cchsflow::rec_with_table()` function is used to transform the indicated variables. 
+
+
+Note: Harmonizing cycles before 2014 with cycles from 2015 onward is not advised as Statistics Canada has made major survey design changes.
+
+## How to combine a single variable across multiple cycles
+
+In this example, the sex variable from the 2001 to 2018 CCHS datasets is transformed and labeled using `rec_with_table()`; the recoded datasets are then combined into one dataset and labeled using `merge_rec_data()`.
+
+```{r results= 'hide', message = FALSE, warning=FALSE}
+library(cchsflow)
+```
+
+
+```{r }
+# Recode DHH_SEX separately for each CCHS cycle
+sex2001 <- rec_with_table(cchs2001_p, "DHH_SEX", log = TRUE)
+sex2003 <- rec_with_table(cchs2003_p, "DHH_SEX", log = TRUE)
+sex2005 <- rec_with_table(cchs2005_p, "DHH_SEX", log = TRUE)
+sex2007_2008 <- rec_with_table(cchs2007_2008_p, "DHH_SEX", log = TRUE)
+sex2009_2010 <- rec_with_table(cchs2009_2010_p, "DHH_SEX", log = TRUE)
+sex2011_2012 <- rec_with_table(cchs2011_2012_p, "DHH_SEX", log = TRUE)
+sex2013_2014 <- rec_with_table(cchs2013_2014_p, "DHH_SEX", log = TRUE)
+sex2015_2016 <- rec_with_table(cchs2015_2016_p, "DHH_SEX", log = TRUE)
+sex2017_2018 <- rec_with_table(cchs2017_2018_p, "DHH_SEX", log = TRUE)
+
+# Combine the recoded cycles into a single labeled dataset
+combined_sex <- merge_rec_data(
+  sex2001, sex2003, sex2005, sex2007_2008, sex2009_2010,
+  sex2011_2012, sex2013_2014, sex2015_2016, sex2017_2018
+)
+
+# Summarize the combined dataset
+summary(combined_sex)
+```
+
+
+## How to combine multiple variables across multiple cycles
+
+In this example, the continuous age and sex variables from the 2001 to 2018 CCHS datasets are transformed and labeled using `rec_with_table()`; the recoded datasets are then combined into one dataset and labeled using `merge_rec_data()`.
+
+```{r, eval = FALSE, results = "hide"}
+# Variables to harmonize in every cycle: continuous age and sex
+age_sex_variables <- c("DHHGAGE_cont", "DHH_SEX")
+
+# Harmonize individual datasets (one call per CCHS cycle)
+age_sex2001 <- rec_with_table(cchs2001_p, age_sex_variables)
+age_sex2003 <- rec_with_table(cchs2003_p, age_sex_variables)
+age_sex2005 <- rec_with_table(cchs2005_p, age_sex_variables)
+age_sex2007_2008 <- rec_with_table(cchs2007_2008_p, age_sex_variables)
+age_sex2009_2010 <- rec_with_table(cchs2009_2010_p, age_sex_variables)
+age_sex2011_2012 <- rec_with_table(cchs2011_2012_p, age_sex_variables)
+age_sex2013_2014 <- rec_with_table(cchs2013_2014_p, age_sex_variables)
+age_sex2015_2016 <- rec_with_table(cchs2015_2016_p, age_sex_variables)
+age_sex2017_2018 <- rec_with_table(cchs2017_2018_p, age_sex_variables)
+
+# Merge the harmonized cycles into one labeled dataset
+combined_age_sex <- merge_rec_data(
+  age_sex2001, age_sex2003, age_sex2005,
+  age_sex2007_2008, age_sex2009_2010, age_sex2011_2012,
+  age_sex2013_2014, age_sex2015_2016, age_sex2017_2018
+)
+```
+
+## How to combine all variables in the variable_details sheet across multiple cycles
+
+To combine a large number of variables, it is best to use `variables.csv` and `variable_details.csv`. There are vignettes that further describe variables and variable_details, including how to add or customize transformed variables.
+
+### Option 1: Using _cchsflow_ variable_details sheet
+
+When the `variables` argument in `rec_with_table()` is not specified, all variables listed in `variables.csv` and `variable_details.csv` will be transformed. In this example, all variables from the _cchsflow_ `variables.csv` and `variable_details.csv` sheets are transformed and labeled for the 2001 to 2018 CCHS datasets using `rec_with_table()`; the recoded datasets are then combined into one dataset and labeled using `merge_rec_data()`.
+
+```{r, eval = FALSE, results = "hide"}
+# Harmonize individual datasets. No variables are named, so every variable
+# listed in the cchsflow variables and variable_details sheets is transformed.
+harmonized_2001 <- rec_with_table(cchs2001_p)
+harmonized_2003 <- rec_with_table(cchs2003_p)
+harmonized_2005 <- rec_with_table(cchs2005_p)
+harmonized_2007_2008 <- rec_with_table(cchs2007_2008_p)
+harmonized_2009_2010 <- rec_with_table(cchs2009_2010_p)
+harmonized_2011_2012 <- rec_with_table(cchs2011_2012_p)
+harmonized_2013_2014 <- rec_with_table(cchs2013_2014_p)
+harmonized_2015_2016 <- rec_with_table(cchs2015_2016_p)
+harmonized_2017_2018 <- rec_with_table(cchs2017_2018_p)
+
+# Merge the harmonized cycles into one labeled dataset
+combined_all_cycles <- merge_rec_data(
+  harmonized_2001, harmonized_2003, harmonized_2005,
+  harmonized_2007_2008, harmonized_2009_2010, harmonized_2011_2012,
+  harmonized_2013_2014, harmonized_2015_2016, harmonized_2017_2018
+)
+```
+
+### Option 2: Using your own variable_details sheet
+
+In this example, all variables from personalized `variables.csv` and `variable_details.csv` sheets from 2001 to 2018 CCHS datasets will be transformed and labeled using  `rec_with_table()`, which is then combined into one dataset and labeled using  `merge_rec_data()`.
+
+```{r, eval = FALSE, results = "hide"}
+# Harmonize individual datasets. Every cycle is recoded with the same
+# personalized sheets (sample_variables / sample_variable_details) so that the
+# recoded cycles are comparable when merged.
+harmonized_2001 <- rec_with_table(
+  cchs2001_p,
+  variables = sample_variables,
+  variable_details = sample_variable_details
+)
+harmonized_2003 <- rec_with_table(
+  cchs2003_p,
+  variables = sample_variables,
+  variable_details = sample_variable_details
+)
+harmonized_2005 <- rec_with_table(
+  cchs2005_p,
+  variables = sample_variables,
+  variable_details = sample_variable_details
+)
+harmonized_2007_2008 <- rec_with_table(
+  cchs2007_2008_p,
+  variables = sample_variables,
+  variable_details = sample_variable_details
+)
+harmonized_2009_2010 <- rec_with_table(
+  cchs2009_2010_p,
+  variables = sample_variables,
+  variable_details = sample_variable_details
+)
+harmonized_2011_2012 <- rec_with_table(
+  cchs2011_2012_p,
+  variables = sample_variables,
+  variable_details = sample_variable_details
+)
+harmonized_2013_2014 <- rec_with_table(
+  cchs2013_2014_p,
+  variables = sample_variables,
+  variable_details = sample_variable_details
+)
+harmonized_2015_2016 <- rec_with_table(
+  cchs2015_2016_p,
+  variables = sample_variables,
+  variable_details = sample_variable_details
+)
+harmonized_2017_2018 <- rec_with_table(
+  cchs2017_2018_p,
+  variables = sample_variables,
+  variable_details = sample_variable_details
+)
+
+# Merge the harmonized cycles into one labeled dataset
+combined_all_cycles <- merge_rec_data(
+  harmonized_2001, harmonized_2003, harmonized_2005,
+  harmonized_2007_2008, harmonized_2009_2010, harmonized_2011_2012,
+  harmonized_2013_2014, harmonized_2015_2016, harmonized_2017_2018
+)
+```
+