Amended vignettes and amended all functions and test scriopts to ensu…

…re that examples are corrected for default type=full and that type=standard always includes ALL data fields but no metadata.
ukhsa-collaboration · Mar 1, 2019 · 3426a50 · 3426a50
1 parent 077ff11
commit 3426a50
Show file tree

Hide file tree

Showing 13 changed files with 515 additions and 84 deletions.
diff --git a/.RData b/.RData
diff --git a/R/DSR.R b/R/DSR.R
@@ -12,8 +12,8 @@
 #'               unquoted string referencing a numeric vector or field name from data depending on value of stdpoptype; default = esp2013
 #' @param stdpoptype whether the stdpop has been specified as a vector or a field name from data;
 #'                   quoted string "field" or "vector"; default = "vector"
-#' @param type type of output - defines whether to include metadata columns in output to reference the arguments passed;
-#'             can be "value", "lower", "upper", "standard" (for all 3 previous fields) or "full"; quoted string; default = "full"
+#' @param type defines the data and metadata columns to be included in output;
+#'             can be "value", "lower", "upper", "standard" (for all data) or "full" (for all data and metadata); quoted string; default = "full"
 #' @param confidence the required level of confidence expressed as a number between 0.9 and 1
 #'                   or 90 and 100; numeric; default 0.95
 #' @param multiplier the multiplier used to express the final values (eg 100,000 = rate per 100,000); numeric; default 100,000
@@ -42,7 +42,7 @@
 #'
 #' df %>%
 #'     group_by(indicatorid, year, sex) %>%
-#'     phe_dsr(obs, pop, type = "full")
+#'     phe_dsr(obs, pop, type = "standard")
 #'
 #' @section Notes: User MUST ensure that x, n and stdpop vectors are all ordered by
 #' the same standardisation category values as records will be matched by position. \cr \cr
@@ -151,7 +151,7 @@ phe_dsr <- function(data, x, n, stdpop = esp2013, stdpoptype = "vector", type =
         select(-total_count, -total_pop, -lowercl, -uppercl, -confidence, -statistic, -method)
     } else if (type == "standard") {
         phe_dsr <- phe_dsr %>%
-        select(-total_count, -total_pop, -confidence, -statistic, -method)
+        select(-confidence, -statistic, -method)
     }
 
     return(phe_dsr)

diff --git a/R/ISR.R b/R/ISR.R
@@ -37,7 +37,7 @@
 #'
 #' df %>%
 #'     group_by(indicatorid, year, sex) %>%
-#'     phe_isr(obs, pop, refdf$refcount, refdf$refpop, type="full", confidence=99.8)
+#'     phe_isr(obs, pop, refdf$refcount, refdf$refpop, type="standard", confidence=99.8)
 #'
 #' @section Notes: User MUST ensure that x, n, x_ref and n_ref vectors are all ordered by
 #' the same standardisation category values as records will be matched by position. \cr  \cr
@@ -148,7 +148,7 @@ phe_isr <- function(data, x, n, x_ref, n_ref, refpoptype = "vector",
             select(-observed, -expected, -ref_rate, -lowercl, -uppercl, -confidence, -statistic, -method)
     } else if (type == "standard") {
         phe_isr <- phe_isr %>%
-            select(-observed, -expected, -ref_rate, -confidence, -statistic, -method)
+            select(-confidence, -statistic, -method)
     }
 
     return(phe_isr)

diff --git a/R/Means.R b/R/Means.R
@@ -24,7 +24,7 @@
 #'                   values = c(20,30,40,200,300,400)) %>%
 #'                   group_by(area)
 #' phe_mean(df2,values)
-#' phe_mean(df2,values,type="full", confidence=0.998)
+#' phe_mean(df2,values,type="standard", confidence=0.998)
 #'
 #'
 #' @import dplyr
@@ -83,7 +83,7 @@ phe_mean <- function(data, x, type = "full", confidence=0.95) {
             select(-value_sum, -value_count, -stdev, -lowercl, -uppercl, -confidence, -statistic, -method)
     } else if (type == "standard") {
         phe_mean <- phe_mean %>%
-          select(-value_sum, -value_count, -stdev, -confidence, -statistic, -method)
+          select(-confidence, -statistic, -method)
     }
 
 

diff --git a/R/Quantiles.R b/R/Quantiles.R
@@ -18,7 +18,7 @@
 #'               depending on value of inverttype; default = TRUE (ie highest values assigned to quantile 1)
 #' @param inverttype whether the invert argument has been specified as a single logical value or a field name from data;
 #'                   quoted string "field" or "logical"; default = "logical"
-#' @param type type of output - defines whether to include metadata columns in output to reference the arguments passed; can be "standard" or "full"; quoted string; default = "full"
+#' @param type defines whether to include metadata columns in output to reference the arguments passed; can be "standard" or "full"; quoted string; default = "full"
 #'
 #' @inheritParams phe_dsr
 #'

diff --git a/R/SMR.R b/R/SMR.R
@@ -38,7 +38,7 @@
 #'
 #' df %>%
 #'     group_by(indicatorid, year, sex) %>%
-#'     phe_smr(obs, pop, refdf$refcount, refdf$refpop, type="full", confidence=99.8, refvalue=100)
+#'     phe_smr(obs, pop, refdf$refcount, refdf$refpop, type="standard", confidence=99.8, refvalue=100)
 #'
 #' @section Notes: User MUST ensure that x, n, x_ref and n_ref vectors are all ordered by
 #' the same standardisation category values as records will be matched by position. \cr \cr
@@ -143,7 +143,7 @@ phe_smr <- function(data, x, n, x_ref, n_ref, refpoptype = "vector", type = "ful
             select(-observed, -expected, -lowercl, -uppercl, -confidence, -statistic, -method)
     } else if (type == "standard") {
         phe_smr <- phe_smr %>%
-            select(-observed, -expected, -confidence, -statistic, -method)
+            select(-confidence, -statistic, -method)
     }
 
     return(phe_smr)

diff --git a/tests/testthat/testDSRs.R b/tests/testthat/testDSRs.R
@@ -8,20 +8,20 @@ test_that("dsrs and CIs calculate correctly",{
                check.attributes=FALSE, check.names=FALSE,info="test default")
 
   expect_equal(data.frame(phe_dsr(test_DSR_1976, count, pop, stdpop = test_DSR_1976$esp1976, type="standard")),
-               select(slice(test_DSR_results,12),4:6),
+               select(slice(test_DSR_results,12),2:6),
                check.attributes=FALSE, check.names=FALSE,info="test with user specified vector")
 
   expect_equal(data.frame(phe_dsr(test_DSR_1976, count, pop, stdpop = esp1976, stdpoptype="field", type="standard")),
-               data.frame(select(slice(test_DSR_results,12),4:6)),
+               data.frame(select(slice(test_DSR_results,12),2:6)),
                check.attributes=FALSE, check.names=FALSE,info="test with user specified stdpop by col name")
 
   expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, type="standard",
                stdpop = c(5000, 5500, 5500, 5500, 6000, 6000, 6500, 7000, 7000, 7000, 7000, 6500, 6000, 5500, 5000, 4000, 2500, 1500, 1000))),
-               data.frame(select(slice(test_DSR_results,9:11),1,4:6)),
+               data.frame(select(slice(test_DSR_results,9:11),1:6)),
                check.attributes=FALSE, check.names=FALSE,info="test stdpop as specified vector")
 
   expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, stdpop = esp2013, type="standard")),
-               data.frame(select(slice(test_DSR_results,9:11),1,4:6)),
+               data.frame(select(slice(test_DSR_results,9:11),1:6)),
                check.attributes=FALSE, check.names=FALSE,info="test standard")
 
   expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, stdpop = esp2013, type="value")),
@@ -41,7 +41,7 @@ test_that("dsrs and CIs calculate correctly",{
                check.attributes=FALSE, check.names=FALSE,info="test confidence")
 
   expect_equal(data.frame(phe_dsr(test_multiarea, count, pop, stdpop = esp2013, multiplier=10000, type="standard")),
-               data.frame(select(slice(test_DSR_results,1:3),1,4:6)),
+               data.frame(select(slice(test_DSR_results,1:3),1:6)),
                check.attributes=FALSE, check.names=FALSE,info="test multiplier")
 
 })

diff --git a/tests/testthat/testISRs.R b/tests/testthat/testISRs.R
@@ -9,28 +9,28 @@ test_that("isrs and CIs calculate correctly",{
                check.attributes=FALSE, check.names=FALSE,info="test default")
 
   expect_equal(data.frame(phe_isr(test_ISR_ownref, count, pop, refcount, refpop, refpoptype="field", type="standard")),
-               data.frame(select(slice(test_ISR_results,1:3),1,5:7)),
+               data.frame(select(slice(test_ISR_results,1:3),1:7)),
                check.attributes=FALSE, check.names=FALSE,info="test default with own ref data by col name")
 
   expect_equal(data.frame(phe_isr(select(test_ISR_ownref,-refcount,-refpop), count, pop,
                                   test_ISR_ownref$refcount[1:19], test_ISR_ownref$refpop[1:19], type="standard")),
-               data.frame(select(slice(test_ISR_results,1:3),1,5:7)),
+               data.frame(select(slice(test_ISR_results,1:3),1:7)),
                check.attributes=FALSE, check.names=FALSE,info="test default with own ref data as vector")
 
   expect_equal(data.frame(phe_isr(test_err2, count, pop, x_ref = test_ISR_refdata$refcount,
                                   n_ref = test_ISR_refdata$refpop, type="standard")),
-               data.frame(select(slice(test_ISR_results,25:26),1,5:7)),
+               data.frame(select(slice(test_ISR_results,25:26),1:7)),
                check.attributes=FALSE, check.names=FALSE,info="test zero population")
 
   expect_equal(data.frame(phe_isr(select(test_ISR_ownref,-refcount,-refpop), count, pop, type="standard",
                        x_ref = c(10303,2824,NA,3615,3641,3490,3789,3213,3031,2771,3089,3490,3595,4745,5514,7125,5694,6210,5757),
                        n_ref = c(50520,57173,60213,54659,44345,50128,62163,67423,62899,55463,60479,49974,44140,40888,37239,30819,18136,15325,13918))),
-               data.frame(select(slice(test_ISR_results,1:3),1,5:7)),
+               data.frame(select(slice(test_ISR_results,1:3),1:7)),
                check.attributes=FALSE, check.names=FALSE,info="test ref as specified vector")
 
   expect_equal(data.frame(phe_isr(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount,
                                   n_ref = test_ISR_refdata$refpop, type="standard")),
-               data.frame(select(slice(test_ISR_results,1:3),1,5:7)),
+               data.frame(select(slice(test_ISR_results,1:3),1:7)),
                check.attributes=FALSE, check.names=FALSE,info="test standard")
 
   expect_equal(data.frame(phe_isr(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount,
@@ -50,12 +50,12 @@ test_that("isrs and CIs calculate correctly",{
 
   expect_equal(data.frame(phe_isr(test_multiarea, count, pop, type="standard", x_ref = test_ISR_refdata$refcount,
                                   n_ref = test_ISR_refdata$refpop,confidence = 99.8)),
-               data.frame(select(slice(test_ISR_results,4:6),1,5:7)),
+               data.frame(select(slice(test_ISR_results,4:6),1:7)),
                check.attributes=FALSE, check.names=FALSE,info="test confidence")
 
   expect_equal(data.frame(phe_isr(test_multiarea, count, pop, type="standard", x_ref = test_ISR_refdata$refcount,
                                   n_ref = test_ISR_refdata$refpop, multiplier=1000)),
-               data.frame(select(slice(test_ISR_results,7:9),1,5:7)),
+               data.frame(select(slice(test_ISR_results,7:9),1:7)),
                check.attributes=FALSE, check.names=FALSE,info="test multiplier")
 
 })

diff --git a/tests/testthat/testMeans.R b/tests/testthat/testMeans.R
@@ -6,7 +6,7 @@ test_that("means and CIs calculate correctly",{
                data.frame(select(slice(test_Mean_results,3),2:10)),check.attributes=FALSE, check.names=FALSE,info="test default")
 
   expect_equal(data.frame(phe_mean(test_Mean_Grp,values, type="standard")),
-                          data.frame(select(slice(test_Mean_results,1:2),1,5:7)),check.attributes=FALSE, check.names=FALSE,info="test grouped & standard")
+                          data.frame(select(slice(test_Mean_results,1:2),1:7)),check.attributes=FALSE, check.names=FALSE,info="test grouped & standard")
 
   expect_equal(data.frame(phe_mean(test_Mean_Grp,values, type="value")),
                           data.frame(select(slice(test_Mean_results,1:2),1,5)),check.attributes=FALSE, check.names=FALSE,info="test value")
@@ -18,7 +18,7 @@ test_that("means and CIs calculate correctly",{
                           data.frame(select(slice(test_Mean_results,1:2),1,7)),check.attributes=FALSE, check.names=FALSE,info="test upper")
 
   expect_equal(data.frame(phe_mean(test_Mean_Grp,values, confidence = 99.8, type="standard")),
-                          data.frame(select(slice(test_Mean_results,4:5),1,5:7)),check.attributes=FALSE, check.names=FALSE,info="test confidence")
+                          data.frame(select(slice(test_Mean_results,4:5),1:7)),check.attributes=FALSE, check.names=FALSE,info="test confidence")
 })
 
 

diff --git a/tests/testthat/testSMRs.R b/tests/testthat/testSMRs.R
@@ -19,20 +19,20 @@ test_that("smrs and CIs calculate correctly",{
 
   expect_equal(data.frame(phe_smr(test_err2, count, pop, type="standard", x_ref = test_ISR_refdata$refcount,
                                   n_ref = test_ISR_refdata$refpop, refvalue=100)),
-               data.frame(select(slice(test_ISR_results,27:28),1,5:7)),
+               data.frame(select(slice(test_ISR_results,27:28),1:3,5:7)),
                check.attributes=FALSE, check.names=FALSE,info="test n = 0")
 
   expect_equal(data.frame(phe_smr(select(test_ISR_ownref,-refcount,-refpop), count, pop, type="standard",
                                   x_ref = c(10303,2824,NA,3615,3641,3490,3789,3213,3031,2771,
                                             3089,3490,3595,4745,5514,7125,5694,6210,5757),
                                   n_ref = c(50520,57173,60213,54659,44345,50128,62163,67423,
                                             62899,55463,60479,49974,44140,40888,37239,30819,18136,15325,13918))),
-               data.frame(select(slice(test_ISR_results,13:15),1,5:7)),
+               data.frame(select(slice(test_ISR_results,13:15),1:3,5:7)),
                check.attributes=FALSE, check.names=FALSE,info="test ref as specified vector")
 
   expect_equal(data.frame(phe_smr(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount,
                                   n_ref = test_ISR_refdata$refpop, type="standard")),
-               data.frame(select(slice(test_ISR_results,13:15),1,5:7)),
+               data.frame(select(slice(test_ISR_results,13:15),1:3,5:7)),
                check.attributes=FALSE, check.names=FALSE,info="test standard")
 
   expect_equal(data.frame(phe_smr(test_multiarea, count, pop, x_ref = test_ISR_refdata$refcount,
@@ -53,13 +53,13 @@ test_that("smrs and CIs calculate correctly",{
   expect_equal(data.frame(phe_smr(test_multiarea, count, pop, type="standard",
                                   x_ref = test_ISR_refdata$refcount,
                                   n_ref = test_ISR_refdata$refpop,confidence = 99.8)),
-               data.frame(select(slice(test_ISR_results,16:18),1,5:7)),
+               data.frame(select(slice(test_ISR_results,16:18),1:3,5:7)),
                check.attributes=FALSE, check.names=FALSE,info="test confidence")
 
   expect_equal(data.frame(phe_smr(test_multiarea, count, pop, type="standard",
                                   x_ref = test_ISR_refdata$refcount,
                                   n_ref = test_ISR_refdata$refpop, refvalue=100)),
-               data.frame(select(slice(test_ISR_results,19:21),1,5:7)),
+               data.frame(select(slice(test_ISR_results,19:21),1:3,5:7)),
                check.attributes=FALSE, check.names=FALSE,info="test refvalue")
 
 })

diff --git a/vignettes/DSR-vignette.Rmd b/vignettes/DSR-vignette.Rmd
@@ -39,6 +39,7 @@ The function can be used to calculate DSRs for grouping single or multiple geogr
 | n            | unquoted string  | field name from data containing the populations for each standardisation category (eg ageband) within each grouping set (eg area or indicator)| none          |
 | stdpop       | unquoted string  | standard populations for each standardisation category (eg age band) specified as a field name from data or a vector.    | esp2013         | 
 | stdpoptype   | quoted string    | whether the stdpop argument has been specified as a vector or a field name | "vector"      | 
+| type         | quoted string    | defines the data and metadata columns to include in output. Can by 'value', 'lower', 'upper', 'standard' or 'full' | "full"      | 
 | confidence   | numeric value    | the required level of confidence expressed as a number between 0.9 and 1 or 90 and 100  | 0.95          |
 | multiplier   | numeric value    | the multiplier used to express the final values (eg 100,000 = rate per 100,000 | 100,000       |
 
@@ -145,7 +146,7 @@ wrong <- df %>%
           arrange(desc(ageband)) %>%
           phe_dsr(dths,pop)
 
-# the following statement shows that execution of the phe_dsr function on the incorrectly sorted data frame produces different (and incorrect) results.
+# the following statement shows that execution of the phe_dsr function on the deliberately-incorrectly sorted data frame produces different (and incorrect) results.
 identical(right,wrong)
 ```
 
@@ -161,16 +162,22 @@ By default the function will apply 95% confidence, a 100,000 multiplier and will
 phe_dsr(df, dths, pop)
 ```
 
+</br>
+</br>
+
 Alternatively, we can add further arguments to specify:
 
-* the level of detail required in the output (type)
+* the level of detail required in the output (type = standard will drop metadata fields)
 * the confidence level (confidence)
 * the multiplier (multiplier)
 
 ``` {r alternative dsr}
-phe_dsr(df, dths, pop, type = "full", confidence = 99.8, multiplier = 10000)
+phe_dsr(df, dths, pop, type = "standard", confidence = 99.8, multiplier = 10000)
 ```
 
+</br>
+</br>
+
 ## Alternative Standard Populations
 
 In some cases you may wish to standardise against a different population to the default esp2013 one provided - such as the 1976 European Standard Population or an age and sex standardised population.  There are two ways to specify an alternative standard population:
@@ -192,6 +199,9 @@ phe_dsr(df18,dths,pop,stdpop = esp1976)
 
 ```
 
+</br>
+</br>
+
 #### 2. Append the standard populations to your data frame before executing the function
 In the example below, the esp2013 standard population is appended to our data frame prior to calling the phe_dsr function.  The field name can then be specified in the function call.  If stdpop is specified as a field name we must also tell the function this by specifying stdpoptype = "field" as below:
 
@@ -203,10 +213,13 @@ phe_dsr(df_with_stdpop, dths, pop, stdpop = spop, stdpoptype = "field")
 
 ```
 
+</br>
+</br>
+
 ## And what if the data are not so tidy?
 
 #### Zero deaths for a specific age band within a small geography
-This would be a fairly common scenario - maybe you have Local Authority data and there are no deaths in some of the younger age groups for some of the smaller areas.  
+This would be a fairly common scenario - maybe you have Local Authority data and there are no deaths in some of the younger age groups for some of the smaller areas. From PHEindicatormethods version 1.1.0 onwards, the phe_dsr function can handle this scenario and will automatically assign a zero death count where a death count is missing or recorded as NA.  
 
 Let's fudge a couple of data frames to represent this.  In this example, there are no deaths in the 10-14, 15-20 and 20-14 age bands:
 
@@ -218,14 +231,14 @@ deaths2 <- data.frame(ageband = c(0,5,25,30,35,40,45,50,55,60,65,70,75,80,85,90)
                       dths    = c(1,1, 1, 1, 3, 3, 3, 3,10,10,10,10, 8, 8, 8, 8))
 ```
 
-If we simply join these data frames to produce the input data frame required for the phe_dsr function then we get NA values in the Deaths column and the function will return an error:
+If we join these data frames to produce the input data frame required for the phe_dsr function then we get NA values in the Deaths column.  From PHEindicatormethods version 1.1.0 onwards, the phe_dsr function will return the correct DSR, assuming zero deaths in the age groups with no deaths recorded.  If you are using an earlier version of PHEindicatormethods then an error will be returned.  See what you get.....
 
 ``` {r error test}
 df2 <- left_join(pops2, deaths2, by="ageband")
 phe_dsr(df2, dths, pop)
 ```
 
-The NA values must be replaced with zeros before executing the function: 
+For earlier versions of PHEindicatormethods, the NA values must be replaced with zeros before executing the function: 
 
 ``` {r prep data}
 df3 <- df2 %>%