Merge 6345631 into 5796637

DeclareDesign · Aug 22, 2018 · 1766a68 · 1766a68
2 parents 5796637 + 6345631
commit 1766a68
Show file tree

Hide file tree

Showing 23 changed files with 195 additions and 111 deletions.
diff --git a/R/block_cluster_two_arm_designer.R b/R/block_cluster_two_arm_designer.R
@@ -7,12 +7,14 @@
 #' Units are assigned to treatment using complete block cluster random assignment. Treatment effects can be specified either by providing \code{control_mean} and \code{treatment_mean}
 #' or by specifying an \code{ate}. Estimation uses differences in means accounting for blocks and clusters.
 #' 
-#' Total N is given by \code{N_blocks*N_clusters_in_block*N_i_in_cluster} 
+#' Total N is given by \code{N_blocks*N_clusters_in_block*N_i_in_cluster}.
 #' 
 #' Normal shocks can be specified at the individual, cluster, and block levels. If individual level shocks are not specified and cluster and block 
 #' level variances sum to less than 1, then individual level shocks are set such that total variance in outcomes equals 1.
 #' 
-#' Key limitations: The designer assumes covariance between potential outcomes at individual level only.
+#' Key limitations: The designer assumes covariance between potential outcomes at the individual level only.
+#' 
+#' See \href{https://declaredesign.org/library/articles/block_cluster_two_arm.html}{vignette online}.
 #' 
 #' @param N_blocks An integer. Number of blocks. Defaults to 1 for no blocks. 
 #' @param N_clusters_in_block An integer. Number of clusters in each block. This is the total \code{N} when \code{N_blocks} and \code{N_i_in_cluster} are at default values. 
@@ -22,10 +24,10 @@
 #' @param sd_i_0 A nonnegative number. Standard deviation of individual level shock in control. For small \code{sd_block} and \code{sd_cluster}, \code{sd_i_0} defaults to make total variance = 1.
 #' @param sd_i_1 A nonnegative number. Standard deviation of individual level shock in treatment. Defaults to \code{sd_i_0}.
 #' @param rho A number in [-1,1]. Correlation in individual shock between potential outcomes for treatment and control.
-#' @param prob A number in [0,1]. Treatment assignment probability.
+#' @param prob A number in (0,1). Treatment assignment probability.
 #' @param control_mean A number. Average outcome in control.
-#' @param ate A number. Average treatment effect. Alternative to specifying \code{treatment_mean}. Note that ate is an argument for the designer but it does not appear as an argument in design code (design code uses \code{control_mean} and \code{treatment_mean} only.) only.
-#' @param treatment_mean A number. Average outcome in treatment. Note: if \code{treatment_mean} is not provided then it is calculated from \code{ate}. If both \code{ate} and  \code{treatment_mean} are provided then only  \code{treatment_mean} is used. 
+#' @param ate A number. Average treatment effect. Alternative to specifying \code{treatment_mean}. Note that \code{ate} is an argument for the designer but it does not appear as an argument in design code (design code uses \code{control_mean} and \code{treatment_mean} only).
+#' @param treatment_mean A number. Average outcome in treatment. If \code{treatment_mean} is not provided then it is calculated as \code{control_mean + ate}. If both \code{ate} and  \code{treatment_mean} are provided then only  \code{treatment_mean} is used. 
 #' @return A block cluster two-arm design.
 #' @author \href{https://declaredesign.org/}{DeclareDesign Team}
 #' @concept experiment 
@@ -50,13 +52,17 @@ block_cluster_two_arm_designer <- function(N_blocks = 1,
                                            control_mean = 0,
                                            ate = 0,
                                            treatment_mean = control_mean + ate
-                                           ){  
+){  
   N <- u_0 <- Y_Z_1 <- Y_Z_0 <- blocks <- clusters <- NULL
+  if(any(N_blocks < 1, N_clusters_in_block < 1, N_i_in_cluster < 1) ||
+     any(!rlang::is_integerish(N_blocks), 
+         !rlang::is_integerish(N_clusters_in_block), 
+         !rlang::is_integerish(N_i_in_cluster))) stop("N_* arguments must be positive integers")
   if(sd_block < 0) stop("sd_block must be nonnegative")
   if(sd_cluster < 0) stop("sd_cluster must be nonnegative")
   if(sd_i_0 < 0) stop("sd_i_0 must be nonnegative")
   if(sd_i_1 < 0) stop("sd_i_1 must be nonnegative")
-  if(prob< 0 || prob > 1) stop("prob must be in [0,1]")
+  if(prob<= 0 || prob >= 1) stop("prob must be in (0,1)")
   if(rho< -1 || rho > 1) stop("correlation must be in [-1,1]")
   {{{    
     # M: Model
@@ -76,7 +82,7 @@ block_cluster_two_arm_designer <- function(N_blocks = 1,
 
     potentials <- declare_potential_outcomes(
       Y ~ (1 - Z) * (control_mean    + u_0*sd_i_0 + u_b + u_c) + 
-          Z *       (treatment_mean  + u_1*sd_i_1 + u_b + u_c) )
+        Z *       (treatment_mean  + u_1*sd_i_1 + u_b + u_c) )
 
     # I: Inquiry
     estimand <- declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0))
@@ -96,7 +102,7 @@ block_cluster_two_arm_designer <- function(N_blocks = 1,
 
     # Design
     block_cluster_two_arm_design <-  population + potentials + estimand + assignment + 
-                                     reveal + estimator
+      reveal + estimator
   }}}
 
   attr(block_cluster_two_arm_design, "code") <- 

diff --git a/R/cluster_sampling_designer.R b/R/cluster_sampling_designer.R
@@ -1,14 +1,17 @@
 #' Create a design for cluster random sampling
 #'
-#' Builds a cluster sampling design of a population with \code{N_clusters} containing \code{N_subjects_per_cluster}. Estimations sample \code{n_clusters} each comprising \code{n_subjects_per_cluster} units. Outcomes within clusters have ICC approximately equal to \code{ICC}.
+#' Builds a cluster sampling design of a population with \code{N_blocks}, \code{N_clusters_in_block} containing \code{N_i_in_cluster}. Estimations sample \code{N_clusters_in_block} each comprising \code{n_i_in_cluster} units. Outcomes within clusters have ICC approximately equal to \code{ICC}.
 #'
 #' @details 
 #' Key limitations: The design assumes clusters draw with equal probability (rather than, for example, proportionate to size).
 #' 
-#' @param N_clusters An integer. Total number of clusters in the population.
-#' @param N_subjects_per_cluster An integer of vector of integers of length \code{N_clusters}. Total number of subjects per cluster in the population.
-#' @param n_clusters An integer. Number of clusters to sample.
-#' @param n_subjects_per_cluster An integer. Number of subjects to sample per cluster.
+#' See \href{https://declaredesign.org/library/articles/cluster_sampling.html}{vignette online}.
+#' 
+#' @param N_blocks An integer. Number of blocks. Defaults to 1 for no blocks. 
+#' @param N_clusters_in_block An integer. Total number of clusters in the population.
+#' @param N_i_in_cluster An integer of vector of integers of length \code{N_clusters_in_block}. Total number of subjects per cluster in the population.
+#' @param N_clusters_in_block An integer. Number of clusters to sample.
+#' @param n_i_in_cluster An integer. Number of subjects to sample per cluster.
 #' @param icc A number in [0,1]. Intra-cluster Correlation Coefficient (ICC). 
 #' @return A cluster sampling design.
 #' @author \href{https://declaredesign.org/}{DeclareDesign Team}
@@ -22,38 +25,42 @@
 #' cluster_sampling_design <- cluster_sampling_designer()
 #' # A design with varying cluster size
 #' cluster_sampling_design <- cluster_sampling_designer(
-#'   N_clusters = 10, N_subjects_per_cluster = 3:12, 
-#'   n_clusters = 5,  n_subjects_per_cluster = 2)
+#'   N_clusters_in_block = 10, N_i_in_cluster = 3:12, 
+#'   n_clusters_in_block = 5,  n_i_in_cluster = 2)
 
-cluster_sampling_designer <- function(N_clusters = 1000,
-                                      N_subjects_per_cluster = 50,
-                                      n_clusters = 100,
-                                      n_subjects_per_cluster = 10,
+cluster_sampling_designer <- function(N_blocks = 1,
+                                      N_clusters_in_block = 1000,
+                                      N_i_in_cluster = 50,
+                                      n_clusters_in_block = 100,
+                                      n_i_in_cluster = 10,
                                       icc = 0.2
 ){
   N <- cluster <- latent <- Y <- u_a <- NULL
-  if(n_clusters > N_clusters) stop(paste0("n_clusters sampled must be smaller than the total number of ", N_clusters, " clusters."))
-  if(n_subjects_per_cluster > min(N_subjects_per_cluster)) stop(paste0("n_subjects_per_cluster must be smaller than the maximum of ", N_subjects_per_cluster, " subjects per cluster."))
+  if(n_clusters_in_block > N_clusters_in_block) stop(paste0("N_clusters_in_block sampled must be smaller than the total number of ", N_clusters_in_block, " clusters."))
+  if(n_i_in_cluster > min(N_i_in_cluster)) stop(paste0("n_i_in_cluster must be smaller than or equal to the minimum of ", N_i_in_cluster, " subjects per cluster."))
+  if(icc < 0 || icc > 1) stop("icc must be a number in [0,1]")
   {{{
     # M: Model
     fixed_pop <-
       declare_population(
-        cluster = add_level(N = N_clusters),
-        subject = add_level(N = N_subjects_per_cluster,
+        block = add_level(N = N_blocks),
+        cluster = add_level(N = N_clusters_in_block),
+        subject = add_level(N = N_i_in_cluster,
                             latent = draw_normal_icc(mean = 0, N = N, clusters = cluster, ICC = icc),
                             Y = draw_ordered(x = latent, breaks = qnorm(seq(0, 1, length.out = 8)))
         )
       )()
 
     population <- declare_population(data = fixed_pop)
-
+    
     # I: Inquiry
     estimand <- declare_estimand(mean(Y), label = "Ybar")
 
     # D: Data Strategy
-    stage_1_sampling <- declare_sampling(clusters = cluster, n = n_clusters, 
+    stage_1_sampling <- declare_sampling(strata = block, 
+                                         clusters = cluster, n = n_clusters_in_block, 
                                          sampling_variable = "Cluster_Sampling_Prob")
-    stage_2_sampling <- declare_sampling(strata = cluster,   n = n_subjects_per_cluster, 
+    stage_2_sampling <- declare_sampling(strata = cluster,   n = n_i_in_cluster, 
                                          sampling_variable = "Within_Cluster_Sampling_Prob")
 
     # A: Answer Strategy
@@ -75,18 +82,19 @@ cluster_sampling_designer <- function(N_clusters = 1000,
   cluster_sampling_design 
 }
 attr(cluster_sampling_designer, "tips") <- list(
-  n_clusters = "Number of clusters to sample",
-  n_subjects_per_cluster = "Number of subjects per cluster to sample",
+  n_clusters_in_block = "Number of clusters to sample",
+  n_i_in_cluster = "Number of subjects per cluster to sample",
   icc = "Intra-cluster Correlation"
 )
 attr(cluster_sampling_designer, "shiny_arguments") <- list(
-  n_clusters = c(100, seq(10, 30, 10)),
-  n_subjects_per_cluster = seq(10, 40, 10),
+  n_clusters_in_block = c(100, seq(10, 30, 10)),
+  n_i_in_cluster = seq(10, 40, 10),
   icc = c(0.2, seq(0.002, .999, by = 0.2))
 )
 attr(cluster_sampling_designer, "description") <- "
-<p> A cluster sampling design that samples <code>n_clusters</code> clusters each comprising 
-    <code>n_subjects_per_cluster</code> units. The population comprises <code>N_clusters</code> with <code>N_subjects_per_cluster</code> units each. Outcomes within clusters have ICC approximately equal to 
-    <code>ICC</code>. 
+<p> A cluster sampling design that samples <code>n_clusters_in_block</code> clusters each
+comprising  <code>n_i_in_cluster</code> units. The population comprises 
+<code>N_clusters_in_block</code> with <code>N_i_in_cluster</code> units each. Outcomes 
+within clusters have ICC approximately equal to <code>ICC</code>. 
 "
 
diff --git a/R/mediation_analysis_designer.R b/R/mediation_analysis_designer.R
@@ -3,7 +3,11 @@
 #' A mediation analysis design that examines the effect of treatment (Z) on mediator (M) and the effect of mediator (M) on outcome (Y) (given Z=0) 
 #' as well as direct effect of treatment (Z) on outcome (Y) (given M=0). Analysis is implemented using an interacted regression model. 
 #' Note this model is not guaranteed to be unbiased despite randomization of Z because of possible violations of sequential ignorability.
-#'
+#' 
+#' @details 
+#' 
+#' See \href{https://declaredesign.org/library/articles/mediation_analysis.html}{vignette online}.
+#' 
 #' @param N An integer. Size of sample.
 #' @param a A number. Parameter governing effect of treatment (Z) on mediator (M).
 #' @param b A number. Effect of mediator (M) on outcome (Y) when Z=0.

diff --git a/R/multi_arm_designer.R b/R/multi_arm_designer.R
@@ -1,14 +1,18 @@
 #' Create a design with multiple experimental arms
 #'
-#' This designer creates a design \code{m_arms} experimental arms, each assigned with equal probabilities.
-#'
+#' Creates a design with \code{m_arms} experimental arms, each assigned with equal probability.
+#' 
+#' @details 
+#' 
+#' See \href{https://declaredesign.org/library/articles/multi_arm.html}{vignette online}.
+#' 
 #' @param N An integer. Sample size.
 #' @param m_arms An integer. Number of arms.
 #' @param outcome_means A numeric vector of length \code{m_arms}.  Average outcome in each arm.
-#' @param sd A nonnegative scalar. Standard deviations for shock for each unit (common across arms).
-#' @param outcome_sds A nonnegative numeric vector of length \code{m_arms}. Standard deviations for additional shock for each unit for each of the arms.
-#' @param conditions A vector of length \code{m_arms}. The names of each arm. It can be numeric or a character without blank spaces. 
-#' @param fixed A character vector. Names of arguments to be fixed in design. By default \code{m_arms} and \code{conditions} are always fixed.
+#' @param sd A nonnegative scalar. Standard deviation of individual-level shock (common across arms).
+#' @param outcome_sds A nonnegative numeric vector of length \code{m_arms}. Standard deviations for condition-level shocks.
+#' @param conditions A vector of length \code{m_arms}. The names of each arm. It can be given as numeric or character class (without blank spaces). 
+#' @param fixed A character vector. Names of arguments to be fixed in design. By default, \code{m_arms} and \code{conditions} are always fixed.
 #' @return A function that returns a design.
 #' @author \href{https://declaredesign.org/}{DeclareDesign Team}
 #' @concept experiment
@@ -22,7 +26,7 @@
 #'
 #'
 #' # A design with different mean and sd in each arm
-#' design <- multi_arm_designer(outcome_means = c(0, 0.5, 2), sd =  c(1, 0.1, 0.5))
+#' design <- multi_arm_designer(outcome_means = c(0, 0.5, 2), outcome_sds =  c(1, 0.1, 0.5))
 #'
 # A design with fixed sds and means. N is the sole modifiable argument.
 #' design <- multi_arm_designer(N = 80, m_arms = 4, outcome_means = 1:4,
@@ -96,7 +100,7 @@ multi_arm_designer <- function(N = 30,
     MARGIN = 1,
     FUN = function(x) paste0("Y_Z_", x)
   ))
-  estimand_names <- paste0("ate_",all_po_pairs[,1],"_",all_po_pairs[,2])
+  estimand_names <- paste0("ate_Y_",all_pairs[,1],"_",all_pairs[,2])
   estimand_list <- mapply(
     FUN = function(x, y){
       quos(mean(!!sym(x) - !!sym(y)))},
@@ -180,4 +184,7 @@ attr(multi_arm_designer, "shiny_arguments") <-
   list(N = c(10, 20, 50))
 
 attr(multi_arm_designer, "tips") <-
-  list(N = "Sample Size")
+  list(N = "Sample size")
+
+attr(multi_arm_designer,"description") <- "
+<p> A design with <code>m_arms</code> experimental arms, each assigned with equal probability."
diff --git a/R/pretest_posttest_designer.R b/R/pretest_posttest_designer.R
@@ -3,11 +3,14 @@
 #' Produces designs in which an outcome Y is observed pre- and post-treatment.
 #' The design allows for individual post-treatment outcomes to be correlated with pre-treatment outcomes
 #' and for at-random missingness in the observation of post-treatment outcomes. 
-#'
+#' @details 
+#' 
+#' See \href{https://declaredesign.org/library/articles/pretest_posttest.html}{vignette online}.
+#' 
 #' @param N An integer. Size of sample.
 #' @param ate A number. Average treatment effect.
-#' @param sd_1 Non negative number. Standard deviation of period 1 shocks.
-#' @param sd_2 Non negative number. Standard deviation of period 2 shocks.
+#' @param sd_1 Nonnegative number. Standard deviation of period 1 shocks.
+#' @param sd_2 Nonnegative number. Standard deviation of period 2 shocks.
 #' @param rho A number in [-1,1]. Correlation in outcomes between pre- and post-test.
 #' @param attrition_rate A number in [0,1]. Proportion of respondents in pre-test data that appear in post-test data.
 #' @return A pretest-posttest design.
@@ -29,8 +32,9 @@ pretest_posttest_designer <- function(N = 100,
                                       attrition_rate = .1)
 {
   u_t1 <- Y_t2_Z_1 <- Y_t2_Z_0 <- Z <- R <- Y_t1 <- Y_t2 <- NULL
-  if(rho < -1 | rho > 1) stop("'rho' must be a value from -1 to 1")
-  if(attrition_rate < 0 || attrition_rate > 1) stop("'attrition_rate' must be a value from 0 to 1")
+  if(rho < -1 || rho > 1) stop("'rho' must be a value in [-1, 1]")
+  if(any(sd_1 < 0, sd_2 < 0)) stop("'sd_1' and 'sd_2' must be nonnegative")
+  if(attrition_rate < 0 || attrition_rate > 1) stop("'attrition_rate' must be in [0,1]")
   {{{
     # M: Model
     population <- declare_population(

diff --git a/R/randomized_response_designer.R b/R/randomized_response_designer.R
@@ -5,10 +5,12 @@
 #' @details 
 #' \code{randomized_response_designer} employs a specific variation of randomized response designs in which respondents are required to report a fixed answer to the sensitive question with a given probability (see Blair, Imai, and Zhou (2015) for alternative applications and estimation strategies).
 #' 
+#' See \href{https://declaredesign.org/library/articles/randomized_response.html}{vignette online}.
+#' 
 #' @param N An integer. Size of sample.
-#' @param prob_forced_yes A number. Probability of a forced yes.
-#' @param prevalence_rate A number. Probability that individual has the sensitive trait.
-#' @param withholding_rate A number. Probability that an individual with the sensitive trait hides it.
+#' @param prob_forced_yes A number in [0,1]. Probability of a forced yes.
+#' @param prevalence_rate A number in [0,1]. Probability that individual has the sensitive trait.
+#' @param withholding_rate A number in [0,1]. Probability that an individual with the sensitive trait hides it.
 #' @return A randomized response design.
 #' @author \href{https://declaredesign.org/}{DeclareDesign Team}
 #' @concept experiment

diff --git a/R/regression_discontinuity_designer.R b/R/regression_discontinuity_designer.R
@@ -1,12 +1,14 @@
 #' Create a regression discontinuity design
 #'
 #' Builds a design with sample from population of size \code{N}. The average treatment effect local to the cutpoint is equal to \code{tau}. It allows for specification of the order of the polynomial regression (\code{poly_order}), cutoff value on the running variable (\code{cutoff}), and size of bandwidth around the cutoff (\code{bandwidth}).
-#'
+#' @details 
+#' See \href{https://declaredesign.org/library/articles/regression_discontinuity.html}{vignette online}.
+#' 
 #' @param N An integer. Size of population to sample from.
 #' @param tau A number. Difference in potential outcomes functions at the threshold.
 #' @param cutoff A number in (0,1). Threshold on running variable beyond which units are treated.
-#' @param bandwidth A number. Bandwidth around threshold from which to include units.
-#' @param poly_order An integer. Order of the polynomial regression used to estimate the jump at the cutoff.
+#' @param bandwidth A number. The value of the bandwidth on both sides of the threshold from which to include units.
+#' @param poly_order A number greater than or equal to 1. Order of the polynomial regression used to estimate the jump at the cutoff.
 #' @return A regression discontinuity design.
 #' @author \href{https://declaredesign.org/}{DeclareDesign Team}
 #' @concept observational
@@ -25,8 +27,8 @@ regression_discontinuity_designer <- function(
   poly_order = 4
 ){
   X <- noise <- Y <- NULL
-  if(! (cutoff < 1 & cutoff > 0)) stop("cutoff must be in (0,1)")
-  if(poly_order < 1) stop("poly_order must be greater than 0.")
+  if(cutoff <= 0 || cutoff >= 1) stop("cutoff must be in (0,1).")
+  if(poly_order < 1) stop("poly_order must be at least 1.")
   {{{
     # M: Model
     control <- function(X) {
@@ -48,7 +50,7 @@ regression_discontinuity_designer <- function(
 
     # D: Data Strategy
     sampling <- declare_sampling(handler = function(data){
-      subset(data,(X > 0 - bandwidth) & X < 0 + bandwidth)})
+      subset(data,(X > 0 - abs(bandwidth)) & X < 0 + abs(bandwidth))})
 
     # A: Answer Strategy 
     estimator <- declare_estimator(