Skip to content

Commit

Permalink
Merge branch 'master' into r-cmd-check
Browse files Browse the repository at this point in the history
  • Loading branch information
graemeblair committed Jun 5, 2018
2 parents 1534779 + 513ef59 commit 8691d81
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 119 deletions.
149 changes: 72 additions & 77 deletions R/declare_potential_outcomes.R
Expand Up @@ -122,75 +122,74 @@ validation_fn(potential_outcomes_handler) <- function(ret, dots, label) {
#' @importFrom fabricatr fabricate
#' @importFrom rlang quos := !! !!! as_quosure
#' @rdname declare_potential_outcomes
potential_outcomes.formula <-
function(formula,
conditions = c(0, 1),
assignment_variables = "Z", # only used to provide a default - read from names of conditions immediately after.
data,
level = NULL,
label = outcome_variable) {
potential_outcomes.formula <- function(formula,
conditions = c(0, 1),
assignment_variables = "Z", # only used to provide a default - read from names of conditions immediately after.
data,
level = NULL,
label = outcome_variable) {

outcome_variable <- as.character(formula[[2]])
outcome_variable <- as.character(formula[[2]])

to_restore <- assignment_variables %i% colnames(data)
to_null <- setdiff(assignment_variables, to_restore)
to_restore <- assignment_variables %i% colnames(data)
to_null <- setdiff(assignment_variables, to_restore)

# Build a single large fabricate call -
# fabricate( Z=1, Y_Z_1=f(Z), Z=2, Y_Z_2=f(Z), ..., Z=NULL)
condition_quos <- quos()
# Build a single large fabricate call -
# fabricate( Z=1, Y_Z_1=f(Z), Z=2, Y_Z_2=f(Z), ..., Z=NULL)
condition_quos <- quos()

### If assn vars already present, swap them out
if (length(to_restore) > 0) {
restore_mangled <- paste(rep("_", max(nchar(colnames(data)))), collapse = "")
### If assn vars already present, swap them out
if (length(to_restore) > 0) {
restore_mangled <- paste(rep("_", max(nchar(colnames(data)))), collapse = "")

restore_mangled <- setNames(
lapply(to_restore, as.symbol),
paste0(".", restore_mangled, to_restore)
)
restore_mangled <- setNames(
lapply(to_restore, as.symbol),
paste0(".", restore_mangled, to_restore)
)

condition_quos <- c(condition_quos, quos(!!!restore_mangled))
condition_quos <- c(condition_quos, quos(!!!restore_mangled))

}
}

# build call
expr = as_quosure(formula)
for (i in 1:nrow(conditions)) {

condition_values <- conditions[i, , drop = FALSE]
out_name <- paste0(outcome_variable, "_", paste0(assignment_variables, "_", condition_values, collapse = "_"))
# build call
expr = as_quosure(formula)
for (i in 1:nrow(conditions)) {

condition_quos <- c(condition_quos, quos(!!!condition_values, !!out_name := !!expr) )
}
condition_values <- conditions[i, , drop = FALSE]
out_name <- paste0(outcome_variable, "_", paste0(assignment_variables, "_", condition_values, collapse = "_"))

# clean up
if (length(to_restore) > 0) {
to_restore <- setNames(
lapply(names(restore_mangled), as.symbol),
to_restore
)
restore_mangled <- lapply(restore_mangled, function(x) NULL)
condition_quos <- c(condition_quos, quos(!!!to_restore), quos(!!!restore_mangled))
}
condition_quos <- c(condition_quos, quos(!!!condition_values, !!out_name := !!expr) )
}

if (length(to_null) > 0) {
to_null <- lapply(setNames(nm = to_null), function(x) NULL)
condition_quos <- c(condition_quos, quos(!!!to_null))
}
# clean up
if (length(to_restore) > 0) {
to_restore <- setNames(
lapply(names(restore_mangled), as.symbol),
to_restore
)
restore_mangled <- lapply(restore_mangled, function(x) NULL)
condition_quos <- c(condition_quos, quos(!!!to_restore), quos(!!!restore_mangled))
}

if (length(to_null) > 0) {
to_null <- lapply(setNames(nm = to_null), function(x) NULL)
condition_quos <- c(condition_quos, quos(!!!to_null))
}

if (is.character(level)) {
condition_quos <- quos(!!level := modify_level(!!!condition_quos))
}

### Actually do it and return
### Note ID_label=NA
structure(
fabricate(data = data,!!!condition_quos, ID_label = NA),
outcome_variable = outcome_variable,
assignment_variables = assignment_variables)

if (is.character(level)) {
condition_quos <- quos(!!level := modify_level(!!!condition_quos))
}

### Actually do it and return
### Note ID_label=NA
structure(
fabricate(data = data,!!!condition_quos, ID_label = NA),
outcome_variable = outcome_variable,
assignment_variables = assignment_variables)

}


validation_fn(potential_outcomes.formula) <- function(ret, dots, label) {
dots$formula <- eval_tidy(dots$formula)
Expand All @@ -213,30 +212,26 @@ validation_fn(potential_outcomes.formula) <- function(ret, dots, label) {
dots$assignment_variables <- names(dots$conditions)


ret <-
build_step(
currydata(
potential_outcomes.formula,
dots,
strictDataParam = attr(ret, "strictDataParam"),
cloneDots = FALSE
),
handler = potential_outcomes.formula,
dots = dots,
label = label,
step_type = attr(ret, "step_type"),
causal_type = attr(ret, "causal_type"),
call = attr(ret, "call")
)


### Note that this sets a design_validation callback for later use!!! see below
### step_meta is the data that design_validation will use for design time checks
structure(ret,
potential_outcomes_formula = formula,
step_meta = list(outcome_variables = outcome_variable,
assignment_variables = names(dots$conditions)),
design_validation = pofdv)
ret <- build_step(currydata(potential_outcomes.formula,
dots,
strictDataParam = attr(ret, "strictDataParam"),
cloneDots = FALSE
),
handler = potential_outcomes.formula,
dots = dots,
label = label,
step_type = attr(ret, "step_type"),
causal_type = attr(ret, "causal_type"),
call = attr(ret, "call"))


### Note that this sets a design_validation callback for later use!!! see below
### step_meta is the data that design_validation will use for design time checks
structure(ret,
potential_outcomes_formula = formula,
step_meta = list(outcome_variables = outcome_variable,
assignment_variables = names(dots$conditions)),
design_validation = pofdv)
}


Expand Down
90 changes: 48 additions & 42 deletions R/utilities.R
Expand Up @@ -5,32 +5,31 @@
# step 1 will have label set to pop
#
#' @importFrom rlang f_text f_env

maybe_add_labels <- function(quotations){

maybe_add_labels <- function(quotations) {

labeller <- function(quotation, lbl) {
cx <- quotation[[2]]

if (lbl == "") {
lbl <- f_text(quotation)
}

if(is.call(cx) && is.symbol(cx[[1]]) && ! "label" %in% names(cx)){

if (is.call(cx) && is.symbol(cx[[1]]) && !"label" %in% names(cx)) {
f <- get0(as.character(cx[[1]]), f_env(quotation), "function") #match.fun does not repect quosures environment, doing get manually
if("declaration" %in% class(f) && "label" %in% names(formals(f))){
if ("declaration" %in% class(f) && "label" %in% names(formals(f))) {
quotation[[2]][["label"]] <- lbl
}
}
quotation
}

for(i in seq_along(quotations)){
if(names(quotations)[i] == "")
for (i in seq_along(quotations)) {
if (names(quotations)[i] == "")
names(quotations)[i] <- f_text(quotations[[i]])
quotations[[i]] <- labeller(quotations[[i]], names(quotations)[i])
}

if (any(duplicated(names(quotations)))) {
stop(paste0("Please provide unique names for each design step. Duplicates include ",
paste(names(quotations)[duplicated(names(quotations))], collapse = ", "),
Expand All @@ -43,26 +42,26 @@ maybe_add_labels <- function(quotations){
###############################################################################
# Helpers for declare_time checking

declare_time_error <- function(message, declaration){
declare_time_error <- function(message, declaration) {
stop( simpleError(message, call = attr(declaration, "call")) )
}

declare_time_warn <- function(message, declaration){
declare_time_warn <- function(message, declaration) {
warning( simpleWarning(message, call = attr(declaration, "call")) )
}

declare_time_error_if_data <- function(declaration){
if("data" %in% names(attr(declaration, "dots")))
declare_time_error_if_data <- function(declaration) {
if ("data" %in% names(attr(declaration, "dots")))
declare_time_error("`data` should not be a declared argument.", declaration)
}

###############################################################################
# Wrapper function, use future_lapply if we have it, fallback to lapply if not


future_lapply <- function(..., future.seed = NA, future.globals=TRUE){
future_lapply <- function(..., future.seed = NA, future.globals=TRUE) {
if (requireNamespace("future.apply", quietly = TRUE)) {
future.apply::future_lapply(..., future.seed=future.seed, future.globals = future.globals)
future.apply::future_lapply(..., future.seed = future.seed, future.globals = future.globals)
} else {
lapply(...)
}
Expand All @@ -73,20 +72,20 @@ future_lapply <- function(..., future.seed = NA, future.globals=TRUE){

# If <= 5 uniques, table it, ow descriptives if numeric-ish, ow number of levels.
describe_variable <- function(x) {

num_unique=length(unique(x))

if(num_unique > 5) {
num_unique <- length(unique(x))
if (num_unique > 5) {
return(describe_variable_impl(x, num_unique))
}

tab <- table(x, exclude = NULL)

rbind.data.frame(
Frequency=data.frame(as.list(tab), check.names = FALSE),
Proportion=sprintf("%.2f", prop.table(tab))
Frequency = data.frame(as.list(tab), check.names = FALSE),
Proportion = sprintf("%.2f", prop.table(tab))
)

}


Expand All @@ -95,7 +94,7 @@ describe_variable_impl <- function(x, num_unique) UseMethod("describe_variable_i
describe_variable_impl.factor <- function(x, num_unique) {
data.frame(
as.list(summary.factor(x, 5)),
N_unique=num_unique,
N_unique = num_unique,
check.names = FALSE
)
}
Expand All @@ -114,8 +113,14 @@ describe_variable_impl.default <- function(x, num_unique) {
#todo just use summary?
data.frame(
lapply(
list(min=min, median=median, mean=mean, max=max, sd=sd),
function(f,x) round(f(x, na.rm=TRUE), digits=2),
list(
min = min,
median = median,
mean = mean,
max = max,
sd = sd
),
function(f, x) round(f(x, na.rm = TRUE), digits = 2),
x
),
N_missing = sum(is.na(x)),
Expand All @@ -131,36 +136,37 @@ describe_variable_impl.default <- function(x, num_unique) {
rbind_disjoint <- function(list_of_df, infill=NA) {
list_of_df <- Filter(is.data.frame, list_of_df)
all_columns <- Reduce(union, lapply(list_of_df, colnames))

for(i in seq_along(list_of_df)) {
for (i in seq_along(list_of_df)) {
list_of_df[[i]][setdiff(all_columns, colnames(list_of_df[[i]]))] <- infill
}

list_of_df <- lapply(list_of_df, `[`, all_columns)

do.call(rbind.data.frame, append(list_of_df, list(make.row.names = FALSE, stringsAsFactors = FALSE)))
}

add_parens <- function(x, digits = 3) {
return(sprintf("(%s)", format_num(x, digits)))
sprintf("(%s)", format_num(x, digits))
}

format_num <- function(x, digits = 3) {
x <- as.numeric(x)
return(sprintf(paste0("%.", digits, "f"), x))
sprintf(paste0("%.", digits, "f"), as.numeric(x))
}

# Function to check whether there are more sims run than expected, possibly because of repeated labels
check_sim_number <- function(simulations_df,
sims,
grouping_variables = c("design_ID", "estimand_label", "estimator_label", "coefficient")) {

group_by_set <- colnames(simulations_df) %i% grouping_variables
group_by_list <- simulations_df[, group_by_set, drop=FALSE]
group_by_list <- simulations_df[, group_by_set, drop = FALSE]
check_df <- split(group_by_list, group_by_list, drop = TRUE)
check_sims <- unlist(lapply(check_df, nrow)) != prod(sims)
if(any(check_sims)) warning(paste0("More simulations than expected for profiles:",
paste0(names(check_sims)[check_sims], collapse = ", ")))
if (any(check_sims)) {
warning(paste0("More simulations than expected for profiles:",
paste0(names(check_sims)[check_sims], collapse = ", ")))
}
}


Expand All @@ -173,7 +179,7 @@ get_added_variables <- function(last_df = NULL, current_df) {
get_modified_variables <- function(last_df = NULL, current_df) {
is_modified <- function(j) !isTRUE(all.equal(last_df[[j]], current_df[[j]]))
shared <- intersect(names(current_df), names(last_df))

Filter(is_modified, shared)
}

0 comments on commit 8691d81

Please sign in to comment.