diff --git a/R/fabricate.R b/R/fabricate.R index 54ed2c3..335dfc0 100644 --- a/R/fabricate.R +++ b/R/fabricate.R @@ -51,43 +51,20 @@ #' #' @export fabricate <- - function(data = NULL, - N = NULL, - ID_label = NULL, + function(data, + N, + ID_label, ...) { options <- quos(...) - functions_or_not <- - sapply(options, function(i) { - is_lang(get_expr(i)) - }) - - if (length(functions_or_not) > 0) { - options_fn <- - sapply(options[functions_or_not], lang_name) ## function names - if (any(options_fn == "level") & - !all(options_fn == "level")) { - stop( - "Arguments passed to ... must either all be calls to level() or have no calls to level()." - ) - } - all_levels <- - all(options_fn == "level") & - length(options_fn) > 0 & all(functions_or_not) - } else{ - all_levels <- FALSE - } + all_levels <- check_all_levels(options) - if (!is.null(data) & !any(class(data) == "data.frame")) { + if (!missing(data) && !"data.frame" %in% class(data)) { stop( "Please provide a data object to the data argument, e.g. a data.frame, tibble, or sf object." ) } - ID_label <- substitute(ID_label) - if (!is.null(ID_label)) { - ID_label <- as.character(ID_label) - } # check if all the options are level calls if (all_levels) { @@ -95,7 +72,7 @@ fabricate <- # Pop the data from the previous level in the current call # Do this if there existing data to start with or # and beginning with the second level - if (i > 1 | !is.null(data)) { + if (i > 1 | !missing(data)) { options[[i]] <- lang_modify(options[[i]], data_internal_ = data) } @@ -111,12 +88,21 @@ fabricate <- return(data) } else { + if(missing(data)) data <- NULL + if(missing(N)) N <- NULL + if(missing(ID_label)) ID_label <- NULL + + ID_label <- substitute(ID_label) + if (!is.null(ID_label)) { + ID_label <- as.character(ID_label) + } + fabricate_data_single_level( data = data, N = N, ID_label = ID_label, existing_ID = !is.null(data) & is.null(ID_label), - ... = ... + options = options ) } } @@ -127,60 +113,89 @@ fabricate_data_single_level <- function(data = NULL, N = NULL, ID_label = NULL, ..., - existing_ID = FALSE) { - if (sum(!is.null(data),!is.null(N)) != 1) { + existing_ID = FALSE, + options=quos(...)) { + if (is.null(data) == is.null(N)) { stop("Please supply either a data.frame or N and not both.") } - if (is.null(data)) { - if (length(N) > 1) { - stop(paste0( + if (!is.null(N)) { + if (length(N) != 1) { + stop( "At the top level, ", ID_label, ", you must provide a single number to N." - )) + ) } - # make IDs that are nicely padded - data <- - data.frame(sprintf(paste0("%0", nchar(N), "d"), 1:N), stringsAsFactors = FALSE) - - # this creates column names from ID_label - # note if ID_label is NULL that the ID column name is just "ID" -- so safe - colnames(data) <- ifelse(is.null(ID_label), "ID", ID_label) - } else { + data <- data.frame() + existing_ID <- FALSE + } else if(!is.null(data)){ N <- nrow(data) - if (existing_ID == FALSE) { - data[, ifelse(is.null(ID_label), "ID", ID_label)] <- - sprintf(paste0("%0", nchar(nrow(data)), "d"), 1:nrow(data)) - } } - args <- quos(...) - - args_names <- names(args) - - if (length(args) > 0) { - for (i in 1:length(args)) { - # this was changed to move costly data.frame operations inside the loop - # because previously, if you did rnorm(N) in a level - # it literally did a vector of length N, rather than doing - # it for all of the values of the level above it, i.e. if this - # this is the second level and N = 2, it made a vector of length 2 - # even though there were 5 units in the higher level so there should - # have been a vector of 5*2 = 10 - # NB: this is still not super safe; if the expression returns a thing - # of length not exactly to N it's just going to repeat it as it does - # data.frame(data_list). Usually this shouldn't be a problem but we may - # want a warning or error - data_list <- as.list(data) - data_list$N <- N - data_list[[args_names[i]]] <- - eval_tidy(args[[i]], data_list) - data_list$N <- NULL - data <- data.frame(data_list, stringsAsFactors = FALSE) - } + if (!existing_ID) { + if(is.null(ID_label)) ID_label <- "ID" + data <- genID(data, ID_label, N) + } + + + fab(data, options) +} + +# make IDs that are nicely padded +genID <- function(data, ID, N=nrow(data)){ + fmt <- paste0("%0", nchar(N), "d") + data[1:N, ID] <- sprintf(fmt, 1:N) + data +} + +fab <- function(data, args) { + N <- nrow(data) + for (i in names(args)) { + if(i == "") next #Unnamed args are meaningless? + + # this was changed to move costly data.frame operations inside the loop + # because previously, if you did rnorm(N) in a level + # it literally did a vector of length N, rather than doing + # it for all of the values of the level above it, i.e. if this + # this is the second level and N = 2, it made a vector of length 2 + # even though there were 5 units in the higher level so there should + # have been a vector of 5*2 = 10 + # NB: this is still not super safe; if the expression returns a thing + # of length not exactly to N it's just going to repeat it as it does + # data.frame(data_list). Usually this shouldn't be a problem but we may + # want a warning or error + data_list <- as.list(data) + data_list[[i]] <- + eval_tidy(args[[i]], append(data_list, list(N=N))) + + #TODO Factor this out of loop? It's expensive + data <- data.frame(data_list, stringsAsFactors = FALSE, row.names=NULL) + args[[i]] <- NULL } - rownames(data) <- NULL return(data) } + +check_all_levels <- function(options){ + + if (length(options) == 0) return(FALSE) + + + is_function <- sapply(options, function(i) { + is_lang(get_expr(i)) + }) + + is_level <- "level" == sapply(options[is_function], lang_name) ## function names + + if(length(is_level) == 0) return(FALSE) + + if (any(is_level) != all(is_level)) { + stop( + "Arguments passed to ... must either all be calls to level() or have no calls to level()." + ) + } + + is_level[1] && length(is_level) == length(options) +} + diff --git a/R/level.R b/R/level.R index 88050c6..0153ed1 100644 --- a/R/level.R +++ b/R/level.R @@ -32,37 +32,7 @@ level <- ## if data is not provided to fabricate, this part handles the case ## of the top level, where data must be created for the first time - if (is.null(N)) { - stop( - paste0( - "At the top level, ", - ID_label, - ", you must provide N if you did not provide data to fabricate." - ) - ) - } - if (length(N) > 1) { - stop(paste0( - "At the top level, ", - ID_label, - ", you must provide a single number to N." - )) - } - - # make IDs that are nicely padded - data_internal_ <- - data.frame(sprintf(paste0("%0", nchar(N), "d"), 1:N), stringsAsFactors = FALSE) - colnames(data_internal_) <- ID_label - - # now that data_internal_ is the right size, pass to "mutate", i.e., simulate data - - options <- lang_modify(dots, - data = data_internal_, - N = NULL, - ID_label = ID_label) - level_call <- quo(fabricate_data_single_level(!!!options)) - - return(eval_tidy(level_call)) + return(fabricate_data_single_level(N=N, ID_label=ID_label, options=dots)) } else { # at the second level, after data is created, or at the top level if data is provided @@ -80,14 +50,7 @@ level <- N = N) # now that data_internal_ is the right size, pass to "mutate", i.e., simulate data - - options <- lang_modify(dots, - data = data_internal_, - N = NULL, - ID_label = ID_label) - level_call <- quo(fabricate_data_single_level(!!!options)) - - return(eval_tidy(level_call)) + return(fabricate_data_single_level(data_internal_, NULL, ID_label, options=dots)) } else { # otherwise assume you are adding variables to an existing level @@ -103,18 +66,7 @@ level <- unique(data_internal_[, unique(c(ID_label, level_variables)), drop = FALSE]) - # set up - options <- lang_modify( - dots, - data = data, - N = NULL, - ID_label = ID_label, - existing_ID = TRUE - ) - - level_call <- quo(fabricate_data_single_level(!!!options)) - - data <- eval_tidy(level_call) + data <- fabricate_data_single_level(data, NULL, ID_label, existing_ID = TRUE, options=dots) return(merge( data_internal_[, colnames(data_internal_)[!(colnames(data_internal_) %in% diff --git a/man/fabricate.Rd b/man/fabricate.Rd index d854cf0..756b5b8 100644 --- a/man/fabricate.Rd +++ b/man/fabricate.Rd @@ -5,7 +5,7 @@ \alias{level} \title{Fabricate data} \usage{ -fabricate(data = NULL, N = NULL, ID_label = NULL, ...) +fabricate(data, N, ID_label, ...) level(N = NULL, ...) }