Skip to content

Commit

Permalink
Neil's refactor of fabricate and level (#17)
Browse files Browse the repository at this point in the history
* Moving some code around

* Use non-... arguments for passing around expressions

* Update other two usages

* chasing nulls down to tighter scope

* roxygenize

* factoring out genID call

* typo
  • Loading branch information
nfultz authored and aaronrudkin committed Oct 24, 2017
1 parent ba56015 commit 18dd32d
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 125 deletions.
161 changes: 88 additions & 73 deletions R/fabricate.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,51 +51,28 @@
#'
#' @export
fabricate <-
function(data = NULL,
N = NULL,
ID_label = NULL,
function(data,
N,
ID_label,
...) {
options <- quos(...)

functions_or_not <-
sapply(options, function(i) {
is_lang(get_expr(i))
})

if (length(functions_or_not) > 0) {
options_fn <-
sapply(options[functions_or_not], lang_name) ## function names
if (any(options_fn == "level") &
!all(options_fn == "level")) {
stop(
"Arguments passed to ... must either all be calls to level() or have no calls to level()."
)
}
all_levels <-
all(options_fn == "level") &
length(options_fn) > 0 & all(functions_or_not)
} else{
all_levels <- FALSE
}
all_levels <- check_all_levels(options)

if (!is.null(data) & !any(class(data) == "data.frame")) {
if (!missing(data) && !"data.frame" %in% class(data)) {
stop(
"Please provide a data object to the data argument, e.g. a data.frame, tibble, or sf object."
)
}

ID_label <- substitute(ID_label)
if (!is.null(ID_label)) {
ID_label <- as.character(ID_label)
}

# check if all the options are level calls
if (all_levels) {
for (i in seq_along(options)) {
# Pop the data from the previous level in the current call
# Do this if there existing data to start with or
# and beginning with the second level
if (i > 1 | !is.null(data)) {
if (i > 1 | !missing(data)) {
options[[i]] <- lang_modify(options[[i]], data_internal_ = data)
}

Expand All @@ -111,12 +88,21 @@ fabricate <-
return(data)

} else {
if(missing(data)) data <- NULL
if(missing(N)) N <- NULL
if(missing(ID_label)) ID_label <- NULL

ID_label <- substitute(ID_label)
if (!is.null(ID_label)) {
ID_label <- as.character(ID_label)
}

fabricate_data_single_level(
data = data,
N = N,
ID_label = ID_label,
existing_ID = !is.null(data) & is.null(ID_label),
... = ...
options = options
)
}
}
Expand All @@ -127,60 +113,89 @@ fabricate_data_single_level <- function(data = NULL,
N = NULL,
ID_label = NULL,
...,
existing_ID = FALSE) {
if (sum(!is.null(data),!is.null(N)) != 1) {
existing_ID = FALSE,
options=quos(...)) {
if (is.null(data) == is.null(N)) {
stop("Please supply either a data.frame or N and not both.")
}

if (is.null(data)) {
if (length(N) > 1) {
stop(paste0(
if (!is.null(N)) {
if (length(N) != 1) {
stop(
"At the top level, ",
ID_label,
", you must provide a single number to N."
))
)
}
# make IDs that are nicely padded
data <-
data.frame(sprintf(paste0("%0", nchar(N), "d"), 1:N), stringsAsFactors = FALSE)

# this creates column names from ID_label
# note if ID_label is NULL that the ID column name is just "ID" -- so safe
colnames(data) <- ifelse(is.null(ID_label), "ID", ID_label)
} else {
data <- data.frame()
existing_ID <- FALSE
} else if(!is.null(data)){
N <- nrow(data)
if (existing_ID == FALSE) {
data[, ifelse(is.null(ID_label), "ID", ID_label)] <-
sprintf(paste0("%0", nchar(nrow(data)), "d"), 1:nrow(data))
}
}

args <- quos(...)

args_names <- names(args)

if (length(args) > 0) {
for (i in 1:length(args)) {
# this was changed to move costly data.frame operations inside the loop
# because previously, if you did rnorm(N) in a level
# it literally did a vector of length N, rather than doing
# it for all of the values of the level above it, i.e. if this
# this is the second level and N = 2, it made a vector of length 2
# even though there were 5 units in the higher level so there should
# have been a vector of 5*2 = 10
# NB: this is still not super safe; if the expression returns a thing
# of length not exactly to N it's just going to repeat it as it does
# data.frame(data_list). Usually this shouldn't be a problem but we may
# want a warning or error
data_list <- as.list(data)
data_list$N <- N
data_list[[args_names[i]]] <-
eval_tidy(args[[i]], data_list)
data_list$N <- NULL
data <- data.frame(data_list, stringsAsFactors = FALSE)
}
if (!existing_ID) {
if(is.null(ID_label)) ID_label <- "ID"
data <- genID(data, ID_label, N)
}


fab(data, options)
}

# make IDs that are nicely padded
genID <- function(data, ID, N=nrow(data)){
fmt <- paste0("%0", nchar(N), "d")
data[1:N, ID] <- sprintf(fmt, 1:N)
data
}

fab <- function(data, args) {
N <- nrow(data)
for (i in names(args)) {
if(i == "") next #Unnamed args are meaningless?

# this was changed to move costly data.frame operations inside the loop
# because previously, if you did rnorm(N) in a level
# it literally did a vector of length N, rather than doing
# it for all of the values of the level above it, i.e. if this
# this is the second level and N = 2, it made a vector of length 2
# even though there were 5 units in the higher level so there should
# have been a vector of 5*2 = 10
# NB: this is still not super safe; if the expression returns a thing
# of length not exactly to N it's just going to repeat it as it does
# data.frame(data_list). Usually this shouldn't be a problem but we may
# want a warning or error
data_list <- as.list(data)
data_list[[i]] <-
eval_tidy(args[[i]], append(data_list, list(N=N)))

#TODO Factor this out of loop? It's expensive
data <- data.frame(data_list, stringsAsFactors = FALSE, row.names=NULL)
args[[i]] <- NULL
}

rownames(data) <- NULL
return(data)
}

check_all_levels <- function(options){

if (length(options) == 0) return(FALSE)


is_function <- sapply(options, function(i) {
is_lang(get_expr(i))
})

is_level <- "level" == sapply(options[is_function], lang_name) ## function names

if(length(is_level) == 0) return(FALSE)

if (any(is_level) != all(is_level)) {
stop(
"Arguments passed to ... must either all be calls to level() or have no calls to level()."
)
}

is_level[1] && length(is_level) == length(options)
}

54 changes: 3 additions & 51 deletions R/level.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,37 +32,7 @@ level <-

## if data is not provided to fabricate, this part handles the case
## of the top level, where data must be created for the first time
if (is.null(N)) {
stop(
paste0(
"At the top level, ",
ID_label,
", you must provide N if you did not provide data to fabricate."
)
)
}
if (length(N) > 1) {
stop(paste0(
"At the top level, ",
ID_label,
", you must provide a single number to N."
))
}

# make IDs that are nicely padded
data_internal_ <-
data.frame(sprintf(paste0("%0", nchar(N), "d"), 1:N), stringsAsFactors = FALSE)
colnames(data_internal_) <- ID_label

# now that data_internal_ is the right size, pass to "mutate", i.e., simulate data

options <- lang_modify(dots,
data = data_internal_,
N = NULL,
ID_label = ID_label)
level_call <- quo(fabricate_data_single_level(!!!options))

return(eval_tidy(level_call))
return(fabricate_data_single_level(N=N, ID_label=ID_label, options=dots))

} else {
# at the second level, after data is created, or at the top level if data is provided
Expand All @@ -80,14 +50,7 @@ level <-
N = N)

# now that data_internal_ is the right size, pass to "mutate", i.e., simulate data

options <- lang_modify(dots,
data = data_internal_,
N = NULL,
ID_label = ID_label)
level_call <- quo(fabricate_data_single_level(!!!options))

return(eval_tidy(level_call))
return(fabricate_data_single_level(data_internal_, NULL, ID_label, options=dots))

} else {
# otherwise assume you are adding variables to an existing level
Expand All @@ -103,18 +66,7 @@ level <-
unique(data_internal_[, unique(c(ID_label, level_variables)),
drop = FALSE])

# set up
options <- lang_modify(
dots,
data = data,
N = NULL,
ID_label = ID_label,
existing_ID = TRUE
)

level_call <- quo(fabricate_data_single_level(!!!options))

data <- eval_tidy(level_call)
data <- fabricate_data_single_level(data, NULL, ID_label, existing_ID = TRUE, options=dots)

return(merge(
data_internal_[, colnames(data_internal_)[!(colnames(data_internal_) %in%
Expand Down
2 changes: 1 addition & 1 deletion man/fabricate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 18dd32d

Please sign in to comment.