Skip to content

Commit

Permalink
fixing tests
Browse files Browse the repository at this point in the history
  • Loading branch information
nfultz committed Jul 30, 2018
1 parent bcbbeab commit 59860a4
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 99 deletions.
2 changes: 1 addition & 1 deletion R/aaa_fabricate.R
Expand Up @@ -240,7 +240,7 @@ fabricate <- function(..., data = NULL, N = NULL, ID_label = NULL) {
ret <- if (is_empty(dots)) working_environment else
modify_level_internal(
N = N,
ID_label = ID_label,
ID_label = uu,
data_arguments = dots,
workspace = working_environment
)
Expand Down
106 changes: 18 additions & 88 deletions R/modify_level.R
Expand Up @@ -2,30 +2,30 @@
#'
#' @rdname fabricate
#' @export
modify_level <- function(...) {
do_internal(N=NULL, ..., FUN=modify_level_internal, from="modify_level")
modify_level <- function(..., by=NULL) {
do_internal(N=NULL, ..., by=by, FUN=modify_level_internal, from="modify_level")
}

#' @importFrom rlang eval_tidy
#'
modify_level_internal <- function(N = NULL, ID_label = NULL,
workspace = NULL,
workspace = NULL, by = NULL,
data_arguments=NULL) {


modify_level_internal_checks(ID_label, workspace)

uu <- attr(workspace, "active_df")
uu <- ID_label %||% attr(workspace, "active_df")

df <- active_df(workspace)
df <- workspace[[uu]] %||% active_df(workspace)



# There are two possibilities. One is that we are modifying the lowest level
# of data. In which case, we simply add variables, like if someone called
# add_level with a dataset. To check if that's the world we're in, check if
# we have any duplicates in the ID label:
if (!anyDuplicated(df[[ID_label]])) {
if (!is.character(by)) {
# There is no subsetting going on, but modify_level was used anyway.
N <- nrow(df)

Expand Down Expand Up @@ -65,104 +65,34 @@ modify_level_internal <- function(N = NULL, ID_label = NULL,
attr_names <- grep("^fabricatr::", names(attributes(df)), value = TRUE)
attributes(workspace[[uu]])[attr_names] <- attributes(df)[attr_names]

activate(workspace, uu)
# Return results
return(workspace)
}


# If we're here, then at least some subsetting is used in the modify call
# first, subset to unique observations, then generate new data, then
# re-expand. To do this, we need a mapping between observations and unique
# observations. First, get the unique values of the level:
unique_values_of_level <- unique(df[[ID_label]])
idx <- split(seq_len(nrow(df)), df[by], drop = TRUE)

index_maps <- match(df[[ID_label]], unique_values_of_level)

# Now, which variables are we going to from to (do we need to subset)?
input_variables <- unname(unlist(get_symbols_from_quosures(data_arguments)))
input_variables <- intersect(setdiff(input_variables, ID_label), names(df))
input_variables <- df[input_variables]
for(slice in idx) {
wenv <- import_data_list(df[slice, ,drop=FALSE])

# Level unique variables:
level_unique_variables <- get_unique_variables_by_level(
data = df,
ID_label = ID_label,
superset = input_variables
)
wenv <- modify_level_internal(N, ID_label, wenv, data_arguments=data_arguments)

check_uniqueness_at_level(level_unique_variables, input_variables, ID_label)
ret <- active_df(wenv)

# If new columns were created, preallocate them, ow will be ignored w/ a warning
df[setdiff(names(ret), names(df))] <- NA

# Our subset needs these columns -- the level variable, all the unique
# variables we are going to use to write, and then in case the latter is "",
# remove that dummy obs.:
merged_set <- unique(c(ID_label, setdiff(level_unique_variables, "")))
df[slice, names(ret)] <- ret

# And these rows:
row_indices_keep <- !duplicated(df[[ID_label]])


mode2 <- exists(ID_label, workspace)

# Now subset it:
working_subset <- if(mode2) workspace[[ID_label]] else df[
row_indices_keep,
merged_set,
drop = FALSE
]

# Set the N variable correctly moving forward:
super_N <- nrow(df)
N <- nrow(working_subset)

# Get the subset into a list:
working_data_list <- as.list(working_subset)
# And our original working data frame:
super_working_data_list <- as.list(df)

# Now loop
for (i in names(data_arguments)) {
# Evaluate the formula in an environment consisting of:
# 1) The current working data list
# 2) A list that tells everyone what N means in this context.
# Store it in the currently working data list
working_data_list[[i]] <- expand_or_error(eval_tidy(
data_arguments[[i]],
append(working_data_list, list(N = N))
), N, i, data_arguments[[i]])

# Expand the variable and store it in the actual, expanded working data
# list. Why do we keep these in parallel? Because subsequent variables
# might need the non-expanded version to generate new variables.
super_working_data_list[[i]] <- working_data_list[[i]][index_maps]

# clean up as above
data_arguments[[i]] <- NULL
}

# Before handing back data, ensure it's actually rectangular
super_working_data_list <- check_rectangular(super_working_data_list, super_N)

# Overwrite the working data frame.
workspace[[uu]] <- data.frame(
super_working_data_list,
stringsAsFactors = FALSE,
row.names = NULL
)
workspace[[uu]] <- df

if(mode2) {
working_data_list <- check_rectangular(working_data_list, N)
workspace[[ID_label]] <- data.frame(
working_data_list,
stringsAsFactors = FALSE,
row.names = NULL
)
activate(workspace, ID_label)

}

# Return results
activate(workspace, uu)
workspace

}


Expand Down
2 changes: 1 addition & 1 deletion R/sac_level.R
Expand Up @@ -2,7 +2,7 @@
#'
#' @export
sac_level <- function(by = NULL, ...) {
do_internal(N = NULL, by = by, ..., FUN = sac_level_internal, from="sac_level")
do_internal(N = NULL, by = by, ..., FUN = modify_level_internal, from="sac_level")
}


Expand Down
2 changes: 1 addition & 1 deletion man/fabricate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-split-apply-combine-level.R
Expand Up @@ -2,7 +2,7 @@ context("sac_level()")

test_that("base case",{

out <- fabricate(sleep, sleep=sac_level(by="group", ybar=mean(extra)))
out <- fabricate(sleep, sleep=modify_level(by="group", ybar=mean(extra)))

# ybar constant within group
expect_true(all(tapply(out$ybar, out$group, function(x) all(x == x[1]))))
Expand Down
16 changes: 9 additions & 7 deletions tests/testthat/test-start-with-existing-data.R
Expand Up @@ -51,14 +51,16 @@ test_that("Start with existing multi-level data and add variables", {
})

test_that("Modify variable at wrong level", {
df <- fabricate(
country = add_level(N = 50, population = runif(N, 10000, 20000)),
state = add_level(N = 10, latitude = runif(N, 40, 50)),
town = add_level(N = 5, stop_lights = draw_binary(prob = 0.7, N = N))
expect_error(
df <- fabricate(
country = add_level(N = 50, population = runif(N, 10000, 20000)),
state = add_level(N = 10, latitude = runif(N, 40, 50)),
town = add_level(N = 5, stop_lights = draw_binary(prob = 0.7, N = N)),

state = modify_level(
crime = 0.5 + stop_lights + latitude)
)
)
expect_error(fabricate(df,
state = modify_level(
crime = 0.5 + stop_lights + latitude)))
})

test_that("Import -> nest with special length N, test for #80", {
Expand Down

0 comments on commit 59860a4

Please sign in to comment.