Skip to content

Commit

Permalink
Added test coverage for bootstrapping, made a wrapper function to hid…
Browse files Browse the repository at this point in the history
…e internal arguments, added data.table to Suggests
  • Loading branch information
aaronrudkin committed Oct 27, 2017
1 parent f21ab04 commit da29cfb
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 7 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Expand Up @@ -20,6 +20,7 @@ Suggests:
testthat,
dplyr,
knitr,
rmarkdown
rmarkdown,
data.table
FasterWith: data.table
VignetteBuilder: knitr
15 changes: 10 additions & 5 deletions R/resample_data.R
Expand Up @@ -29,7 +29,13 @@
#'
#' @export
#'
resample_data = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {

resample_data <- function(data, N, ID_labels = NULL) {
  # Public entry point: only the user-facing arguments are forwarded, so the
  # helper's outer_level and use_dt parameters are left at their defaults
  # and stay hidden from callers.
  .resample_data_internal(data = data, N = N, ID_labels = ID_labels)
}

.resample_data_internal = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {
# Handle all the data sanity checks in outer_level so we don't have redundant error
# checks further down the recursion.
if(outer_level) {
Expand Down Expand Up @@ -82,8 +88,7 @@ resample_data = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {
{
return(bootstrap_single_level(data,
N[1],
ID_label=ID_labels[1],
check_sanity=0))
ID_label=ID_labels[1]))
}

# OK, if not, we need to recurse
Expand All @@ -102,7 +107,7 @@ resample_data = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {
# layer that it doesn't need to sanity check and we already know
# if data.table is around.
# The list subset on the split is faster than unlisting
resample_data(
.resample_data_internal(
data[split_data_on_boot_id[i][[1]], ],
N=N[2:length(N)],
ID_labels=ID_labels[2:length(ID_labels)],
Expand Down Expand Up @@ -130,7 +135,7 @@ resample_data = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {
return(res)
}

bootstrap_single_level <- function(data, ID_label = NULL, N, check_sanity=1) {
bootstrap_single_level <- function(data, ID_label = NULL, N) {
# dim slightly faster than nrow
if(dim(data)[1] == 0) {
stop("Data being bootstrapped has no rows.")
Expand Down
25 changes: 24 additions & 1 deletion tests/testthat/test-bootstrap.R
Expand Up @@ -22,7 +22,30 @@ test_that("Error handling of Bootstrap", {
)

resampled_two_levels <- resample_data(two_levels) # Missing N
expect_error(resample_data(two_levels, c(100, 10), ID_labels = c("Invalid_ID")))

# Invalid ID
expect_error(resample_data(two_levels, c(100, 10), ID_labels = c("Invalid_ID", "Invalid_ID_2")))
# ID length doesn't match n length
expect_error(resample_data(two_levels, c(100, 10), ID_labels = c("regions")))
# Negative N
expect_error(resample_data(two_levels, c(-1), ID_labels = c("regions")))
# Non-numeric
expect_error(resample_data(two_levels, c("hello world"), ID_labels = c("regions")))
})

test_that("Direct bootstrap_single_level", {
  hierarchy <- fabricate(
    regions = level(N = 5, gdp = rnorm(N)),
    cities = level(N = sample(1:5), subways = rnorm(N, mean = gdp))
  )

  # Filter on a gdp threshold that default rnorm() draws will not reach, so
  # the subset is expected to contain zero rows; confirm that first.
  no_rows <- hierarchy[hierarchy$gdp > 100, ]
  expect_equal(nrow(no_rows), 0)

  # Bootstrapping a zero-row data frame must raise an error.
  expect_error(
    bootstrap_single_level(no_rows, N = 10, ID_label = "regions")
  )

  # Bootstrapping on an ID column that is not in the data must raise an error.
  expect_error(
    bootstrap_single_level(hierarchy, N = 10, ID_label = "invalid-id")
  )
})

test_that("Extremely high volume data creation.", {
Expand Down

0 comments on commit da29cfb

Please sign in to comment.