From 2ac8aac51dfdda91c7c753ee5bceecc60f27f88b Mon Sep 17 00:00:00 2001
From: Aaron Rudkin <junk_signups@rudkin.ca>
Date: Thu, 26 Oct 2017 10:43:12 -0700
Subject: [PATCH 1/8] Made the code changes Neil suggested in code review for
 the previous PR.

---
 R/fabricate.R                   | 26 +++++++++--------------
 R/resample_data.R               | 37 +++++++++++++++++----------------
 R/variable_creation_functions.R |  2 +-
 3 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/R/fabricate.R b/R/fabricate.R
index a54026d..1cd97db 100644
--- a/R/fabricate.R
+++ b/R/fabricate.R
@@ -152,24 +152,18 @@ fabricate_data_single_level <- function(data = NULL,
 
   if (!is.null(N)) {
     if (length(N) != 1) {
-      if(is.null(ID_label)) {
-        stop(
-          "At the top level, you must provide a single number to N."
-          )
-      } else {
-        stop(
-          "At the top level, ",
-          ID_label,
-          ", you must provide a single number to N."
-        )
-      }
-    }
-
-    if(is.numeric(N) & any(!N%%1 == 0)) {
-      stop(paste0(
+      stop(
+        "At the top level, ",
+        ifelse(!is.null(ID_label),
+               paste0(ID_label, ", "),
+               ""),
+        "you must provide a single number to N"
+      )
+    } else if(is.numeric(N) & any(!N%%1 == 0)) {
+      stop(
         "The provided N must be an integer number. Provided N was of type ",
         typeof(N)
-      ))
+      )
     }
 
     if(!is.numeric(N)) {
diff --git a/R/resample_data.R b/R/resample_data.R
index ee385b0..c841a72 100644
--- a/R/resample_data.R
+++ b/R/resample_data.R
@@ -60,26 +60,27 @@ resample_data = function(data, N, ID_labels=NULL) {
     return(bootstrap_single_level(data,
                                   N[1],
                                   ID_label=ID_labels[1]))
-  } else {
-    # Do the current bootstrap level
-    current_boot_values = unique(data[, ID_labels[1]])
-    sampled_boot_values = sample(1:length(current_boot_values), N[1], replace=TRUE)
-    app = 0
+  }
+
+  # Do the current bootstrap level
+  current_boot_values = unique(data[, ID_labels[1]])
+  sampled_boot_values = sample(1:length(current_boot_values), N[1], replace=TRUE)
+  app = 0
 
-    # Iterate over each thing chosen at the current level
-    results_all = lapply(sampled_boot_values, function(i) {
-      new_results = resample_data(
-        data[data[, ID_labels[1]] == i, ],
-        N=N[2:length(N)],
-        ID_labels=ID_labels[2:length(ID_labels)]
+  # Iterate over each thing chosen at the current level
+
+  results_all = lapply(sampled_boot_values, function(i) {
+    new_results = resample_data(
+      data[data[, ID_labels[1]] == i, ],
+      N=N[2:length(N)],
+      ID_labels=ID_labels[2:length(ID_labels)]
       )
-    })
-    #res = rbindlist(results_all)
-    res = do.call(rbind, results_all)
-    rownames(res) = NULL
-    # Return to preceding level
-    return(res)
-  }
+  })
+  #res = rbindlist(results_all)
+  res = do.call(rbind, results_all)
+  rownames(res) = NULL
+  # Return to preceding level
+  return(res)
 }
 
 bootstrap_single_level <- function(data, ID_label = NULL, N) {
diff --git a/R/variable_creation_functions.R b/R/variable_creation_functions.R
index bb2de0c..848b33d 100644
--- a/R/variable_creation_functions.R
+++ b/R/variable_creation_functions.R
@@ -154,7 +154,7 @@ draw_discrete <-
       if (length(breaks) < 3) {
         stop("Numeric breaks for ordered data must be of at least length 3.")
       }
-      if (!all(sort(breaks) == breaks)) {
+      if (is.unsorted(breaks)) {
         stop("Numeric breaks must be in ascending order.")
       }
       if(any(breaks[1] > x) | any(breaks[length(breaks)] < x)) {

From 9ed6814880788af0e0f3d8f195cf4b5c3c82ef1a Mon Sep 17 00:00:00 2001
From: Aaron Rudkin <junk_signups@rudkin.ca>
Date: Thu, 26 Oct 2017 10:51:14 -0700
Subject: [PATCH 2/8] Fixed a serious bug which would break bootstrapping in
 cases where the columns being bootstrapped on did not start at 1.

---
 R/resample_data.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/resample_data.R b/R/resample_data.R
index c841a72..16512fc 100644
--- a/R/resample_data.R
+++ b/R/resample_data.R
@@ -65,13 +65,12 @@ resample_data = function(data, N, ID_labels=NULL) {
   # Do the current bootstrap level
   current_boot_values = unique(data[, ID_labels[1]])
   sampled_boot_values = sample(1:length(current_boot_values), N[1], replace=TRUE)
-  app = 0
 
   # Iterate over each thing chosen at the current level
 
   results_all = lapply(sampled_boot_values, function(i) {
     new_results = resample_data(
-      data[data[, ID_labels[1]] == i, ],
+      data[data[, ID_labels[1]] == current_boot_values[i], ],
       N=N[2:length(N)],
       ID_labels=ID_labels[2:length(ID_labels)]
       )

From 6863e51b79a34516864ffb6e559e448bd70fe91c Mon Sep 17 00:00:00 2001
From: Aaron Rudkin <junk_signups@rudkin.ca>
Date: Thu, 26 Oct 2017 11:30:31 -0700
Subject: [PATCH 3/8] Changed resampling to be more memory and speed efficient.

---
 R/resample_data.R | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/R/resample_data.R b/R/resample_data.R
index 16512fc..b258606 100644
--- a/R/resample_data.R
+++ b/R/resample_data.R
@@ -65,17 +65,19 @@ resample_data = function(data, N, ID_labels=NULL) {
   # Do the current bootstrap level
   current_boot_values = unique(data[, ID_labels[1]])
   sampled_boot_values = sample(1:length(current_boot_values), N[1], replace=TRUE)
+  # Split indices of data frame by the thing we're strapping on
+  split_data_on_boot_id = split(1:nrow(data), data[,ID_labels[1]])
 
   # Iterate over each thing chosen at the current level
-
   results_all = lapply(sampled_boot_values, function(i) {
+    # Now just get the row IDs from the current bootstrap index and subset using that.
     new_results = resample_data(
-      data[data[, ID_labels[1]] == current_boot_values[i], ],
+      data[unlist(split_data_on_boot_id[i]), ],
       N=N[2:length(N)],
       ID_labels=ID_labels[2:length(ID_labels)]
       )
   })
-  #res = rbindlist(results_all)
+  #res = rbindlist(results_all) # Still trying to decide if we want to add dependencies.
   res = do.call(rbind, results_all)
   rownames(res) = NULL
   # Return to preceding level
@@ -92,16 +94,14 @@ bootstrap_single_level <- function(data, ID_label = NULL, N) {
     } else if(!ID_label %in% colnames(data)) {
       stop("ID label provided is not a column in the data being bootstrapped.")
     }  else {
-      # Bootstrapping unique values of ID_label (i.e. cluster selection when data
-      # are observations, not clusters
-      boot_ids <-
-        sample(unique(data[, ID_label]), size = N, replace = TRUE)
-      # Need to do the unlist-apply approach to ensure each row
-      # is appropriately duplicated. Faster than other ways to map
-      # cluster ids to row ids.
-      boot_indices <- unlist(lapply(boot_ids, function(i) {
-        which(data[, ID_label] == i)
-      }))
+      # What clusters are we bootstrapping over?
+      cluster_IDs = unique(data[, ID_label])
+      # Get cluster IDs (not the actual cluster values, the indices of the clusters)
+      boot_ids = sample(1:length(cluster_IDs), size=N, replace=TRUE)
+      # Split data by cluster ID, storing all row indices associated with that cluster ID
+      indices_split = split(1:nrow(data), data[, ID_label])
+      # Get all row indices associated with every cluster ID combined
+      boot_indices = unlist(indices_split[boot_ids])
     }
     # Grab the relevant rows
     new_data <- data[boot_indices, , drop = FALSE]

From 86abcb5dc339b485fcdd39e3009bca3c676f8bd3 Mon Sep 17 00:00:00 2001
From: Aaron Rudkin <junk_signups@rudkin.ca>
Date: Thu, 26 Oct 2017 14:35:51 -0700
Subject: [PATCH 4/8] Added a skipped test for fabricating and resampling
 extremely large data

---
 tests/testthat/test-bootstrap.R | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/testthat/test-bootstrap.R b/tests/testthat/test-bootstrap.R
index f65bb50..64d7d10 100644
--- a/tests/testthat/test-bootstrap.R
+++ b/tests/testthat/test-bootstrap.R
@@ -24,3 +24,19 @@ test_that("Error handling of Bootstrap", {
   resampled_two_levels <- resample_data(two_levels) # Missing N
   expect_error(resample_data(two_levels, c(100, 10), ID_labels = c("Invalid_ID")))
 })
+
+test_that("Extremely high volume data creation.", {
+  skip("Slows build substantially.")
+  deep_dive_data = fabricate(
+    countries = level(N = 100, gdp = rlnorm(N)),
+    states = level(N = 50, population = rlnorm(N)),
+    cities = level(N = 50, holiday = runif(N, 1, 365)),
+    neighborhoods = level(N = 5, stoplights = draw_binary(x=0.5, N)),
+    houses = level(N = 5, population = runif(N, 1, 5)),
+    people = level(N = population, sex = ifelse(draw_binary(x=0.5, N), "M", "F"))
+  )
+
+  test_resample = resample_data(deep_dive_data,
+                                ID_labels=c("countries", "states", "cities"),
+                                N=c(100, 50, 50))
+})

From a4c94da7ac3aeb72c9ea8b1e603ebdde52326656 Mon Sep 17 00:00:00 2001
From: Aaron Rudkin <junk_signups@rudkin.ca>
Date: Thu, 26 Oct 2017 17:17:42 -0700
Subject: [PATCH 5/8] Updated description file to indicate package is faster
 with data.table

---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 2077300..70be011 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -21,4 +21,5 @@ Suggests:
     dplyr,
     knitr,
     rmarkdown
+FasterWith: data.table
 VignetteBuilder: knitr

From f21ab0439a47026360522fb6830d776db548e35e Mon Sep 17 00:00:00 2001
From: Aaron Rudkin <junk_signups@rudkin.ca>
Date: Thu, 26 Oct 2017 18:52:56 -0700
Subject: [PATCH 6/8] Rewrite of resample_data for efficiency, including
 discussion in #21

---
 R/resample_data.R | 158 ++++++++++++++++++++++++++++++----------------
 1 file changed, 102 insertions(+), 56 deletions(-)

diff --git a/R/resample_data.R b/R/resample_data.R
index b258606..e44e148 100644
--- a/R/resample_data.R
+++ b/R/resample_data.R
@@ -28,83 +28,129 @@
 #'
 #'
 #' @export
-resample_data = function(data, N, ID_labels=NULL) {
-  # User didn't provide an N or an ID label, it's clear they just want a regular bootstrap
-  if (missing(N) & is.null(ID_labels)) {
-    N <- nrow(data)
-    return(bootstrap_single_level(data, nrow(data), ID_label=NULL))
-  }
+#'
+resample_data = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {
+  # Handle all the data sanity checks in outer_level so we don't have redundant error
+  # checks further down the recursion.
+  if(outer_level) {
+    # Optional usage of data.table to speed up functionality
+    if(requireNamespace("data.table", quietly=T)) {
+      use_dt = 1
+    } else {
+      use_dt = 0
+    }
 
-  # Error handling
-  if (!is.null(ID_labels) & (length(N) != length(ID_labels))) {
-    stop(
-      "If you provide more than one ID_labels to resample data for multilevel data, please provide a vector for N of the same length representing the number to resample at each level."
-    )
-  }
+    # User didn't provide an N or an ID label, it's clear they just want a regular bootstrap
+    # of N units by row.
+    if (missing(N) & is.null(ID_labels)) {
+      return(bootstrap_single_level(data, dim(data)[1], ID_label=NULL))
+    }
 
-  if (any(!ID_labels %in% names(data))) {
-    stop(
-      "One or more of the ID labels you provided are not columns in the data frame provided."
-    )
-  }
+    # No negative or non-numeric Ns
+    # Note: this should be rewritten when we implement the "ALL" option for a level.
+    if (any(!is.numeric(N) | N%%1 | N<=0)) {
+      stop(
+        "All specified Ns must be numeric and at least 1."
+      )
+    }
 
-  if(length(N) > 10) {
-    stop(
-      "Multi-level bootstrap with more than 10 levels is not advised."
-    )
+    # N doesn't match ID labels
+    if (!is.null(ID_labels) & (length(N) != length(ID_labels))) {
+      stop(
+        "If you provide more than one ID_labels to resample data for multilevel data, please provide a vector for N of the same length representing the number to resample at each level."
+      )
+    }
+
+    # ID_labels looking for some columns we don't have
+    if (any(!ID_labels %in% names(data))) {
+      stop(
+        "One or more of the ID labels you provided are not columns in the data frame provided."
+      )
+    }
+
+    # Excessive recursion depth
+    if(length(N) > 10) {
+      stop(
+        "Multi-level bootstrap with more than 10 levels is not advised."
+      )
+    }
   }
 
   # Single level bootstrap with explicit bootstrapping on a particular cluster variable
+  # this is the inner-most recursion
   if(length(N)==1)
   {
     return(bootstrap_single_level(data,
                                   N[1],
-                                  ID_label=ID_labels[1]))
+                                  ID_label=ID_labels[1],
+                                  check_sanity=0))
   }
 
-  # Do the current bootstrap level
-  current_boot_values = unique(data[, ID_labels[1]])
-  sampled_boot_values = sample(1:length(current_boot_values), N[1], replace=TRUE)
+  # OK, if not, we need to recurse
+
   # Split indices of data frame by the thing we're strapping on
-  split_data_on_boot_id = split(1:nrow(data), data[,ID_labels[1]])
+  split_data_on_boot_id = split(seq_len(dim(data)[1]), data[,ID_labels[1]])
+
+  # Do the current bootstrap level
+  # sample.int is faster than sample(1:length(.)) or sample(seq.len(length(.))
+  sampled_boot_values = sample.int(length(split_data_on_boot_id), N[1], replace=TRUE)
 
   # Iterate over each thing chosen at the current level
   results_all = lapply(sampled_boot_values, function(i) {
-    # Now just get the row IDs from the current bootstrap index and subset using that.
-    new_results = resample_data(
-      data[unlist(split_data_on_boot_id[i]), ],
+    # Get rowids from current bootstrap index, subset based on that
+    # pass through the recursed Ns and labels, and remind the inner
+    # layer that it doesn't need to sanity check and we already know
+    # if data.table is around.
+    # The list subset on the split is faster than unlisting
+    resample_data(
+      data[split_data_on_boot_id[i][[1]], ],
       N=N[2:length(N)],
-      ID_labels=ID_labels[2:length(ID_labels)]
-      )
+      ID_labels=ID_labels[2:length(ID_labels)],
+      outer_level=0,
+      use_dt = use_dt
+    )
   })
-  #res = rbindlist(results_all) # Still trying to decide if we want to add dependencies.
-  res = do.call(rbind, results_all)
-  rownames(res) = NULL
+
+  # We could probably gain slight efficiency by only doing the rbind on the
+  # outermost loop.
+  if(!use_dt) {
+    # With no data.table, we need to rbind and then remove row names.
+    # Removing row names is as fast this way as other ways to do the same thing
+    res = do.call(rbind, results_all)
+    rownames(res) = NULL
+  } else {
+    # User has data.table, give them a speed benefit for it
+    res = data.table::rbindlist(results_all)
+    # Strip the things that differentiate data.table from data.frame
+    # so we hand back something identical.
+    class(res) = "data.frame"
+    attr(res, ".internal.selfref") = NULL
+  }
   # Return to preceding level
   return(res)
 }
 
-bootstrap_single_level <- function(data, ID_label = NULL, N) {
-    if(dim(data)[1] == 0) {
-      stop("Data being bootstrapped has no rows.")
-    }
-    if (is.null(ID_label)) {
-      # Simple bootstrap
-      boot_indices <- sample(1:nrow(data), N, replace = TRUE)
-    } else if(!ID_label %in% colnames(data)) {
-      stop("ID label provided is not a column in the data being bootstrapped.")
-    }  else {
-      # What clusters are we bootstrapping over?
-      cluster_IDs = unique(data[, ID_label])
-      # Get cluster IDs (not the actual cluster values, the indices of the clusters)
-      boot_ids = sample(1:length(cluster_IDs), size=N, replace=TRUE)
-      # Split data by cluster ID, storing all row indices associated with that cluster ID
-      indices_split = split(1:nrow(data), data[, ID_label])
-      # Get all row indices associated with every cluster ID combined
-      boot_indices = unlist(indices_split[boot_ids])
-    }
-    # Grab the relevant rows
-    new_data <- data[boot_indices, , drop = FALSE]
+bootstrap_single_level <- function(data, ID_label = NULL, N, check_sanity=1) {
+  # dim slightly faster than nrow
+  if(dim(data)[1] == 0) {
+    stop("Data being bootstrapped has no rows.")
+  }
+
+  if (is.null(ID_label)) {
+    # Simple bootstrap
+    return(data[sample(seq_len(dim(data)[1]), N, replace = TRUE), , drop = F])
+  } else if(!ID_label %in% colnames(data)) {
+    stop("ID label provided is not a column in the data being bootstrapped.")
+  }
 
-    return(new_data)
+  # Split data by cluster ID, storing all row indices associated with that cluster ID
+  # nrow passes through transparently to dim, so this is slightly faster
+  indices_split = split(seq_len(dim(data)[1]), data[, ID_label])
+  # Get cluster IDs (not the actual cluster values, the indices of the clusters)
+  # sample.int is slightly faster than sample(1:length(.)) or sample(seq_len(length(.))
+  boot_ids = sample.int(length(indices_split), size=N, replace=TRUE)
+  # Get all row indices associated with every cluster ID combined
+  boot_indices = unlist(indices_split[boot_ids], recursive=F, use.names=F)
+  # Only take the indices we want (repeats will be handled properly)
+  return(data[boot_indices, , drop=F])
 }

From da29cfb91f6856e8c98590cb32d18bb9697395d5 Mon Sep 17 00:00:00 2001
From: Aaron Rudkin <junk_signups@rudkin.ca>
Date: Thu, 26 Oct 2017 19:30:59 -0700
Subject: [PATCH 7/8] Added test coverage for bootstrapping, made a wrapper
 function to hide internal arguments, added data.table to Suggests

---
 DESCRIPTION                     |  3 ++-
 R/resample_data.R               | 15 ++++++++++-----
 tests/testthat/test-bootstrap.R | 25 ++++++++++++++++++++++++-
 3 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 70be011..aab7bdb 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -20,6 +20,7 @@ Suggests:
     testthat,
     dplyr,
     knitr,
-    rmarkdown
+    rmarkdown,
+    data.table
 FasterWith: data.table
 VignetteBuilder: knitr
diff --git a/R/resample_data.R b/R/resample_data.R
index e44e148..cbaf651 100644
--- a/R/resample_data.R
+++ b/R/resample_data.R
@@ -29,7 +29,13 @@
 #'
 #' @export
 #'
-resample_data = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {
+
+resample_data = function(data, N, ID_labels=NULL) {
+  # Mask internal outer_level and use_dt arguments from view.
+  .resample_data_internal(data, N, ID_labels)
+}
+
+.resample_data_internal = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {
   # Handle all the data sanity checks in outer_level so we don't have redundant error
   # checks further down the recursion.
   if(outer_level) {
@@ -82,8 +88,7 @@ resample_data = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {
   {
     return(bootstrap_single_level(data,
                                   N[1],
-                                  ID_label=ID_labels[1],
-                                  check_sanity=0))
+                                  ID_label=ID_labels[1]))
   }
 
   # OK, if not, we need to recurse
@@ -102,7 +107,7 @@ resample_data = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {
     # layer that it doesn't need to sanity check and we already know
     # if data.table is around.
     # The list subset on the split is faster than unlisting
-    resample_data(
+    .resample_data_internal(
       data[split_data_on_boot_id[i][[1]], ],
       N=N[2:length(N)],
       ID_labels=ID_labels[2:length(ID_labels)],
@@ -130,7 +135,7 @@ resample_data = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {
   return(res)
 }
 
-bootstrap_single_level <- function(data, ID_label = NULL, N, check_sanity=1) {
+bootstrap_single_level <- function(data, ID_label = NULL, N) {
   # dim slightly faster than nrow
   if(dim(data)[1] == 0) {
     stop("Data being bootstrapped has no rows.")
diff --git a/tests/testthat/test-bootstrap.R b/tests/testthat/test-bootstrap.R
index 64d7d10..c44368f 100644
--- a/tests/testthat/test-bootstrap.R
+++ b/tests/testthat/test-bootstrap.R
@@ -22,7 +22,30 @@ test_that("Error handling of Bootstrap", {
   )
 
   resampled_two_levels <- resample_data(two_levels) # Missing N
-  expect_error(resample_data(two_levels, c(100, 10), ID_labels = c("Invalid_ID")))
+
+  # Invalid ID
+  expect_error(resample_data(two_levels, c(100, 10), ID_labels = c("Invalid_ID", "Invalid_ID_2")))
+  # ID length doesn't match n length
+  expect_error(resample_data(two_levels, c(100, 10), ID_labels = c("regions")))
+  # Negative N
+  expect_error(resample_data(two_levels, c(-1), ID_labels = c("regions")))
+  # Non-numeric
+  expect_error(resample_data(two_levels, c("hello world"), ID_labels = c("regions")))
+})
+
+test_that("Direct bootstrap_single_level", {
+  two_levels <- fabricate(
+    regions = level(N = 5, gdp = rnorm(N)),
+    cities = level(N = sample(1:5), subways = rnorm(N, mean = gdp))
+  )
+
+  null_data = two_levels[two_levels$gdp > 100, ]
+  # Trying to bootstrap null data
+  expect_equal(dim(null_data)[1], 0)
+  expect_error(bootstrap_single_level(null_data, ID_label="regions", N=10))
+
+  # Trying to bootstrap single level with an invalid ID.
+  expect_error(bootstrap_single_level(two_levels, ID_label="invalid-id", N=10))
 })
 
 test_that("Extremely high volume data creation.", {

From 910d51b175054a737910c3c6c832f33d9d196087 Mon Sep 17 00:00:00 2001
From: Aaron Rudkin <junk_signups@rudkin.ca>
Date: Tue, 31 Oct 2017 12:25:12 -0700
Subject: [PATCH 8/8] Fixes to allow unit tests to override the data.table
 suggestion so that both branches are explored.

---
 R/resample_data.R               | 5 +++--
 tests/testthat/test-bootstrap.R | 9 ++++++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/R/resample_data.R b/R/resample_data.R
index cbaf651..655fe4a 100644
--- a/R/resample_data.R
+++ b/R/resample_data.R
@@ -35,12 +35,13 @@ resample_data = function(data, N, ID_labels=NULL) {
   .resample_data_internal(data, N, ID_labels)
 }
 
-.resample_data_internal = function(data, N, ID_labels=NULL, outer_level=1, use_dt = 0) {
+.resample_data_internal = function(data, N, ID_labels=NULL, outer_level=1, use_dt = NA) {
   # Handle all the data sanity checks in outer_level so we don't have redundant error
   # checks further down the recursion.
   if(outer_level) {
     # Optional usage of data.table to speed up functionality
-    if(requireNamespace("data.table", quietly=T)) {
+    # Short-circuit on the is.na to only attempt the package load if necessary.
+    if(is.na(use_dt) && requireNamespace("data.table", quietly=T)) {
       use_dt = 1
     } else {
       use_dt = 0
diff --git a/tests/testthat/test-bootstrap.R b/tests/testthat/test-bootstrap.R
index c44368f..89d5f10 100644
--- a/tests/testthat/test-bootstrap.R
+++ b/tests/testthat/test-bootstrap.R
@@ -6,7 +6,14 @@ test_that("Bootstrap", {
     cities = level(N = 5, subways = rnorm(N, mean = gdp))
   )
 
-  resampled_two_levels <- resample_data(two_levels, N = c(2, 2), ID_labels = c("regions", "cities"))
+  # Example with data.table codepath
+  resampled_two_levels <- resample_data(two_levels, N = c(2, 2),
+                                        ID_labels = c("regions", "cities"))
+
+  # Example without data.table codepath
+  resampled_two_levels <- .resample_data_internal(two_levels, N = c(2, 2),
+                                                  ID_labels = c("regions", "cities"),
+                                                  use_dt=0)
 
   expect_equal(nrow(resampled_two_levels), 4)