Start converting ahr() and pw_info() to data.table

Merck · Dec 14, 2023 · 24fdc77 · 24fdc77
1 parent 50067c4
commit 24fdc77
Show file tree

Hide file tree

Showing 19 changed files with 136 additions and 87 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: gsDesign2
 Title: Group Sequential Design with Non-Constant Effect
-Version: 1.1.0
+Version: 1.1.0.1
 Authors@R: c(
     person("Keaven", "Anderson", email = "keaven_anderson@merck.com", role = c("aut")),
     person("Yilong", "Zhang", email = "elong0527@gmail.com", role = c("aut")),
@@ -36,6 +36,7 @@ Depends:
     R (>= 3.5.0)
 Imports:
     corpcor,
+    data.table,
     dplyr,
     gsDesign,
     gt,

diff --git a/NAMESPACE b/NAMESPACE
@@ -51,15 +51,21 @@ export(wlr_weight_fh)
 export(wlr_weight_mb)
 export(wlr_weight_n)
 importFrom(Rcpp,sourceCpp)
+importFrom(data.table,":=")
+importFrom(data.table,as.data.table)
+importFrom(data.table,copy)
+importFrom(data.table,first)
+importFrom(data.table,last)
+importFrom(data.table,rbindlist)
+importFrom(data.table,setDF)
+importFrom(data.table,setDT)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,all_of)
 importFrom(dplyr,arrange)
 importFrom(dplyr,desc)
 importFrom(dplyr,filter)
-importFrom(dplyr,first)
 importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
-importFrom(dplyr,last)
 importFrom(dplyr,lead)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
@@ -70,7 +76,6 @@ importFrom(dplyr,ungroup)
 importFrom(gsDesign,gsDesign)
 importFrom(gsDesign,sfLDOF)
 importFrom(mvtnorm,GenzBretz)
-importFrom(rlang,":=")
 importFrom(stats,pnorm)
 importFrom(stats,qnorm)
 importFrom(stats,stepfun)

diff --git a/R/ahr.R b/R/ahr.R
@@ -31,7 +31,7 @@
 #'   cutoff; this can be a single value or a vector of positive numbers.
 #' @param ratio Ratio of experimental to control randomization.
 #'
-#' @return A tibble with `time` (from `total_duration`),
+#' @return A data frame with `time` (from `total_duration`),
 #'   `ahr` (average hazard ratio), `event` (expected number of events),
 #'   `info` (information under given scenarios), `and` info0
 #'   (information under related null hypothesis) for each value of
@@ -64,15 +64,15 @@
 #'        \item Combine the results for all time points by summarizing the results by adding up the number of events,
 #'       information under the null and the given scenarios.
 #'       }
-#'    \item Return a tibble of overall event count, statistical information and average hazard ratio
+#'    \item Return a data frame of overall event count, statistical information and average hazard ratio
 #'    of each value in total_duration.
 #'    \item Calculation of \code{ahr} for different design scenarios, and the comparison to the
 #'    simulation studies are defined in vignette/AHRVignette.Rmd.
 #'   }
 #' }
 #' \if{html}{The contents of this section are shown in PDF user manual only.}
 #'
-#' @importFrom dplyr filter mutate group_by summarize ungroup first last "%>%"
+#' @importFrom data.table setDF setDT
 #'
 #' @export
 #'
@@ -116,14 +116,16 @@ ahr <- function(
     total_duration = total_duration,
     ratio = ratio
   )
-  ans <- res %>%
-    group_by(time) %>%
-    summarize(
+  setDT(res)
+  ans <- res[,
+    .(
       ahr = exp(sum(log(hr) * event) / sum(event)),
       event = sum(event),
       info = sum(info),
       info0 = sum(info0)
-    ) %>%
-    ungroup()
+    ),
+    by = "time"
+  ]
+  setDF(ans)
   return(ans)
 }
diff --git a/R/expected_accural.R b/R/expected_accural.R
@@ -44,7 +44,7 @@
 #' }
 #' \if{html}{The contents of this section are shown in PDF user manual only.}
 #'
-#' @importFrom dplyr lead
+#' @importFrom dplyr filter lead
 #' @importFrom tibble tibble
 #' @importFrom stats stepfun
 #'

diff --git a/R/expected_event.R b/R/expected_event.R
@@ -32,17 +32,17 @@
 #' @inheritParams ahr
 #' @param total_duration Total follow-up from start of enrollment to data cutoff.
 #' @param simple If default (`TRUE`), return numeric expected number of events,
-#'   otherwise a tibble as described below.
+#'   otherwise a data frame as described below.
 #'
 #' @return The default when `simple = TRUE` is to return the total expected
 #'   number of events as a real number.
-#'   Otherwise, when `simple = FALSE`, a tibble is returned with
+#'   Otherwise, when `simple = FALSE`, a data frame is returned with
 #'   the following variables for each period specified in `fail_rate`:
 #'   - `t`: start of period.
 #'   - `fail_rate`: failure rate during the period.
 #'   - `event`: expected events during the period.
 #'
-#'   The records in the returned tibble correspond to the input tibble `fail_rate`.
+#'   The records in the returned data frame correspond to the input data frame `fail_rate`.
 #'
 #' @section Specification:
 #' \if{latex}{
@@ -54,13 +54,13 @@
 #'    \item Validate if input failure rate contains dropout rate column.
 #'    \item Validate if input trial total follow-up (total duration) is a non-empty vector of positive integers.
 #'    \item Validate if input simple is logical.
-#'    \item Define a tibble with the start opening for enrollment at zero and cumulative duration.
+#'    \item Define a data frame with the start opening for enrollment at zero and cumulative duration.
 #'    Add the event (or failure) time corresponding to the start of the enrollment.
-#'    Finally, add the enrollment rate to the tibble
+#'    Finally, add the enrollment rate to the data frame
 #'    corresponding to the start and end (failure) time.
 #'    This will be recursively used to calculate the expected
 #'    number of events later. For details, see vignette/eEventsTheory.Rmd
-#'    \item Define a tibble including the cumulative duration of failure rates, the corresponding start time of
+#'    \item Define a data frame including the cumulative duration of failure rates, the corresponding start time of
 #'    the enrollment, failure rate and dropout rates.  For details, see vignette/eEventsTheory.Rmd
 #'    \item Only consider the failure rates in the interval of the end failure rate and total duration.
 #'    \item Compute the failure rates over time using \code{stepfun} which is used
@@ -242,8 +242,7 @@ expected_event <- function(
     ans <- do.call(rbind, ans)
     ans$t <- ans$start_fail
     ans <- ans[, c("t", "fail_rate", "event")]
-    ans <- tibble::new_tibble(ans)
-    tibble::validate_tibble(ans)
+    row.names(ans) <- seq_len(nrow(ans))
   }
   return(ans)
 }

diff --git a/R/expected_time.R b/R/expected_time.R
@@ -31,7 +31,7 @@
 #' @param interval An interval that is presumed to include the time at which
 #'   expected event count is equal to `target_event`.
 #'
-#' @return A tibble with `Time` (computed to match events in `target_event`),
+#' @return A data frame with `Time` (computed to match events in `target_event`),
 #'   `AHR` (average hazard ratio), `Events` (`target_event` input),
 #'   `info` (information under given scenarios), and `info0`
 #'   (information under related null hypothesis) for each value of
@@ -41,7 +41,7 @@
 #' \if{latex}{
 #'  \itemize{
 #'    \item Use root-finding routine with `AHR()` to find time at which targeted events accrue.
-#'    \item Return a tibble with a single row with the output from `AHR()` got the specified output.
+#'    \item Return a data frame with a single row with the output from `AHR()` got the specified output.
 #'    }
 #'  }
 #'

diff --git a/R/fixed_design_ahr.R b/R/fixed_design_ahr.R
@@ -30,6 +30,8 @@
 #'
 #' @return A table.
 #'
+#' @importFrom dplyr filter
+#'
 #' @export
 #'
 #' @examples

diff --git a/R/fixed_design_fh.R b/R/fixed_design_fh.R
@@ -32,6 +32,8 @@
 #'
 #' @return A table.
 #'
+#' @importFrom dplyr filter
+#'
 #' @export
 #'
 #' @examples

diff --git a/R/globals.R b/R/globals.R
@@ -19,6 +19,8 @@
 utils::globalVariables(
   unique(
     c(
+      # From data.table expressions
+      ".",
       # From `ahr()`
       c("stratum", "rate", "hr", "treatment", "time", "info0", "info"),
       # From `as_gt.gs_design()`
@@ -110,3 +112,12 @@ utils::globalVariables(
     )
   )
 )
+
+
+# Workaround to remove `R CMD check` NOTE "All declared Imports should be used."
+# https://r-pkgs.org/dependencies-in-practice.html#how-to-not-use-a-package-in-imports
+ignore_unused_imports <- function() {
+  rlang::`:=`
+}
+# Can't use `@importFrom rlang ":="` because it classes with data.table
+# https://github.com/r-lib/rlang/issues/1453
diff --git a/R/pw_info.R b/R/pw_info.R
@@ -72,7 +72,8 @@
 #' }
 #' \if{html}{The contents of this section are shown in PDF user manual only.}
 #'
-#' @importFrom dplyr filter mutate group_by summarize ungroup first last "%>%"
+#' @importFrom data.table ":=" as.data.table copy first last rbindlist setDF
+#' @importFrom dplyr group_by summarize ungroup "%>%"
 #'
 #' @export
 #'
@@ -122,24 +123,33 @@ pw_info <- function(
   check_enroll_rate_fail_rate(enroll_rate, fail_rate)
   check_total_duration(total_duration)
   check_ratio(ratio)
+  enroll_rate <- as.data.table(enroll_rate)
+  class(enroll_rate) <- c("enroll_rate", class(enroll_rate))
+  fail_rate <- as.data.table(fail_rate)
+  class(fail_rate) <- c("fail_rate", class(enroll_rate))
   # compute proportion in each group
   q_e <- ratio / (1 + ratio)
   q_c <- 1 - q_e
   # compute expected events by treatment group, stratum and time period
-  ans <- NULL
   strata <- unique(enroll_rate$stratum)
-  for (td in total_duration) {
-    event <- NULL
-    for (s in strata) {
+  ans_list <- vector(mode = "list", length = length(total_duration) * length(strata))
+  for (i in seq_along(total_duration)) {
+    td <- total_duration[i]
+    event_list <- vector(mode = "list", length = length(strata))
+    for (j in seq_along(strata)) {
+      s <- strata[j]
       # subset to stratum
-      enroll <- enroll_rate %>% filter(stratum == s)
-      fail <- fail_rate %>% filter(stratum == s)
+      enroll <- enroll_rate[stratum == s, ]
+      fail <- fail_rate[stratum == s, ]
       # update enrollment rates
-      enroll_c <- enroll %>% mutate(rate = rate * q_c)
-      enroll_e <- enroll %>% mutate(rate = rate * q_e)
+      enroll_c <- copy(enroll)
+      enroll_c[, rate := rate * q_c]
+      enroll_e <- copy(enroll)
+      enroll_e[, rate := rate * q_e]
       # update failure rates
-      fail_c <- fail
-      fail_e <- fail %>% mutate(fail_rate = fail_rate * hr)
+      fail_c <- copy(fail)
+      fail_e <- copy(fail)
+      fail_e[, fail_rate := fail_rate * hr]
       # compute expected number of events
       event_c <- expected_event(
         enroll_rate = enroll_c,
@@ -154,45 +164,50 @@ pw_info <- function(
         simple = FALSE
       )
       # Combine control and experimental; by period recompute HR, events, information
-      event <- rbind(
-        event_c %>% mutate(treatment = "control"),
-        event_e %>% mutate(treatment = "experimental")
-      ) %>%
-        arrange(t, treatment) %>%
-        ungroup() %>%
-        # recompute HR, events, info by period
-        group_by(t) %>%
-        summarize(
+      setDT(event_c)
+      event_c[, treatment := "control"]
+      setDT(event_e)
+      event_e[, treatment := "experimental"]
+      event_tmp <- rbindlist(list(event_c, event_e))
+      event_tmp <- event_tmp[order(t, treatment), ]
+      # recompute HR, events, info by period
+      event_tmp <- event_tmp[,
+        .(
           stratum = s,
           info = (sum(1 / event))^(-1),
           event = sum(event),
           hr = last(fail_rate) / first(fail_rate)
-        ) %>%
-        rbind(event)
+        ),
+        by = "t"
+      ]
+      event_list[[j]] <- event_tmp
     }
     # summarize events in one stratum
-    ans_new <- event %>%
-      mutate(
-        time = td,
-        ln_hr = log(hr),
-        info0 = event * q_c * q_e
-      ) %>%
-      ungroup() %>%
-      # pool strata together for each time period
-      group_by(time, stratum, hr) %>%
-      summarize(
+    event <- rbindlist(event_list)
+    event[, `:=`(
+      time = td,
+      ln_hr = log(hr),
+      info0 = event * q_c * q_e
+    )]
+    # pool strata together for each time period
+    event <- event[,
+      .(
         t = min(t),
         event = sum(event),
         info0 = sum(info0),
         info = sum(info)
-      )
-    ans <- rbind(ans, ans_new)
+      ),
+      by = .(time, stratum, hr)
+    ]
+    ans_list[[i + j]] <- event
   }
+  ans <- rbindlist(ans_list)
   # output the results
+  ans <- ans[, .(time, stratum, t, hr, event, info, info0)]
   ans <- ans %>%
-    select(time, stratum, t, hr, event, info, info0) %>%
     group_by(time, stratum) %>%
     arrange(t, .by_group = TRUE) %>%
     ungroup()
+  setDF(ans)
   return(ans)
 }
diff --git a/R/utility_tidy_tbl.R b/R/utility_tidy_tbl.R
@@ -33,7 +33,6 @@
 #'   The columns of `table_b` are also included. This is intended for use with `gt()` grouping by
 #'   rows in a.
 #'
-#' @importFrom rlang :=
 #' @importFrom dplyr all_of one_of
 #'
 #' @noRd

diff --git a/man/ahr.Rd b/man/ahr.Rd