From 8aa9f79572b1ab88782639b2a29732e6bbc11e4d Mon Sep 17 00:00:00 2001
From: John Blischak <jdblischak@gmail.com>
Date: Thu, 14 Nov 2024 15:52:16 -0500
Subject: [PATCH] Add argument `formula` to `wlr()`

---
 R/wlr.R                               |  52 +++++++++++--
 man/wlr.Rd                            |  31 +++++++-
 tests/testthat/test-unvalidated-wlr.R | 107 +++++++++++++++++++++++++-
 3 files changed, 178 insertions(+), 12 deletions(-)

diff --git a/R/wlr.R b/R/wlr.R
index 0856abdc..bf510250 100644
--- a/R/wlr.R
+++ b/R/wlr.R
@@ -36,6 +36,15 @@
 #'  - If the `data` is a custom dataset (see Example 2) below,
 #'    + Users are suggested to input the planned randomization ratio to `ratio`;
 #'    + If not, simtrial takes the empirical randomization ratio.
+#' @param formula A formula to specify the columns that contain the
+#'   time-to-event, event, treatment, and stratum variables. Only used by the
+#'   default S3 method because the other classes aleady have the required column
+#'   names. For stratified designs, the formula should have the form `Surv(tte,
+#'   event) ~ treatment + strata(stratum)`, where `tte`, `event`, `treatment`,
+#'   and `stratum` are the column names from `data` with the time-to-event
+#'   measurement, event status, treatment group, and stratum, respectively. For
+#'   unstratified designs, the formula can omit the stratum column: `Surv(tte,
+#'   event) ~ treatment`.
 #'
 #' @return A list containing the test method (`method`),
 #' parameters of this test method (`parameter`),
@@ -134,25 +143,52 @@
 #' x |>
 #'   counting_process(arm = "experimental") |>
 #'   wlr(weight = fh(rho = 0, gamma = 0.5))
-wlr <- function(data, weight, return_variance = FALSE, ratio = NULL) {
+#'
+#' # ---------------------- #
+#' #      Example 3         #
+#' #  Use formula           #
+#' # ---------------------- #
+#' library("survival")
+#'
+#' # Unstratified design
+#' x <- sim_pw_surv(n = 200) |> cut_data_by_event(100) |> as.data.frame()
+#' colnames(x) <- c("tte", "evnt", "strtm", "trtmnt")
+#' wlr(x, weight = fh(0, 0.5), formula = Surv(tte, evnt) ~ trtmnt)
+#'
+#' # Stratified design
+#' x$strtm <- sample(c("s1", "s2"), size = nrow(x), replace = TRUE)
+#' wlr(x, weight = fh(0, 0.5), formula = Surv(tte, evnt) ~ trtmnt + strata(strtm))
+wlr <- function(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) {
   UseMethod("wlr", data)
 }
 
 #' @rdname wlr
 #' @export
-wlr.default <- function(data, weight, return_variance = FALSE, ratio = NULL) {
+wlr.default <- function(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) {
+
+  if (!is.null(formula)) {
+    stopifnot(inherits(formula, "formula"))
+    data <- stats::get_all_vars(formula = formula, data = data)
+    colnames(data)[1:3] <- c("tte", "event", "treatment")
+    if (ncol(data) == 4) colnames(data)[4] <- "stratum"
+  }
+
+  if (!all(c("tte", "event",  "treatment") %in% colnames(data))) {
+    stop('Input must have the columns "tte", "event", and "treatment"')
+  }
 
-  if (!all(c("tte", "event", "stratum", "treatment") %in% colnames(data))) {
-    stop('Input must have the columns "tte", "event", "stratum", and "treatment"')
+  if (!"stratum" %in% colnames(data)) {
+    data$stratum <- "All"
   }
 
   wlr.tte_data(data = data, weight = weight, return_variance = return_variance, ratio = ratio)
 }
 
-
 #' @rdname wlr
 #' @export
-wlr.tte_data <- function(data, weight, return_variance = FALSE, ratio = NULL) {
+wlr.tte_data <- function(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) {
+  if (!is.null(formula)) warning("The formula argument was ignored")
+
   # if the `data` is NOT generated by sim_pw_surv
   # - if user input the randomization ratio, we will directly take its values
   # - otherwise, we calculate the empirical ratio
@@ -171,7 +207,9 @@ wlr.tte_data <- function(data, weight, return_variance = FALSE, ratio = NULL) {
 
 #' @rdname wlr
 #' @export
-wlr.counting_process <- function(data, weight, return_variance = FALSE, ratio = NULL) {
+wlr.counting_process <- function(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) {
+  if (!is.null(formula)) warning("The formula argument was ignored")
+
   x <- data
 
   if (is.null(ratio)) {
diff --git a/man/wlr.Rd b/man/wlr.Rd
index eaac7cd3..20585f03 100644
--- a/man/wlr.Rd
+++ b/man/wlr.Rd
@@ -7,13 +7,13 @@
 \alias{wlr.counting_process}
 \title{Weighted logrank test}
 \usage{
-wlr(data, weight, return_variance = FALSE, ratio = NULL)
+wlr(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL)
 
-\method{wlr}{default}(data, weight, return_variance = FALSE, ratio = NULL)
+\method{wlr}{default}(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL)
 
-\method{wlr}{tte_data}(data, weight, return_variance = FALSE, ratio = NULL)
+\method{wlr}{tte_data}(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL)
 
-\method{wlr}{counting_process}(data, weight, return_variance = FALSE, ratio = NULL)
+\method{wlr}{counting_process}(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL)
 }
 \arguments{
 \item{data}{Dataset (generated by \code{\link[=sim_pw_surv]{sim_pw_surv()}}) that has been cut by
@@ -43,6 +43,14 @@ there is no need to input the \code{ratio}, as simtrial gets the \code{ratio} vi
 \item If not, simtrial takes the empirical randomization ratio.
 }
 }}
+
+\item{formula}{A formula to specify the columns that contain the
+time-to-event, event, treatment, and stratum variables. Only used by the
+default S3 method because the other classes aleady have the required column
+names. For stratified designs, the formula should have the form \code{Surv(tte, event) ~ treatment + strata(stratum)}, where \code{tte}, \code{event}, \code{treatment},
+and \code{stratum} are the column names from \code{data} with the time-to-event
+measurement, event status, treatment group, and stratum, respectively. For
+unstratified designs, the formula can omit the stratum column: \code{Surv(tte, event) ~ treatment}.}
 }
 \value{
 A list containing the test method (\code{method}),
@@ -143,4 +151,19 @@ x |> wlr(weight = fh(rho = 0, gamma = 0.5))
 x |>
   counting_process(arm = "experimental") |>
   wlr(weight = fh(rho = 0, gamma = 0.5))
+
+# ---------------------- #
+#      Example 3         #
+#  Use formula           #
+# ---------------------- #
+library("survival")
+
+# Unstratified design
+x <- sim_pw_surv(n = 200) |> cut_data_by_event(100) |> as.data.frame()
+colnames(x) <- c("tte", "evnt", "strtm", "trtmnt")
+wlr(x, weight = fh(0, 0.5), formula = Surv(tte, evnt) ~ trtmnt)
+
+# Stratified design
+x$strtm <- sample(c("s1", "s2"), size = nrow(x), replace = TRUE)
+wlr(x, weight = fh(0, 0.5), formula = Surv(tte, evnt) ~ trtmnt + strata(strtm))
 }
diff --git a/tests/testthat/test-unvalidated-wlr.R b/tests/testthat/test-unvalidated-wlr.R
index dae95060..6f82736f 100644
--- a/tests/testthat/test-unvalidated-wlr.R
+++ b/tests/testthat/test-unvalidated-wlr.R
@@ -26,7 +26,7 @@ test_that("wlr() rejects input object without proper columns", {
   x <- mtcars
   expect_error(
     wlr(x),
-    'Input must have the columns "tte", "event", "stratum", and "treatment"'
+    'Input must have the columns "tte", "event", and "treatment"'
   )
 })
 
@@ -58,3 +58,108 @@ test_that("cut_data_by_date() and cut_data_by_event() return the same classes",
 
   expect_identical(class(data_by_event), class(data_by_date))
 })
+
+test_that("wlr() formula argument can rename columns", {
+  x <- sim_pw_surv(n = 300) |> cut_data_by_event(100)
+  expected <- wlr(x, weight = fh(0, 0.5))
+
+  # Rearrange and rename columns to simulate custom user data. Also remove class
+  # "tte_data"
+  y <- x[, rev(colnames(x))]
+  colnames(y) <- toupper(colnames(y))
+  class(y) <- "data.frame"
+
+  observed <- wlr(y, weight = fh(0, 0.5), formula =  Surv(TTE, EVENT) ~ TREATMENT)
+
+  # Sometimes info0 is off by ~2e-5
+  expect_equal(observed, expected, tolerance = 1e-4)
+})
+
+test_that("wlr() accepts formula for unstratified design", {
+  data("ex1_delayed_effect", package = "simtrial", envir = environment())
+  ex1_delayed_effect$trt <- ifelse(
+    ex1_delayed_effect$trt == 1,
+    "experimental",
+    "control"
+  )
+
+  # Use ex1_delayed_effect directly via the argument `formula`
+  observed <- wlr(
+    data = ex1_delayed_effect,
+    formula = Surv(month, evntd) ~ trt,
+    weight = fh(0, 0.5)
+  )
+
+  # Convert ex1_delayed_effect to tte_data class
+  ex1_delayed_effect_tte_data <- as.data.frame(ex1_delayed_effect)
+  colnames(ex1_delayed_effect_tte_data) <- c("id", "tte", "event", "treatment")
+  ex1_delayed_effect_tte_data$stratum <- "All"
+  class(ex1_delayed_effect_tte_data) <- c("tte_data", "data.frame")
+
+  expected <- wlr(ex1_delayed_effect_tte_data, weight = fh(0, 0.5))
+
+  expect_equal(observed, expected)
+})
+
+test_that("wlr() accepts formula for stratified design", {
+  data("ex1_delayed_effect", package = "simtrial", envir = environment())
+  ex1_delayed_effect$trt <- ifelse(
+    ex1_delayed_effect$trt == 1,
+    "experimental",
+    "control"
+  )
+  ex1_delayed_effect$strtm <- sample(
+    x = c("biomarker positive", "biomarker negative"),
+    size = nrow(ex1_delayed_effect),
+    replace = TRUE,
+    prob = c(0.6, 0.4)
+  )
+
+  # Use ex1_delayed_effect directly via the argument `formula`
+  observed <- wlr(
+    data = ex1_delayed_effect,
+    formula = Surv(month, evntd) ~ trt + strata(strtm),
+    weight = fh(0, 0.5)
+  )
+
+  # Convert ex1_delayed_effect to tte_data class
+  ex1_delayed_effect_tte_data <- as.data.frame(ex1_delayed_effect)
+  colnames(ex1_delayed_effect_tte_data) <- c("id", "tte", "event", "treatment", "stratum")
+  class(ex1_delayed_effect_tte_data) <- c("tte_data", "data.frame")
+
+  expected <- wlr(ex1_delayed_effect_tte_data, weight = fh(0, 0.5))
+
+  expect_equal(observed, expected)
+})
+
+test_that("wlr() warns when formula argument is ignored", {
+  x <- sim_pw_surv(n = 300) |> cut_data_by_event(100)
+  expect_warning(
+    wlr(x, weight = fh(0, 0.5), formula = Surv(tte, event) ~ treatment),
+    "The formula argument was ignored"
+  )
+
+  y <- counting_process(x, arm = "experimental")
+  expect_warning(
+    wlr(y, weight = fh(0, 0.5), formula = Surv(tte, event) ~ treatment),
+    "The formula argument was ignored"
+  )
+})
+
+test_that("wlr.default() and wlr.tte_data() require arm='experimental'", {
+  x <- sim_pw_surv(n = 300) |> cut_data_by_event(100)
+  x$treatment <- ifelse(x$treatment == "experimental", "test", x$treatment)
+
+  expect_error(
+    wlr(x, weight = fh(0, 0.5)),
+    "counting_process: arm is not a valid treatment group value."
+  )
+  expect_error(
+    wlr(as.data.frame(x), weight = fh(0, 0.5), formula = Surv(tte, event) ~ treatment),
+    "counting_process: arm is not a valid treatment group value."
+  )
+
+  # To use a different value for arm, have to manually run counting_process()
+  y <- counting_process(x, arm = "test")
+  expect_silent(wlr(y, weight = fh(0, 0.5)))
+})