From 8aa9f79572b1ab88782639b2a29732e6bbc11e4d Mon Sep 17 00:00:00 2001 From: John Blischak Date: Thu, 14 Nov 2024 15:52:16 -0500 Subject: [PATCH] Add argument `formula` to `wlr()` --- R/wlr.R | 52 +++++++++++-- man/wlr.Rd | 31 +++++++- tests/testthat/test-unvalidated-wlr.R | 107 +++++++++++++++++++++++++- 3 files changed, 178 insertions(+), 12 deletions(-) diff --git a/R/wlr.R b/R/wlr.R index 0856abdc..bf510250 100644 --- a/R/wlr.R +++ b/R/wlr.R @@ -36,6 +36,15 @@ #' - If the `data` is a custom dataset (see Example 2) below, #' + Users are suggested to input the planned randomization ratio to `ratio`; #' + If not, simtrial takes the empirical randomization ratio. +#' @param formula A formula to specify the columns that contain the +#' time-to-event, event, treatment, and stratum variables. Only used by the +#' default S3 method because the other classes aleady have the required column +#' names. For stratified designs, the formula should have the form `Surv(tte, +#' event) ~ treatment + strata(stratum)`, where `tte`, `event`, `treatment`, +#' and `stratum` are the column names from `data` with the time-to-event +#' measurement, event status, treatment group, and stratum, respectively. For +#' unstratified designs, the formula can omit the stratum column: `Surv(tte, +#' event) ~ treatment`. #' #' @return A list containing the test method (`method`), #' parameters of this test method (`parameter`), @@ -134,25 +143,52 @@ #' x |> #' counting_process(arm = "experimental") |> #' wlr(weight = fh(rho = 0, gamma = 0.5)) -wlr <- function(data, weight, return_variance = FALSE, ratio = NULL) { +#' +#' # ---------------------- # +#' # Example 3 # +#' # Use formula # +#' # ---------------------- # +#' library("survival") +#' +#' # Unstratified design +#' x <- sim_pw_surv(n = 200) |> cut_data_by_event(100) |> as.data.frame() +#' colnames(x) <- c("tte", "evnt", "strtm", "trtmnt") +#' wlr(x, weight = fh(0, 0.5), formula = Surv(tte, evnt) ~ trtmnt) +#' +#' # Stratified design +#' x$strtm <- sample(c("s1", "s2"), size = nrow(x), replace = TRUE) +#' wlr(x, weight = fh(0, 0.5), formula = Surv(tte, evnt) ~ trtmnt + strata(strtm)) +wlr <- function(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) { UseMethod("wlr", data) } #' @rdname wlr #' @export -wlr.default <- function(data, weight, return_variance = FALSE, ratio = NULL) { +wlr.default <- function(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) { + + if (!is.null(formula)) { + stopifnot(inherits(formula, "formula")) + data <- stats::get_all_vars(formula = formula, data = data) + colnames(data)[1:3] <- c("tte", "event", "treatment") + if (ncol(data) == 4) colnames(data)[4] <- "stratum" + } + + if (!all(c("tte", "event", "treatment") %in% colnames(data))) { + stop('Input must have the columns "tte", "event", and "treatment"') + } - if (!all(c("tte", "event", "stratum", "treatment") %in% colnames(data))) { - stop('Input must have the columns "tte", "event", "stratum", and "treatment"') + if (!"stratum" %in% colnames(data)) { + data$stratum <- "All" } wlr.tte_data(data = data, weight = weight, return_variance = return_variance, ratio = ratio) } - #' @rdname wlr #' @export -wlr.tte_data <- function(data, weight, return_variance = FALSE, ratio = NULL) { +wlr.tte_data <- function(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) { + if (!is.null(formula)) warning("The formula argument was ignored") + # if the `data` is NOT generated by sim_pw_surv # - if user input the randomization ratio, we will directly take its values # - otherwise, we calculate the empirical ratio @@ -171,7 +207,9 @@ wlr.tte_data <- function(data, weight, return_variance = FALSE, ratio = NULL) { #' @rdname wlr #' @export -wlr.counting_process <- function(data, weight, return_variance = FALSE, ratio = NULL) { +wlr.counting_process <- function(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) { + if (!is.null(formula)) warning("The formula argument was ignored") + x <- data if (is.null(ratio)) { diff --git a/man/wlr.Rd b/man/wlr.Rd index eaac7cd3..20585f03 100644 --- a/man/wlr.Rd +++ b/man/wlr.Rd @@ -7,13 +7,13 @@ \alias{wlr.counting_process} \title{Weighted logrank test} \usage{ -wlr(data, weight, return_variance = FALSE, ratio = NULL) +wlr(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) -\method{wlr}{default}(data, weight, return_variance = FALSE, ratio = NULL) +\method{wlr}{default}(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) -\method{wlr}{tte_data}(data, weight, return_variance = FALSE, ratio = NULL) +\method{wlr}{tte_data}(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) -\method{wlr}{counting_process}(data, weight, return_variance = FALSE, ratio = NULL) +\method{wlr}{counting_process}(data, weight, return_variance = FALSE, ratio = NULL, formula = NULL) } \arguments{ \item{data}{Dataset (generated by \code{\link[=sim_pw_surv]{sim_pw_surv()}}) that has been cut by @@ -43,6 +43,14 @@ there is no need to input the \code{ratio}, as simtrial gets the \code{ratio} vi \item If not, simtrial takes the empirical randomization ratio. } }} + +\item{formula}{A formula to specify the columns that contain the +time-to-event, event, treatment, and stratum variables. Only used by the +default S3 method because the other classes aleady have the required column +names. For stratified designs, the formula should have the form \code{Surv(tte, event) ~ treatment + strata(stratum)}, where \code{tte}, \code{event}, \code{treatment}, +and \code{stratum} are the column names from \code{data} with the time-to-event +measurement, event status, treatment group, and stratum, respectively. For +unstratified designs, the formula can omit the stratum column: \code{Surv(tte, event) ~ treatment}.} } \value{ A list containing the test method (\code{method}), @@ -143,4 +151,19 @@ x |> wlr(weight = fh(rho = 0, gamma = 0.5)) x |> counting_process(arm = "experimental") |> wlr(weight = fh(rho = 0, gamma = 0.5)) + +# ---------------------- # +# Example 3 # +# Use formula # +# ---------------------- # +library("survival") + +# Unstratified design +x <- sim_pw_surv(n = 200) |> cut_data_by_event(100) |> as.data.frame() +colnames(x) <- c("tte", "evnt", "strtm", "trtmnt") +wlr(x, weight = fh(0, 0.5), formula = Surv(tte, evnt) ~ trtmnt) + +# Stratified design +x$strtm <- sample(c("s1", "s2"), size = nrow(x), replace = TRUE) +wlr(x, weight = fh(0, 0.5), formula = Surv(tte, evnt) ~ trtmnt + strata(strtm)) } diff --git a/tests/testthat/test-unvalidated-wlr.R b/tests/testthat/test-unvalidated-wlr.R index dae95060..6f82736f 100644 --- a/tests/testthat/test-unvalidated-wlr.R +++ b/tests/testthat/test-unvalidated-wlr.R @@ -26,7 +26,7 @@ test_that("wlr() rejects input object without proper columns", { x <- mtcars expect_error( wlr(x), - 'Input must have the columns "tte", "event", "stratum", and "treatment"' + 'Input must have the columns "tte", "event", and "treatment"' ) }) @@ -58,3 +58,108 @@ test_that("cut_data_by_date() and cut_data_by_event() return the same classes", expect_identical(class(data_by_event), class(data_by_date)) }) + +test_that("wlr() formula argument can rename columns", { + x <- sim_pw_surv(n = 300) |> cut_data_by_event(100) + expected <- wlr(x, weight = fh(0, 0.5)) + + # Rearrange and rename columns to simulate custom user data. Also remove class + # "tte_data" + y <- x[, rev(colnames(x))] + colnames(y) <- toupper(colnames(y)) + class(y) <- "data.frame" + + observed <- wlr(y, weight = fh(0, 0.5), formula = Surv(TTE, EVENT) ~ TREATMENT) + + # Sometimes info0 is off by ~2e-5 + expect_equal(observed, expected, tolerance = 1e-4) +}) + +test_that("wlr() accepts formula for unstratified design", { + data("ex1_delayed_effect", package = "simtrial", envir = environment()) + ex1_delayed_effect$trt <- ifelse( + ex1_delayed_effect$trt == 1, + "experimental", + "control" + ) + + # Use ex1_delayed_effect directly via the argument `formula` + observed <- wlr( + data = ex1_delayed_effect, + formula = Surv(month, evntd) ~ trt, + weight = fh(0, 0.5) + ) + + # Convert ex1_delayed_effect to tte_data class + ex1_delayed_effect_tte_data <- as.data.frame(ex1_delayed_effect) + colnames(ex1_delayed_effect_tte_data) <- c("id", "tte", "event", "treatment") + ex1_delayed_effect_tte_data$stratum <- "All" + class(ex1_delayed_effect_tte_data) <- c("tte_data", "data.frame") + + expected <- wlr(ex1_delayed_effect_tte_data, weight = fh(0, 0.5)) + + expect_equal(observed, expected) +}) + +test_that("wlr() accepts formula for stratified design", { + data("ex1_delayed_effect", package = "simtrial", envir = environment()) + ex1_delayed_effect$trt <- ifelse( + ex1_delayed_effect$trt == 1, + "experimental", + "control" + ) + ex1_delayed_effect$strtm <- sample( + x = c("biomarker positive", "biomarker negative"), + size = nrow(ex1_delayed_effect), + replace = TRUE, + prob = c(0.6, 0.4) + ) + + # Use ex1_delayed_effect directly via the argument `formula` + observed <- wlr( + data = ex1_delayed_effect, + formula = Surv(month, evntd) ~ trt + strata(strtm), + weight = fh(0, 0.5) + ) + + # Convert ex1_delayed_effect to tte_data class + ex1_delayed_effect_tte_data <- as.data.frame(ex1_delayed_effect) + colnames(ex1_delayed_effect_tte_data) <- c("id", "tte", "event", "treatment", "stratum") + class(ex1_delayed_effect_tte_data) <- c("tte_data", "data.frame") + + expected <- wlr(ex1_delayed_effect_tte_data, weight = fh(0, 0.5)) + + expect_equal(observed, expected) +}) + +test_that("wlr() warns when formula argument is ignored", { + x <- sim_pw_surv(n = 300) |> cut_data_by_event(100) + expect_warning( + wlr(x, weight = fh(0, 0.5), formula = Surv(tte, event) ~ treatment), + "The formula argument was ignored" + ) + + y <- counting_process(x, arm = "experimental") + expect_warning( + wlr(y, weight = fh(0, 0.5), formula = Surv(tte, event) ~ treatment), + "The formula argument was ignored" + ) +}) + +test_that("wlr.default() and wlr.tte_data() require arm='experimental'", { + x <- sim_pw_surv(n = 300) |> cut_data_by_event(100) + x$treatment <- ifelse(x$treatment == "experimental", "test", x$treatment) + + expect_error( + wlr(x, weight = fh(0, 0.5)), + "counting_process: arm is not a valid treatment group value." + ) + expect_error( + wlr(as.data.frame(x), weight = fh(0, 0.5), formula = Surv(tte, event) ~ treatment), + "counting_process: arm is not a valid treatment group value." + ) + + # To use a different value for arm, have to manually run counting_process() + y <- counting_process(x, arm = "test") + expect_silent(wlr(y, weight = fh(0, 0.5))) +})