make xxx_weight only assign weights to each subjects

Merck · Apr 9, 2024 · 1a82541 · 1a82541
1 parent 4106ca0
commit 1a82541
Show file tree

Hide file tree

Showing 5 changed files with 148 additions and 174 deletions.
diff --git a/R/early_zero_weight.R b/R/early_zero_weight.R
@@ -28,16 +28,14 @@
 #'
 #' @importFrom data.table ":=" as.data.table fifelse merge.data.table setDF
 early_zero_weight <- function(x, early_period = 4, fail_rate = NULL) {
-  ans <- list()
-  ans$method <- "WLR"
-  ans$parameter <- paste0("Xu 2017 with first ", early_period, " months of 0 weights")
 
-  res <- as.data.table(x)
-  n_stratum <- length(unique(res$stratum))
+
+  ans <- as.data.table(x)
+  n_stratum <- length(unique(ans$stratum))
 
   # If it is unstratified design
   if (n_stratum == 1) {
-    res[, weight := fifelse(tte < early_period, 0, 1)]
+    ans[, weight := fifelse(tte < early_period, 0, 1)]
   } else {
     if (is.null(fail_rate)) {
       stop("For stratified design to use `early_zero_weight()`, `fail_rate` can't be `NULL`.")
@@ -52,16 +50,12 @@ early_zero_weight <- function(x, early_period = 4, fail_rate = NULL) {
     late_hr <- fail_rate[hr != 1, .(stratum, hr)]
     delay_change_time <- fail_rate[hr == 1, .(stratum, duration)]
 
-    res <- merge.data.table(res, late_hr, by = "stratum", all.x = TRUE)
-    res <- merge.data.table(res, delay_change_time, by = "stratum", all.x = TRUE)
-    res[, weight := fifelse(tte < duration, 0, hr)]
+    ans <- merge.data.table(ans, late_hr, by = "stratum", all.x = TRUE)
+    ans <- merge.data.table(ans, delay_change_time, by = "stratum", all.x = TRUE)
+    ans[, weight := fifelse(tte < duration, 0, hr)]
   }
 
-  setDF(res)
-
-  ans$estimate <- sum(res$o_minus_e * res$weight)
-  ans$se <- sqrt(sum(res$var_o_minus_e * res$weight^2))
-  ans$z <- ans$estimate / ans$se
+  setDF(ans)
 
   return(ans)
 }
diff --git a/R/fh_weight.R b/R/fh_weight.R
@@ -84,15 +84,17 @@ fh_weight <- function(
     x = sim_pw_surv(n = 200) |>
       cut_data_by_event(150) |>
       counting_process(arm = "experimental"),
-    rho_gamma = data.frame(
-      rho = c(0, 0, 1, 1),
-      gamma = c(0, 1, 0, 1)
-    ),
+    rho = 0,
+    gamma = 1,
     return_variance = FALSE,
     return_corr = FALSE) {
-  n_weight <- nrow(rho_gamma)
 
-  # Check input failure rate assumptions
+
+  # Input checking ----
+  if(length(rho) != 1 || length(gamma) != 1) {
+    stop("fh_weight: please input single numerical values of rho and gamma.")
+  }
+
   if (!is.data.frame(x)) {
     stop("fh_weight: x in `fh_weight()` must be a data frame.")
   }
@@ -109,116 +111,7 @@ fh_weight <- function(
     stop("fh_weight: x column names in `fh_weight()` must contain var_o_minus_e.")
   }
 
-  if (return_variance && return_corr) {
-    stop("fh_weight: can't report both covariance and correlation for MaxCombo test.")
-  }
-
-  if (return_corr && n_weight == 1) {
-    stop("fh_weight: can't report the correlation for a single WLR test.")
-  }
-
-  if (n_weight == 1) {
-    test_result <- wlr_fh_z_stat(x, rho_gamma = rho_gamma, return_variance = return_variance)
-
-    ans <- list()
-    ans$method <- "WLR"
-    ans$parameter <- paste0("FH(", rho_gamma$rho, ", ", rho_gamma$gamma, ")")
-    ans$z <- test_result$z
-    ans$estimation <- test_result$estimation
-    ans$se <- test_result$se
-    if (return_variance) {
-      ans$var <- test_result$var
-    }
-  } else {
-
-    ans <- list()
-    ans$method <- "MaxCombo"
-    ans$parameter <- paste((rho_gamma  %>% mutate(x = paste0("FH(", rho, ", ", gamma, ")")))$x, collapse = " + ")
-    ans$estimation <- NULL
-    ans$se <- NULL
-    # Get average rho and gamma for FH covariance matrix
-    # We want ave_rho[i,j]   = (rho[i] + rho[j])/2
-    # and     ave_gamma[i,j] = (gamma[i] + gamma[j])/2
-    ave_rho <- (matrix(rho_gamma$rho, nrow = n_weight, ncol = n_weight, byrow = FALSE) +
-      matrix(rho_gamma$rho, nrow = n_weight, ncol = n_weight, byrow = TRUE)
-    ) / 2
-    ave_gamma <- (matrix(rho_gamma$gamma, nrow = n_weight, ncol = n_weight) +
-      matrix(rho_gamma$gamma, nrow = n_weight, ncol = n_weight, byrow = TRUE)
-    ) / 2
-
-    # Convert to data.table
-    rg_new <- data.table(rho = as.numeric(ave_rho), gamma = as.numeric(ave_gamma))
-    # Get unique values of rho, gamma
-    rg_unique <- unique(rg_new)
-
-    # Compute FH statistic for unique values
-    # and merge back to full set of pairs
-    temp <- wlr_fh_z_stat(x, rho_gamma = rg_unique, return_variance = TRUE) |> as.data.frame()
-    rg_fh <- merge.data.table(
-      x = rg_new,
-      y = temp,
-      by = c("rho", "gamma"),
-      all.x = TRUE,
-      sort = FALSE
-    )
-
-    # Get z statistics for input rho, gamma combinations
-    ans$z <- rg_fh$z[(0:(n_weight - 1)) * n_weight + 1:n_weight]
-
-    # Get correlation matrix
-    cov_mat <- matrix(rg_fh$var, nrow = n_weight, byrow = TRUE)
-
-    if (return_corr) {
-      corr_mat <- stats::cov2cor(cov_mat)
-      colnames(corr_mat) <- paste("v", seq_len(ncol(corr_mat)), sep = "")
-      ans$corr <- as.data.frame(corr_mat)
-    } else if (return_variance) {
-      corr_mat <- cov_mat
-      colnames(corr_mat) <- paste("v", seq_len(ncol(corr_mat)), sep = "")
-      ans$var <- as.data.frame(corr_mat)
-    }
-  }
-
-  return(ans)
-}
-
-# Build an internal function to compute the Z statistics
-# under a sequence of rho and gamma of WLR.
-wlr_fh_z_stat <- function(x, rho_gamma, return_variance) {
-
-  xx <- x[, c("s", "o_minus_e", "var_o_minus_e")]
-
-  # Initialization of the output
-  ans <- list()
-  ans$z <- rep(0, nrow(rho_gamma))
-  ans$estimation <- rep(0, nrow(rho_gamma))
-  ans$se <- rep(0, nrow(rho_gamma))
-  ans$rho <- rep(0, nrow(rho_gamma))
-  ans$gamma <- rep(0, nrow(rho_gamma))
-
-  if (return_variance) {
-    ans$var <- rep(0, nrow(rho_gamma))
-  }
-
-  # Loop for each WLR-FH test with 1 rho and 1 gamma
-  for (i in seq_len(nrow(rho_gamma))) {
-    weight <- xx$s^rho_gamma$rho[i] * (1 - xx$s)^rho_gamma$gamma[i]
-    weighted_o_minus_e <- weight * xx$o_minus_e
-    weighted_var <- weight^2 * xx$var_o_minus_e
-
-    weighted_var_total <- sum(weighted_var)
-    weighted_o_minus_e_total <- sum(weighted_o_minus_e)
-
-    ans$rho[i] <- rho_gamma$rho[i]
-    ans$gamma[i] <- rho_gamma$gamma[i]
-    ans$estimation[i] <- weighted_o_minus_e_total
-    ans$se[i] <- sqrt(weighted_var_total)
-    ans$z[i] <- ans$estimation[i] / ans$se[i]
-
-    if (return_variance) {
-      ans$var[i] <- weighted_var_total
-    }
-  }
+  x$weight <- x$s^rho * (1 - x$s)^gamma
 
-  return(ans)
+  return(x)
 }
diff --git a/R/maxcombo.R b/R/maxcombo.R
@@ -36,35 +36,107 @@
 #' sim_pw_surv(n = 200) |>
 #'   cut_data_by_event(150) |>
 #'   maxcombo(rho = c(0, 0), gamma = c(0, 1))
-maxcombo <- function(data, rho, gamma, return_corr = FALSE) {
-  stopifnot(
-    is.numeric(rho), is.numeric(gamma),
-    rho >= 0, gamma >= 0,
-    length(rho) == length(gamma)
-  )
-
-  res <- data |>
-    counting_process(arm = "experimental") |>
-    fh_weight(
-      rho_gamma = data.frame(rho = rho, gamma = gamma),
-      return_corr = TRUE
-    )
+maxcombo <- function(data = sim_pw_surv(n = 200) |> cut_data_by_event(150),
+                     rho = c(0, 0, 1),
+                     gamma = c(0, 1, 1),
+                     return_variance = FALSE,
+                     return_corr = FALSE) {
+  # Input checking ----
+  n_weight <- length(rho)
 
+  if(any(!is.numeric(rho)) || any(!is.numeric(gamma)) || any(rho < 0) || any(gamma < 0)) {
+    stop("maxcombo: please input positive values of rho and gamma.")
+  }
+
+  if (length(rho) != length(gamma)) {
+    stop("maxcombo: please input rho and gamma of equal length.")
+  }
+
+  if (length(rho) == 1) {
+    stop("maxcombo: please input multiple rho and gamma to make it a MaxCombo test.")
+  }
+
+  if (return_variance && return_corr) {
+    stop("maxcombo: can't report both covariance and correlation for MaxCombo test.")
+  }
+
+  if (return_corr && n_weight == 1) {
+    stop("maxcombo: can't report the correlation for a single WLR test.")
+  }
+
+  # Get average rho and gamma for FH covariance matrix ----
+  # We want ave_rho[i,j]   = (rho[i] + rho[j])/2
+  # and     ave_gamma[i,j] = (gamma[i] + gamma[j])/2
+  ave_rho <- (matrix(rho, nrow = n_weight, ncol = n_weight, byrow = FALSE) +
+                matrix(rho, nrow = n_weight, ncol = n_weight, byrow = TRUE)
+  ) / 2
+  ave_gamma <- (matrix(gamma, nrow = n_weight, ncol = n_weight) +
+                  matrix(gamma, nrow = n_weight, ncol = n_weight, byrow = TRUE)
+  ) / 2
+
+  # Convert to all original and average rho/gamma into a data.table ----
+  rg_new <- data.table(rho = as.numeric(ave_rho), gamma = as.numeric(ave_gamma))
+  rg_unique <- unique(rg_new)
+
+  # Compute WLR FH Z-score and variance for all original and average rho/gamma ----
+  # Compute FH statistic for unique values
+  res <- list()
+  x <- data |> counting_process(arm = "experimental")
+  # Loop for each WLR-FH test with 1 rho and 1 gamma
+  for (i in seq_len(nrow(rg_unique))) {
+    weight <- x$s^rg_unique$rho[i] * (1 - x$s)^rg_unique$gamma[i]
+    weighted_o_minus_e <- weight * x$o_minus_e
+    weighted_var <- weight^2 * x$var_o_minus_e
+
+    weighted_var_total <- sum(weighted_var)
+    weighted_o_minus_e_total <- sum(weighted_o_minus_e)
+
+    res$rho[i] <- rg_unique$rho[i]
+    res$gamma[i] <- rg_unique$gamma[i]
+    res$estimation[i] <- weighted_o_minus_e_total
+    res$se[i] <- sqrt(weighted_var_total)
+    res$var[i] <- weighted_var_total
+    res$z[i] <- res$estimation[i] / res$se[i]
+  }
+
+  # Merge back to full set of pairs ----
+  rg_fh <- merge.data.table(
+    x = rg_new,
+    y = res |> as.data.frame(),
+    by = c("rho", "gamma"),
+    all.x = TRUE,
+    sort = FALSE)
+
+  # Tidy outputs ----
   ans <- list()
   ans$method <- "Maxcombo"
   temp <- data.frame(rho = rho, gamma = gamma) %>% mutate(x= paste0("FH(", rho, ", ", gamma, ")"))
   ans$parameter <- paste(temp$x, collapse = " + ")
   ans$estimation <- NULL
   ans$se <- NULL
-  ans$z <- res$z
 
-  res_df <- as.data.frame(cbind(res$z, res$corr))
-  colnames(res_df)[1] <- "z"
-  ans$p_value <- pvalue_maxcombo(res_df)
+  # Get z statistics for input rho, gamma combinations
+  ans$z <- rg_fh$z[(0:(n_weight - 1)) * n_weight + 1:n_weight]
+
+  # Get correlation matrix
+  cov_mat <- matrix(rg_fh$var, nrow = n_weight, byrow = TRUE)
+  corr_mat <- stats::cov2cor(cov_mat)
+
+  cov_mat <- as.data.frame(cov_mat)
+  corr_mat <- as.data.frame(corr_mat)
+  colnames(cov_mat) <- paste("v", seq_len(ncol(cov_mat)), sep = "")
+  colnames(corr_mat) <- paste("v", seq_len(ncol(corr_mat)), sep = "")
 
   if (return_corr) {
-    ans$corr <- res$corr
+    ans$corr <- corr_mat
+  } else if (return_variance) {
+    ans$var <- cov_mat
   }
 
+  # Get p-values
+  res_df <- as.data.frame(cbind(ans$z, corr_mat))
+  colnames(res_df)[1] <- "z"
+  ans$p_value <- pvalue_maxcombo(res_df)
+
   return(ans)
 }
diff --git a/R/mb_weight.R b/R/mb_weight.R
@@ -68,35 +68,28 @@ mb_weight <- function(x, delay = 4, w_max = Inf) {
     stop("x column names in `mb_weight()` must contain s")
   }
 
-  ans <- list()
-  ans$method <- "WLR"
-  ans$parameter <- paste0("MB(delay = ", delay, ", max_weight = ", w_max, ")")
   # Compute max weight by stratum
   x2 <- as.data.table(x)
   # Make sure you don't lose any stratum!
   tbl_all_stratum <- data.table(stratum = unique(x2$stratum))
 
   # Look only up to delay time
-  res <- x2[tte <= delay, ]
+  ans <- x2[tte <= delay, ]
   # Weight before delay specified as 1/S
-  res <- res[, .(max_weight = max(1 / s)), by = "stratum"]
+  ans <- ans[, .(max_weight = max(1 / s)), by = "stratum"]
   # Get back stratum with no records before delay ends
-  res <- res[tbl_all_stratum, on = "stratum"]
+  ans <- ans[tbl_all_stratum, on = "stratum"]
   # `max_weight` is 1 when there are no records before delay ends
-  res[, max_weight := fifelse(is.na(max_weight), 1, max_weight)]
+  ans[, max_weight := fifelse(is.na(max_weight), 1, max_weight)]
   # Cut off weights at w_max
-  res[, max_weight := pmin(w_max, max_weight)]
+  ans[, max_weight := pmin(w_max, max_weight)]
   # Now merge max_weight back to stratified dataset
-  res <- merge.data.table(res, x2, by = "stratum", all = TRUE)
+  ans <- merge.data.table(ans, x2, by = "stratum", all = TRUE)
   # Weight is min of max_weight and 1/S which will increase up to delay
-  res[, mb_weight := pmin(max_weight, 1 / s)]
-  res[, max_weight := NULL]
+  ans[, mb_weight := pmin(max_weight, 1 / s)]
+  ans[, max_weight := NULL]
 
-  setDF(res)
-
-  ans$estimate <- sum(res$o_minus_e * res$mb_weight)
-  ans$se <- sqrt(sum(res$var_o_minus_e * res$mb_weight^2))
-  ans$z <- ans$estimate / ans$se
+  setDF(ans)
 
   return(ans)
 }