Skip to content

Commit

Permalink
make xxx_weight only assign weights to each subjects
Browse files Browse the repository at this point in the history
  • Loading branch information
LittleBeannie committed Apr 9, 2024
1 parent 4106ca0 commit 1a82541
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 174 deletions.
22 changes: 8 additions & 14 deletions R/early_zero_weight.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,14 @@
#'
#' @importFrom data.table ":=" as.data.table fifelse merge.data.table setDF
early_zero_weight <- function(x, early_period = 4, fail_rate = NULL) {
ans <- list()
ans$method <- "WLR"
ans$parameter <- paste0("Xu 2017 with first ", early_period, " months of 0 weights")

res <- as.data.table(x)
n_stratum <- length(unique(res$stratum))

ans <- as.data.table(x)
n_stratum <- length(unique(ans$stratum))

# If it is unstratified design
if (n_stratum == 1) {
res[, weight := fifelse(tte < early_period, 0, 1)]
ans[, weight := fifelse(tte < early_period, 0, 1)]
} else {
if (is.null(fail_rate)) {
stop("For stratified design to use `early_zero_weight()`, `fail_rate` can't be `NULL`.")
Expand All @@ -52,16 +50,12 @@ early_zero_weight <- function(x, early_period = 4, fail_rate = NULL) {
late_hr <- fail_rate[hr != 1, .(stratum, hr)]
delay_change_time <- fail_rate[hr == 1, .(stratum, duration)]

res <- merge.data.table(res, late_hr, by = "stratum", all.x = TRUE)
res <- merge.data.table(res, delay_change_time, by = "stratum", all.x = TRUE)
res[, weight := fifelse(tte < duration, 0, hr)]
ans <- merge.data.table(ans, late_hr, by = "stratum", all.x = TRUE)
ans <- merge.data.table(ans, delay_change_time, by = "stratum", all.x = TRUE)
ans[, weight := fifelse(tte < duration, 0, hr)]
}

setDF(res)

ans$estimate <- sum(res$o_minus_e * res$weight)
ans$se <- sqrt(sum(res$var_o_minus_e * res$weight^2))
ans$z <- ans$estimate / ans$se
setDF(ans)

return(ans)
}
127 changes: 10 additions & 117 deletions R/fh_weight.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,17 @@ fh_weight <- function(
x = sim_pw_surv(n = 200) |>
cut_data_by_event(150) |>
counting_process(arm = "experimental"),
rho_gamma = data.frame(
rho = c(0, 0, 1, 1),
gamma = c(0, 1, 0, 1)
),
rho = 0,
gamma = 1,
return_variance = FALSE,
return_corr = FALSE) {
n_weight <- nrow(rho_gamma)

# Check input failure rate assumptions

# Input checking ----
if(length(rho) != 1 || length(gamma) != 1) {
stop("fh_weight: please input single numerical values of rho and gamma.")
}

if (!is.data.frame(x)) {
stop("fh_weight: x in `fh_weight()` must be a data frame.")
}
Expand All @@ -109,116 +111,7 @@ fh_weight <- function(
stop("fh_weight: x column names in `fh_weight()` must contain var_o_minus_e.")
}

if (return_variance && return_corr) {
stop("fh_weight: can't report both covariance and correlation for MaxCombo test.")
}

if (return_corr && n_weight == 1) {
stop("fh_weight: can't report the correlation for a single WLR test.")
}

if (n_weight == 1) {
test_result <- wlr_fh_z_stat(x, rho_gamma = rho_gamma, return_variance = return_variance)

ans <- list()
ans$method <- "WLR"
ans$parameter <- paste0("FH(", rho_gamma$rho, ", ", rho_gamma$gamma, ")")
ans$z <- test_result$z
ans$estimation <- test_result$estimation
ans$se <- test_result$se
if (return_variance) {
ans$var <- test_result$var
}
} else {

ans <- list()
ans$method <- "MaxCombo"
ans$parameter <- paste((rho_gamma %>% mutate(x = paste0("FH(", rho, ", ", gamma, ")")))$x, collapse = " + ")
ans$estimation <- NULL
ans$se <- NULL
# Get average rho and gamma for FH covariance matrix
# We want ave_rho[i,j] = (rho[i] + rho[j])/2
# and ave_gamma[i,j] = (gamma[i] + gamma[j])/2
ave_rho <- (matrix(rho_gamma$rho, nrow = n_weight, ncol = n_weight, byrow = FALSE) +
matrix(rho_gamma$rho, nrow = n_weight, ncol = n_weight, byrow = TRUE)
) / 2
ave_gamma <- (matrix(rho_gamma$gamma, nrow = n_weight, ncol = n_weight) +
matrix(rho_gamma$gamma, nrow = n_weight, ncol = n_weight, byrow = TRUE)
) / 2

# Convert to data.table
rg_new <- data.table(rho = as.numeric(ave_rho), gamma = as.numeric(ave_gamma))
# Get unique values of rho, gamma
rg_unique <- unique(rg_new)

# Compute FH statistic for unique values
# and merge back to full set of pairs
temp <- wlr_fh_z_stat(x, rho_gamma = rg_unique, return_variance = TRUE) |> as.data.frame()
rg_fh <- merge.data.table(
x = rg_new,
y = temp,
by = c("rho", "gamma"),
all.x = TRUE,
sort = FALSE
)

# Get z statistics for input rho, gamma combinations
ans$z <- rg_fh$z[(0:(n_weight - 1)) * n_weight + 1:n_weight]

# Get correlation matrix
cov_mat <- matrix(rg_fh$var, nrow = n_weight, byrow = TRUE)

if (return_corr) {
corr_mat <- stats::cov2cor(cov_mat)
colnames(corr_mat) <- paste("v", seq_len(ncol(corr_mat)), sep = "")
ans$corr <- as.data.frame(corr_mat)
} else if (return_variance) {
corr_mat <- cov_mat
colnames(corr_mat) <- paste("v", seq_len(ncol(corr_mat)), sep = "")
ans$var <- as.data.frame(corr_mat)
}
}

return(ans)
}

# Build an internal function to compute the Z statistics
# under a sequence of rho and gamma of WLR.
wlr_fh_z_stat <- function(x, rho_gamma, return_variance) {

xx <- x[, c("s", "o_minus_e", "var_o_minus_e")]

# Initialization of the output
ans <- list()
ans$z <- rep(0, nrow(rho_gamma))
ans$estimation <- rep(0, nrow(rho_gamma))
ans$se <- rep(0, nrow(rho_gamma))
ans$rho <- rep(0, nrow(rho_gamma))
ans$gamma <- rep(0, nrow(rho_gamma))

if (return_variance) {
ans$var <- rep(0, nrow(rho_gamma))
}

# Loop for each WLR-FH test with 1 rho and 1 gamma
for (i in seq_len(nrow(rho_gamma))) {
weight <- xx$s^rho_gamma$rho[i] * (1 - xx$s)^rho_gamma$gamma[i]
weighted_o_minus_e <- weight * xx$o_minus_e
weighted_var <- weight^2 * xx$var_o_minus_e

weighted_var_total <- sum(weighted_var)
weighted_o_minus_e_total <- sum(weighted_o_minus_e)

ans$rho[i] <- rho_gamma$rho[i]
ans$gamma[i] <- rho_gamma$gamma[i]
ans$estimation[i] <- weighted_o_minus_e_total
ans$se[i] <- sqrt(weighted_var_total)
ans$z[i] <- ans$estimation[i] / ans$se[i]

if (return_variance) {
ans$var[i] <- weighted_var_total
}
}
x$weight <- x$s^rho * (1 - x$s)^gamma

return(ans)
return(x)
}
108 changes: 90 additions & 18 deletions R/maxcombo.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,35 +36,107 @@
#' sim_pw_surv(n = 200) |>
#' cut_data_by_event(150) |>
#' maxcombo(rho = c(0, 0), gamma = c(0, 1))
maxcombo <- function(data, rho, gamma, return_corr = FALSE) {
stopifnot(
is.numeric(rho), is.numeric(gamma),
rho >= 0, gamma >= 0,
length(rho) == length(gamma)
)

res <- data |>
counting_process(arm = "experimental") |>
fh_weight(
rho_gamma = data.frame(rho = rho, gamma = gamma),
return_corr = TRUE
)
maxcombo <- function(data = sim_pw_surv(n = 200) |> cut_data_by_event(150),
rho = c(0, 0, 1),
gamma = c(0, 1, 1),
return_variance = FALSE,
return_corr = FALSE) {
# Input checking ----
n_weight <- length(rho)

if(any(!is.numeric(rho)) || any(!is.numeric(gamma)) || any(rho < 0) || any(gamma < 0)) {
stop("maxcombo: please input positive values of rho and gamma.")
}

if (length(rho) != length(gamma)) {
stop("maxcombo: please input rho and gamma of equal length.")
}

if (length(rho) == 1) {
stop("maxcombo: please input multiple rho and gamma to make it a MaxCombo test.")
}

if (return_variance && return_corr) {
stop("maxcombo: can't report both covariance and correlation for MaxCombo test.")
}

if (return_corr && n_weight == 1) {
stop("maxcombo: can't report the correlation for a single WLR test.")
}

# Get average rho and gamma for FH covariance matrix ----
# We want ave_rho[i,j] = (rho[i] + rho[j])/2
# and ave_gamma[i,j] = (gamma[i] + gamma[j])/2
ave_rho <- (matrix(rho, nrow = n_weight, ncol = n_weight, byrow = FALSE) +
matrix(rho, nrow = n_weight, ncol = n_weight, byrow = TRUE)
) / 2
ave_gamma <- (matrix(gamma, nrow = n_weight, ncol = n_weight) +
matrix(gamma, nrow = n_weight, ncol = n_weight, byrow = TRUE)
) / 2

# Convert to all original and average rho/gamma into a data.table ----
rg_new <- data.table(rho = as.numeric(ave_rho), gamma = as.numeric(ave_gamma))
rg_unique <- unique(rg_new)

# Compute WLR FH Z-score and variance for all original and average rho/gamma ----
# Compute FH statistic for unique values
res <- list()
x <- data |> counting_process(arm = "experimental")
# Loop for each WLR-FH test with 1 rho and 1 gamma
for (i in seq_len(nrow(rg_unique))) {
weight <- x$s^rg_unique$rho[i] * (1 - x$s)^rg_unique$gamma[i]
weighted_o_minus_e <- weight * x$o_minus_e
weighted_var <- weight^2 * x$var_o_minus_e

weighted_var_total <- sum(weighted_var)
weighted_o_minus_e_total <- sum(weighted_o_minus_e)

res$rho[i] <- rg_unique$rho[i]
res$gamma[i] <- rg_unique$gamma[i]
res$estimation[i] <- weighted_o_minus_e_total
res$se[i] <- sqrt(weighted_var_total)
res$var[i] <- weighted_var_total
res$z[i] <- res$estimation[i] / res$se[i]
}

# Merge back to full set of pairs ----
rg_fh <- merge.data.table(
x = rg_new,
y = res |> as.data.frame(),
by = c("rho", "gamma"),
all.x = TRUE,
sort = FALSE)

# Tidy outputs ----
ans <- list()
ans$method <- "Maxcombo"
temp <- data.frame(rho = rho, gamma = gamma) %>% mutate(x= paste0("FH(", rho, ", ", gamma, ")"))
ans$parameter <- paste(temp$x, collapse = " + ")
ans$estimation <- NULL
ans$se <- NULL
ans$z <- res$z

res_df <- as.data.frame(cbind(res$z, res$corr))
colnames(res_df)[1] <- "z"
ans$p_value <- pvalue_maxcombo(res_df)
# Get z statistics for input rho, gamma combinations
ans$z <- rg_fh$z[(0:(n_weight - 1)) * n_weight + 1:n_weight]

# Get correlation matrix
cov_mat <- matrix(rg_fh$var, nrow = n_weight, byrow = TRUE)
corr_mat <- stats::cov2cor(cov_mat)

cov_mat <- as.data.frame(cov_mat)
corr_mat <- as.data.frame(corr_mat)
colnames(cov_mat) <- paste("v", seq_len(ncol(cov_mat)), sep = "")
colnames(corr_mat) <- paste("v", seq_len(ncol(corr_mat)), sep = "")

if (return_corr) {
ans$corr <- res$corr
ans$corr <- corr_mat
} else if (return_variance) {
ans$var <- cov_mat
}

# Get p-values
res_df <- as.data.frame(cbind(ans$z, corr_mat))
colnames(res_df)[1] <- "z"
ans$p_value <- pvalue_maxcombo(res_df)

return(ans)
}
25 changes: 9 additions & 16 deletions R/mb_weight.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,35 +68,28 @@ mb_weight <- function(x, delay = 4, w_max = Inf) {
stop("x column names in `mb_weight()` must contain s")
}

ans <- list()
ans$method <- "WLR"
ans$parameter <- paste0("MB(delay = ", delay, ", max_weight = ", w_max, ")")
# Compute max weight by stratum
x2 <- as.data.table(x)
# Make sure you don't lose any stratum!
tbl_all_stratum <- data.table(stratum = unique(x2$stratum))

# Look only up to delay time
res <- x2[tte <= delay, ]
ans <- x2[tte <= delay, ]
# Weight before delay specified as 1/S
res <- res[, .(max_weight = max(1 / s)), by = "stratum"]
ans <- ans[, .(max_weight = max(1 / s)), by = "stratum"]
# Get back stratum with no records before delay ends
res <- res[tbl_all_stratum, on = "stratum"]
ans <- ans[tbl_all_stratum, on = "stratum"]
# `max_weight` is 1 when there are no records before delay ends
res[, max_weight := fifelse(is.na(max_weight), 1, max_weight)]
ans[, max_weight := fifelse(is.na(max_weight), 1, max_weight)]
# Cut off weights at w_max
res[, max_weight := pmin(w_max, max_weight)]
ans[, max_weight := pmin(w_max, max_weight)]
# Now merge max_weight back to stratified dataset
res <- merge.data.table(res, x2, by = "stratum", all = TRUE)
ans <- merge.data.table(ans, x2, by = "stratum", all = TRUE)
# Weight is min of max_weight and 1/S which will increase up to delay
res[, mb_weight := pmin(max_weight, 1 / s)]
res[, max_weight := NULL]
ans[, mb_weight := pmin(max_weight, 1 / s)]
ans[, max_weight := NULL]

setDF(res)

ans$estimate <- sum(res$o_minus_e * res$mb_weight)
ans$se <- sqrt(sum(res$var_o_minus_e * res$mb_weight^2))
ans$z <- ans$estimate / ans$se
setDF(ans)

return(ans)
}
Loading

0 comments on commit 1a82541

Please sign in to comment.