-
Notifications
You must be signed in to change notification settings - Fork 8
/
counting_process.R
146 lines (138 loc) · 5.21 KB
/
counting_process.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Copyright (c) 2024 Merck & Co., Inc., Rahway, NJ, USA and its affiliates.
# All rights reserved.
#
# This file is part of the simtrial program.
#
# simtrial is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#' Process survival data into counting process format
#'
#' Produces a data frame that is sorted by stratum and time.
#' Included in this is only the times at which one or more event occurs.
#' The output dataset contains stratum, TTE (time-to-event),
#' at risk count, and count of events at the specified TTE
#' sorted by stratum and TTE.
#'
#' @details
#' The function only considered two group situation.
#'
#' The tie is handled by the Breslow's Method.
#'
#' @param x A data frame with no missing values and contain variables:
#' - `stratum`: Stratum.
#' - `treatment`: Treatment group.
#' - `tte`: Observed time.
#' - `event`: Binary event indicator, `1` represents event,
#' `0` represents censoring.
#' @param arm Value in the input `treatment` column that indicates
#' treatment group value.
#'
#' @return
#' A data frame grouped by `stratum` and sorted within stratum by `tte`.
#' Remain rows with at least one event in the population, at least one subject
#' is at risk in both treatment group and control group.
#' Other variables in this represent the following within each stratum at
#' each time at which one or more events are observed:
#' - `events`: Total number of events
#' - `n_event_tol`: Total number of events at treatment group
#' - `n_risk_tol`: Number of subjects at risk
#' - `n_risk_trt`: Number of subjects at risk in treatment group
#' - `S`: Left-continuous Kaplan-Meier survival estimate
#' - `o_minus_e`: In treatment group, observed number of events minus expected
#' number of events. The expected number of events is estimated by assuming
#' no treatment effect with hypergeometric distribution with parameters total
#' number of events, total number of events at treatment group and number of
#' events at a time. (Same assumption of log-rank test under the null
#' hypothesis)
#' - `var_o_minus_e`: Variance of `o_minus_e` under the same assumption.
#'
#' @importFrom data.table ":=" as.data.table setDF uniqueN
#'
#' @export
#'
#' @details
#' The output produced by [counting_process()] produces a
#' counting process dataset grouped by stratum and sorted within stratum
#' by increasing times where events occur.
#'
#' @examples
#' # Example 1
#' x <- data.frame(
#' stratum = c(rep(1, 10), rep(2, 6)),
#' treatment = rep(c(1, 1, 0, 0), 4),
#' tte = 1:16,
#' event = rep(c(0, 1), 8)
#' )
#' counting_process(x, arm = 1)
#'
#' # Example 2
#' x <- sim_pw_surv(n = 400)
#' y <- cut_data_by_event(x, 150) |> counting_process(arm = "experimental")
#' # Weighted logrank test (Z-value and 1-sided p-value)
#' z <- sum(y$o_minus_e) / sqrt(sum(y$var_o_minus_e))
#' c(z, pnorm(z))
counting_process <- function(x, arm) {
unique_treatment <- unique(x$treatment)
if (length(unique_treatment) > 2) {
stop("counting_process: expected two groups.")
}
if (!arm %in% unique_treatment) {
stop("counting_process: arm is not a valid treatment group value.")
}
if (!all(unique(x$event) %in% c(0, 1))) {
stop("counting_process: event indicator must be 0 (censoring) or 1 (event).")
}
ans <- as.data.table(x)
ans <- ans[order(tte, decreasing = TRUE), ]
ans[, one := 1]
ans[, `:=`(
n_risk_tol = cumsum(one),
n_risk_trt = cumsum(treatment == arm)
), by = "stratum"]
# Handling ties using Breslow's method
if (uniqueN(ans[, .(stratum, tte)]) < nrow(ans)) { # ties
ans[, mtte := -tte]
ans <- ans[, .(
events = sum(event),
n_event_tol = sum((treatment == arm) * event),
tte = tte[1],
n_risk_tol = max(n_risk_tol),
n_risk_trt = max(n_risk_trt)
), by = c("stratum", "mtte")]
ans[, mtte := NULL]
} else { # no ties
ans <- ans[, .(
stratum,
events = event,
n_event_tol = (treatment == arm) * event,
tte,
n_risk_tol,
n_risk_trt
)]
}
# Keep calculation for observed time with at least one event,
# at least one subject is at risk in both treatment group and control group.
ans <- ans[events > 0 & n_risk_tol - n_risk_trt > 0 & n_risk_trt > 0, ]
ans[, s := 1 - events / n_risk_tol]
ans <- ans[order(stratum, tte), ]
# Left continuous Kaplan-Meier Estimator
ans[, s := c(1, cumprod(s)[-length(s)]), by = "stratum"]
# Observed events minus Expected events in treatment group
ans[, o_minus_e := n_event_tol - n_risk_trt / n_risk_tol * events]
# Variance of o_minus_e
ans[, var_o_minus_e := (n_risk_tol - n_risk_trt) *
n_risk_trt * events * (n_risk_tol - events) /
n_risk_tol^2 / (n_risk_tol - 1)]
setDF(ans)
return(ans)
}