This repository has been archived by the owner on Jun 14, 2023. It is now read-only.
/
DesignWithAverageHazardRatio.Rmd
449 lines (393 loc) · 18.7 KB
/
DesignWithAverageHazardRatio.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
---
title: "Design Using Average Hazard Ratio"
author: "Keaven M. Anderson"
date: "12/21/2020"
output:
html_document:
code_folding: hide
toc: true
toc_depth: 2
bibliography: gsDesign.bib
vignette: >
%\VignetteIndexEntry{Design Using Average Hazard Ratio}
%\VignetteEncoding{UTF-8}
%\VignetteEngine{knitr::rmarkdown}
---
```{r setup, include=FALSE, message=FALSE, warning=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r packages, message=FALSE, warning=FALSE}
# packages used
library(gsDesign)
library(gsDesign2)
library(gsdmvn)
library(ggplot2)
library(dplyr)
library(knitr)
library(kableExtra)
library(gt)
library(tidyr)
```
## Introduction
@JTBook
## Scenarios
Expected enrollment duration is 24 months with piecewise constant enrollment rates escalating every 2 months until month 6 where enrollment is assumed to have reached steady state.
For alternate scenarios, enrollment will occur at a faster than planned rate.
For both scenarios, we assume for initial illustrations that the total enrollment is 100 subjects.
```{r}
# 6 month ramp-up of enrollment, 24 months enrollment time target
enroll24 <- tibble::tibble(Stratum = rep("All",4),
duration = c(rep(2,3), 18),
rate = 1:4)
# Set rates to enroll 100 subjects
N <- sum(enroll24$duration * enroll24$rate)
enroll24$rate <- enroll24$rate * 100 / N
# Enroll in 16 months, same ramp-up
enroll16 <- tibble::tibble(Stratum = rep("All",4),
duration = c(rep(2,3), 12),
rate = 1:4)
# Set rates to enroll 100 subjects
N <- sum(enroll16$duration * enroll16$rate)
enroll16$rate <- enroll16$rate * 100 / N
# Put these in a single tibble by scenario
# We will use 16 month enrollment for delayed effect and crossing hazards
# scenarios
enrollRates <- rbind(enroll24 %>% mutate(Scenario = "PH"),
enroll24 %>% mutate(Scenario = "Delayed effect 1"),
enroll16 %>% mutate(Scenario = "Delayed effect 2"),
enroll16 %>% mutate(Scenario = "Crossing")
)
```
We will consider the following failure rate assumptions:
- PH: Proportional hazards is assumed.
- Control group has exponential failure rate with a median of 12 months.
- Constant hazard ratio of 0.7 (experimental/control).
- Delayed effect
- Control group has exponential failure rate with a median of 10 months.
- Hazard ratio of 1 for 6 months followed by a hazard ratio of 0.575.
- Crossing hazards
- Control group has exponential failure rate with a median of 10 months.
- Hazard ratio of 1.5 for 4 months followed by a hazard ratio of 0.5.
Survival curves for these 3 scenarios are shown below:
```{r}
Month <- c(0,4,6,44)
duration <- Month - c(0,Month[1:3])
control_rate <- log(2) / c(rep(16,4), rep(14, 4), rep(14, 4))
s <- tibble::tibble(Scenario = c(rep("PH",4), rep("Delayed effect", 4), rep("Crossing", 4)),
Treatment = rep("Control", 12),
Month = rep(Month, 3),
duration = rep(duration, 3),
rate = control_rate,
hr = c(rep(.7, 4), c(1, 1, 1, .575), c(1.5,1.5, .5, .5)),
)
s <- rbind(s,
s %>% mutate(Treatment = "Experimental",
rate = rate * hr)
) %>%
group_by(Scenario, Treatment) %>%
mutate(Survival = exp(-cumsum(duration * rate)))
ggplot(s, aes(x = Month, y = Survival, col = Scenario, lty = Treatment)) +
geom_line() +
scale_y_log10(breaks = (1:10) /10, lim=c(.1,1))+
scale_x_continuous(breaks = seq(0,42, 6))
```
## Average Hazard Ratio
```{r}
# Durations to be used in common for all failure rate scenarios
dur <- c(4,2,100)
# Exponential failure, proportional hazards
failRates <- rbind(tibble(Scenario = "PH", Stratum = "All",
duration = dur, failRate = log(2) / 14,
hr = 0.7, dropoutRate = .001),
tibble(Scenario = "Delayed effect 1", Stratum = "All",
duration = dur, failRate = log(2) / 11,
hr = c(1, .6, .6), dropoutRate = .001),
tibble(Scenario = "Delayed effect 2", Stratum = "All",
duration = dur, failRate = log(2) / 11,
hr = c(1, 1, .7), dropoutRate = .001),
tibble(Scenario = "Crossing", Stratum = "All",
duration = dur, failRate = log(2) / 11,
hr = c(1.5, .6,.6), dropoutRate = .001)
)
hr <- NULL
for(g in c("PH", "Delayed effect 1", "Delayed effect 2", "Crossing")){
hr <-
rbind(hr,
AHR(enrollRates = enrollRates %>% filter(Scenario == g),
failRates = failRates %>% filter(Scenario == g),
totalDuration = c(.001, seq(4, 44, 4))
) %>%
mutate(Scenario = g)
)
}
```
```{r}
ggplot(hr, aes(x=Time, y=AHR, col = Scenario)) + geom_line() + scale_x_continuous(breaks = seq(0, 42, 6))
```
```{r}
ggplot(hr, aes(x=Time, y=`Events`, col = Scenario)) + geom_line() + scale_x_continuous(breaks = seq(0, 42, 6))
```
## Sample Size and Events by Scenario
### Fixed design using AHR and logrank
We power a fixed design at 90% with 2.5% one-sided Type I error under the different scenarios under consideration.
```{r}
ss_ahr_fixed <- NULL
for(g in c("PH", "Delayed effect 1","Delayed effect 2", "Crossing")){
ss_ahr_fixed <-
rbind(ss_ahr_fixed,
gs_design_ahr(enrollRates = enrollRates %>% filter(Scenario == g),
failRates = failRates %>% filter(Scenario == g),
analysisTimes = 36,
upper = gs_b, upar = qnorm(.975),
lower = gs_b, lpar = -Inf,
alpha = .025,
beta = .1
)$bounds %>% mutate(Scenario = g)
)
}
ss_ahr_fixed %>% select(Time, N, Events, AHR, Scenario) %>%
gt() %>% fmt_number(columns=1:3,decimals = 0) %>% fmt_number(columns = 4, decimals = 3) %>%
tab_header(title = "Sample Size and Events Required by Scenario",
subtitle = "36 Month Trial duration, 2.5% One-sided Type 1 Error, 90% Power")
```
Assuming delayed effect 1 is the primary scenario for which we wish to protect power, how long should the trial be to optimize the tradeoffs between sample size, AHR and events required?
We will inform this tradeoff by looking sizing the trial for different assumed trial durations with the same failure rates and assumed relative enrollment rates.
The counts of events required is perhaps the most interesting here in that a 24 month trial requires almost twice the events to be powered at 90% compared to a trial of 42 months duration.
For further study, we will consider the 36 month trial duration as a reasonable tradeoff between time, sample size and power under a presumed delayed effect of 4 months followed by a hazard ratio of 0.6 thereafter.
```{r}
ss_ahr_fixed <- NULL
g <- "Delayed effect 1"
for(trialEnd in c(24,30,36,42)){
ss_ahr_fixed <-
rbind(ss_ahr_fixed,
gs_design_ahr(enrollRates = enrollRates %>% filter(Scenario == g),
failRates = failRates %>% filter(Scenario == g),
analysisTimes = trialEnd,
upper = gs_b, upar = qnorm(.975),
lower = gs_b, lpar = -Inf,
alpha = .025,
beta = .1
)$bounds %>% mutate(Scenario = g)
)
}
ss_ahr_fixed %>% select(Time, N, Events, AHR, Scenario) %>%
gt() %>% fmt_number(columns=1:3,decimals = 0) %>% fmt_number(columns = 4, decimals = 3) %>%
tab_header(title = "Sample Size and Events Required by Trial Duration",
subtitle = "Delayed Effect of 4 Months, HR = 0.6 Thereafter; 90% Power")
```
### Alternate hypothesis mapping
Experimental version of `AHR()`.
```{r}
AHRx <- function(enrollRates=tibble::tibble(Stratum="All",
duration=c(2,2,10),
rate=c(3,6,9)),
failRates=tibble::tibble(Stratum="All",
duration=c(3,100),
failRate=log(2)/c(9,18),
hr=c(.9,.6),
dropoutRate=rep(.001,2)),
totalDuration=30,
ratio=1,
simple=TRUE
){
# check input values
# check input enrollment rate assumptions
if(max(names(enrollRates)=="Stratum") != 1){stop("gsDesign2: enrollRates column names in `AHR()` must contain stratum")}
if(max(names(enrollRates)=="duration") != 1){stop("gsDesign2: enrollRates column names in `AHR()` must contain duration")}
if(max(names(enrollRates)=="rate") != 1){stop("gsDesign2: enrollRates column names in `AHR()' must contain rate")}
# check input failure rate assumptions
if(max(names(failRates)=="Stratum") != 1){stop("gsDesign2: failRates column names in `AHR()` must contain stratum")}
if(max(names(failRates)=="duration") != 1){stop("gsDesign2: failRates column names in `AHR()` must contain duration")}
if(max(names(failRates)=="failRate") != 1){stop("gsDesign2: failRates column names in `AHR()` must contain failRate")}
if(max(names(failRates)=="hr") != 1){stop("gsDesign2: failRates column names in `AHR()` must contain hr")}
if(max(names(failRates)=="dropoutRate") != 1){stop("gsDesign2: failRates column names in `AHR()` must contain dropoutRate")}
# check input trial durations
if(!is.numeric(totalDuration)){stop("gsDesign2: totalDuration in `AHR()` must be a non-empty vector of positive numbers")}
if(!is.vector(totalDuration) > 0){stop("gsDesign2: totalDuration in `AHR()` must be a non-empty vector of positive numbers")}
if(!min(totalDuration) > 0){stop("gsDesign2: totalDuration in `AHR()` must be greater than zero")}
strata <- names(table(enrollRates$Stratum))
strata2 <- names(table(failRates$Stratum))
length(strata) == length(strata2)
for(s in strata){
if(max(strata2==s) != 1){stop("gsDesign2: Strata in `AHR()` must be the same in enrollRates and failRates")}
}
# check input simple is logical
if(!is.logical(simple)){stop("gsDesign2: simple in `AHR()` must be logical")}
# compute proportion in each group
Qe <- ratio / (1 + ratio)
Qc <- 1 - Qe
# compute expected events by treatment group, stratum and time period
rval <- NULL
for(td in totalDuration){
events <- NULL
for(s in strata){
# subset to stratum
enroll <- enrollRates %>% filter(Stratum==s)
fail <- failRates %>% filter(Stratum==s)
# Control events
enrollc <- enroll %>% mutate(rate=rate*Qc)
control <- eEvents_df(enrollRates=enrollc,failRates=fail,totalDuration=td,simple=FALSE)
# Experimental events
enrolle <- enroll %>% mutate(rate=rate*Qe)
fre <- fail %>% mutate(failRate=failRate*hr)
experimental <- eEvents_df(enrollRates=enrolle,failRates=fre,totalDuration=td,simple=FALSE)
# Combine control and experimental; by period recompute HR, events, information
events <-
rbind(control %>% mutate(Treatment="Control"),
experimental %>% mutate(Treatment="Experimental")) %>%
arrange(t, Treatment) %>% ungroup() %>% group_by(t) %>%
summarize(Stratum = s, info = (sum(1 / Events))^(-1),
Events = sum(Events), HR = last(failRate) / first(failRate)
) %>%
rbind(events)
}
rval <- rbind(rval,
events %>%
mutate(Time=td, lnhr = log(HR), info0 = Events * Qc * Qe) %>%
# NEXT 2 lines are the only changes from AHR()
ungroup() %>% group_by(Stratum, t) %>%
summarize(Time = td, Events = sum(Events), HR=first(HR), lnhr=first(lnhr), info0 = sum(info0), info = sum(info)) %>% ungroup()
)
}
if(!simple) return(rval %>% select(c("Time", "Stratum", "t", "HR", "Events", "info", "info0")) %>%
group_by(Time, Stratum) %>% arrange(t, .by_group = TRUE))
return(rval %>%
group_by(Time) %>%
summarize(AHR = exp(sum(log(HR)*Events)/sum(Events)),
Events = sum(Events),
info = sum(info),
info0 = sum(info0))
)
}
```
Under the different scenarios of interest, we can examine the expected number of events in time periods of interest.
```{r}
events_by_time_period <- NULL
for(g in c("PH", "Delayed effect 1","Delayed effect 2", "Crossing")){
events_by_time_period <-
rbind(events_by_time_period,
AHRx(enrollRates = enrollRates %>% filter(Scenario == g),
failRates = failRates %>% filter(Scenario == g),
totalDuration = c(12, 20, 28, 36), simple = FALSE) %>% mutate(Scenario = g)
)
}
events_by_time_period %>% gt()
```
Recall that our alternate hypothesis assumes no treatment effect (HR=1) for 4 months and then HR = 0.6 thereafter.
For any of the above scenarios, if we wish to base a futility bound on this assumption plus the above number of events in the first 4 months and after 4 months, then we can compute the average hazard ratio under the alternate hazard ratio for each scenario at 20 months as follows.
You can see that an interim futility spending bound based on the alternate hypothesis can depend fairly heavily on enrollment and the control failure rate.
Note also that at the time of interim analysis, the alternate hypothesis AHR can be computed in this same fashion based on observed events by time period.
Note that this can be quite different than the scenario HR; e.g., for PH, we assume HR=0.7 throughout, but for the futility bound comparison, we compute blinded AHR that decreases with each analysis under the alternate hypothesis.
```{r}
# Time periods for each scenario were 0-4, 4-6, and 6+
# Thus H1 has HR as follows
hr1 <- tibble(t = c(0, 4, 6), hr1 = c(1, .6, .6))
ahr_by_analysis <-
events_by_time_period %>%
full_join(hr1) %>%
group_by(Scenario, Time) %>%
summarize(AHR1 = exp(sum(Events * log(hr1))/ sum(Events)))
ahr_by_analysis %>%
pivot_wider(names_from = Scenario, values_from = AHR1) %>%
gt() %>% fmt_number(columns=2:5, decimals = 3)
```
## Group sequential design
Here we assume the design is under a delayed effect model where the delay is not too long and the long-term average hazard ratio benefit is strong.
proportional hazards scenario, but we look at power under the alternate scenarios.
We will plan a 36 month group sequential design under the delayed effect 1 scenario.
Interim analyses are planned after 12, 20, and 28 months.
```{r}
analysisTimes <- c(12, 20, 28, 36)
upar <- list(sf = gsDesign::sfLDOF, total_spend = 0.025, param = NULL, timing = NULL, theta=0)
lpar <- list(sf = gsDesign::sfHSD, total_spend = .1, param = -2, timing = NULL, theta=NULL)
NPHasymmetric <- gs_design_ahr(enrollRates = enrollRates,
failRates = failRates,
ratio = 1, alpha = .025, beta = 0.1,
# Information fraction not required (but available!)
analysisTimes = analysisTimes,
# Function to enable spending bound
upper = gs_spending_bound, lower = gs_spending_bound,
# Spending function and parameters used
upar = upar, lpar = lpar
)
NPHasymmetric$bounds
```
By scenario, we now wish to compute the adjusted expected futility bounds and the power implied.
```{r}
xx <- NULL
lparx <- lpar
for(g in c("PH", "Delayed effect 1","Delayed effect 2", "Crossing")){
AHR1 <- (filter(ahr_by_analysis, Scenario == g))$AHR1
lparx$theta1 <- -log(AHR1)
yy <- gs_power_ahr(enrollRates = enrollRates %>% filter(Scenario == g),
failRates = failRates %>% filter(Scenario == g),
events = NULL,
analysisTimes = c(12,20,28,36),
upper = gs_spending_bound,
upar = upar,
lower = gs_spending_bound,
lpar = lparx)
xx <- rbind(xx, yy %>% mutate(Scenario = g))
}
```
### Weighted logrank
```{r, eval = FALSE}
ss_FH05_fixed <- NULL
g <- "Delayed effect"
# for(g in c("PH", "Delayed effect", "Crossing")){
# ss_FH05_fixed <-
# rbind(ss_FH05_fixed,
gs_arm <- gsdmvn:::gs_create_arm(enrollRates, failRates %>% filter(Scenario == g),
ratio = 1, # Randomization ratio
total_time = 44) # Total study duration
arm0 <- gs_arm[["arm0"]]
arm1 <- gs_arm[["arm1"]]
npsurvSS::size_two_arm(arm0, arm1, power = 0.9, alpha = 0.025,
test = list(test="weighted logrank", weight = "FH_p.5_q0"))
npsurvSS::size_two_arm(arm0, arm1, power = 0.9, alpha = 0.025,
test = list(test="rmst difference",
# Milestone allow user to define RMST cutpoint
milestone = 44)) # 42 selected since better than 40 or 44
gsdmvn:::gs_design_wlr(enrollRates = enrollRates,
failRates = failRates %>% filter(Scenario == g),
weight = function(x, arm0, arm1){
gsdmvn:::wlr_weight_fh(x, arm0, arm1,
rho = 0, gamma = 0.5, tau = 4)},
alpha = .025, beta = .1,
upar = qnorm(.975),
lpar = -Inf,
analysisTimes = 44)$bounds %>% filter(Bound =="Upper")
```
```{r, eval = FALSE}
# Ignore tau or (tau can be -1)
gsdmvn:::gs_design_wlr(enrollRates = enrollRates,
failRates = failRates %>% filter(Scenario == g),
weight = function(x, arm0, arm1){
gsdmvn:::wlr_weight_fh(x, arm0, arm1,
rho = 0, gamma = 0.5)},
alpha = .025, beta = .1,
upar = qnorm(.975),
lpar = -Inf,
analysisTimes = 44)$bounds %>% filter(Bound =="Upper")
```
```{r, eval = FALSE}
# MaxCombo
MC_test <- data.frame(rho = c(0, 0, .5), gamma = c(0, .5, .5), tau = -1,
test = 1:3,
Analysis = 1,
analysisTimes = 44)
gs_design_combo(enrollRates,
failRates %>% filter(Scenario == g),
MC_test,
alpha = 0.025,
beta = 0.1,
ratio = 1,
binding = FALSE,
upper = gs_spending_combo,
upar = list(sf = gsDesign::sfLDOF, total_spend = 0.025),
lower = gs_spending_combo,
lpar = list(sf = gsDesign::sfLDOF, total_spend = .1)
)
}
```
## References