/
pred_update.R
324 lines (284 loc) · 13.9 KB
/
pred_update.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
#' Perform Model Updating on an Existing Prediction Model
#'
#' This function takes an existing (previously developed) prediction model and
#' applies various model updating methods to tailor/adapt it to a new dataset.
#' Various levels of updating are possible, ranging from model re-calibration to
#' model refit.
#'
#' @param x an object of class "\code{predinfo}" produced by calling
#' \code{\link{pred_input_info}} containing information on exactly one
#' existing prediction model.
#' @param update_type character variable specifying the level of updating that
#' is required.
#' @param new_data data.frame upon which the prediction models should be
#' updated.
#' @param binary_outcome Character variable giving the name of the column in
#' \code{new_data} that represents the observed binary outcomes (should be
#' coded 0 and 1 for non-event and event, respectively). Only relevant for
#' \code{model_type}="logistic"; leave as \code{NULL} otherwise. Leave as
#' \code{NULL} if \code{new_data} does not contain any outcomes.
#' @param survival_time Character variable giving the name of the column in
#' \code{new_data} that represents the observed survival times. Only relevant
#' for \code{x$model_type}="survival"; leave as \code{NULL} otherwise.
#' @param event_indicator Character variable giving the name of the column in
#' \code{new_data} that represents the observed survival indicator (1 for
#' event, 0 for censoring). Only relevant for \code{x$model_type}="survival";
#' leave as \code{NULL} otherwise.
#'
#' @details This function takes a single existing (previously estimated)
#' prediction model, and apply various model discrete model updating methods
#' (see Su et al. 2018) to tailor the model to a new dataset.
#'
#' The type of updating method is selected with the \code{update_type}
#' parameter, with options: "intercept_update", "recalibration" and "refit".
#' "intercept_update" corrects the overall calibration-in-the-large of the
#' model, through altering the model intercept (or baseline hazard) to suit
#' the new dataset. This is achieved by fitting a logistic model (if the
#' existing model is of type logistic) or time-to-event model (if the existing
#' model if of type survival) to the new dataset, with the linear predictor as
#' the only covariate, with the coefficient fixed at unity (i.e. as an
#' offset). "recalibration" corrects the calibration-in-the-large and any
#' under/over-fitting, by fitting a logistic model (if the existing model is
#' of type logistic) or time-to-event model (if the existing model if of type
#' survival) to the new dataset, with the linear predictor as the only
#' covariate. Finally, "refit" takes the original model structure and
#' re-estimates all coefficients; this has the effect as re-developing the
#' original model in the new data.
#'
#' \code{new_data} should be a data.frame, where each row should be an
#' observation (e.g. patient) and each variable/column should be a predictor
#' variable. The predictor variables need to include (as a minimum) all of the
#' predictor variables that are included in the existing prediction model
#' (i.e., each of the variable names supplied to
#' \code{\link{pred_input_info}}, through the \code{model_info} parameter,
#' must match the name of a variables in \code{new_data}).
#'
#' Any factor variables within \code{new_data} must be converted to dummy
#' (0/1) variables before calling this function. \code{\link{dummy_vars}} can
#' help with this. See \code{\link{pred_predict}} for examples.
#'
#' \code{binary_outcome}, \code{survival_time} and \code{event_indicator} are
#' used to specify the outcome variable(s) within \code{new_data} (use
#' \code{binary_outcome} if \code{x$model_type} = "logistic", or use
#' \code{survival_time} and \code{event_indicator} if \code{x$model_type} =
#' "survival").
#'
#' @return A object of class "\code{predUpdate}". This is the same as that
#' detailed in \code{\link{pred_input_info}}, with the added element
#' containing the estimates of the model updating and the update type.
#'
#' @examples
#' #Example 1 - update time-to-event model by updating the baseline hazard in new dataset
#' model1 <- pred_input_info(model_type = "survival",
#' model_info = SYNPM$Existing_TTE_models[1,],
#' cum_hazard = SYNPM$TTE_mod1_baseline)
#' recalibrated_model1 <- pred_update(x = model1,
#' update_type = "intercept_update",
#' new_data = SYNPM$ValidationData,
#' survival_time = "ETime",
#' event_indicator = "Status")
#' summary(recalibrated_model1)
#'
#' @references Su TL, Jaki T, Hickey GL, Buchan I, Sperrin M. A review of
#' statistical updating methods for clinical prediction models. \emph{Stat Methods
#' Med Res}. 2018 Jan;27(1):185-197. doi: 10.1177/0962280215626466.
#'
#' @seealso \code{\link{pred_input_info}}
#'
#' @export
pred_update <- function(x,
update_type = c("intercept_update", "recalibration", "refit"),
new_data,
binary_outcome = NULL,
survival_time = NULL,
event_indicator = NULL) {
UseMethod("pred_update")
}
#' @export
pred_update.default <- function(x,
update_type = c("intercept_update", "recalibration", "refit"),
new_data,
binary_outcome = NULL,
survival_time = NULL,
event_indicator = NULL) {
stop("'x' is not of class 'predinfo'",
call. = FALSE)
}
#' @export
pred_update.predinfo_logistic <- function(x,
update_type = c("intercept_update", "recalibration", "refit"),
new_data,
binary_outcome = NULL,
survival_time = NULL,
event_indicator = NULL) {
#Check outcomes were inputted (needed to update the model)
if (is.null(binary_outcome)) {
stop("binary_outcome must be supplied to update the existing model(s)",
call. = FALSE)
}
#check only updating one model
if (x$M != 1) {
stop("M > 1,'pred_update' currently only supports updating a single model",
call. = FALSE)
}
update_type <- match.arg(update_type)
#Make predictions within new_data using the existing prediction model
predictions <- predRupdate::pred_predict(x = x,
new_data = new_data,
binary_outcome = binary_outcome,
survival_time = survival_time,
event_indicator = event_indicator)
if(update_type == "intercept_update") {
#Run model update
fit <- stats::glm(predictions$Outcomes ~ offset(predictions$LinearPredictor),
family = stats::binomial(link = "logit"))
param <- data.frame(cbind(fit$coefficients, sqrt(diag(stats::vcov(fit)))))
names(param) <- c("Estimate", "Std. Error")
#Update old coefficients
coef_table <- x$coefs
coef_table["Intercept"] <- coef_table["Intercept"] + param["(Intercept)","Estimate"]
} else if(update_type == "recalibration") {
#Run model update
fit <- stats::glm(predictions$Outcomes ~ predictions$LinearPredictor,
family = stats::binomial(link = "logit"))
param <- data.frame(cbind(fit$coefficients, sqrt(diag(stats::vcov(fit)))))
rownames(param)[2] <- c("Slope")
names(param) <- c("Estimate", "Std. Error")
if(is.na(param["Slope","Estimate"])) {
stop("Error occured in recalibration")
} else {
#Update old coefficients
coef_table <- x$coefs
coef_table <- coef_table * param["Slope","Estimate"]
coef_table["Intercept"] <- coef_table["Intercept"] + param["(Intercept)","Estimate"]
}
} else if(update_type == "refit") {
#Run model update
formula_text <- paste0(binary_outcome, x$formula[1], x$formula[2])
fit <- stats::glm(eval(parse(text=formula_text)), data = new_data,
family = stats::binomial(link = "logit"))
param <- data.frame(cbind(fit$coefficients, sqrt(diag(stats::vcov(fit)))))
names(param) <- c("Estimate", "Std. Error")
#Update old coefficients
coef_table <- data.frame(t(param$Estimate))
names(coef_table) <- names(stats::coef(fit))
names(coef_table)[which(names(coef_table) == "(Intercept)")] <- "Intercept"
}
#Return results and set S3 class
update_results <- list("M" = 1,
"model_type" = x$model_type,
"coefs" = data.frame(as.list(coef_table)),
"coef_names" = names(coef_table),
"formula" = stats::as.formula(paste0(x$formula[1],
x$formula[2])),
"model_info" = coef_table,
"model_update_results" = param,
"update_type" = update_type)
class(update_results) <- c("predUpdate", "predinfo_logistic", "predinfo")
update_results
}
#' @export
pred_update.predinfo_survival <- function(x,
update_type = c("intercept_update", "recalibration", "refit"),
new_data,
binary_outcome = NULL,
survival_time = NULL,
event_indicator = NULL) {
#Check outcomes were inputted (needed to update the model)
if (is.null(survival_time) |
is.null(event_indicator)) {
stop("survival_time and event_indicator must be supplied to update the existing model(s)",
call. = FALSE)
}
#check only updating one model
if (x$M != 1) {
stop("M > 1,'pred_update' currently only supports updating a single model",
call. = FALSE)
}
update_type <- match.arg(update_type)
#Make predictions within new_data using the existing prediction model
predictions <- predRupdate::pred_predict(x = x,
new_data = new_data,
binary_outcome = binary_outcome,
survival_time = survival_time,
event_indicator = event_indicator)
if(update_type == "intercept_update") {
#Run model update
fit <- survival::coxph(predictions$Outcomes ~ offset(predictions$LinearPredictor))
#obtain new baseline cumulative hazard
BH <- survival::basehaz(fit)
#undo the scaling of the offset term in the baseline hazard (above BH is for scaled LP):
BH$hazard <- BH$hazard / exp(mean(predictions$LinearPredictor))
#keep original coefficients
coef_table <- x$coefs
param <- NA
} else if(update_type == "recalibration") {
#Run model update
fit <- survival::coxph(predictions$Outcomes ~ predictions$LinearPredictor)
param <- data.frame(cbind(fit$coefficients, sqrt(diag(stats::vcov(fit)))))
rownames(param)[1] <- c("Slope")
names(param) <- c("Estimate", "Std. Error")
if(is.na(param["Slope","Estimate"])) {
stop("Error occured in recalibration")
} else {
#Update old coefficients
coef_table <- x$coefs
coef_table <- coef_table * param["Slope","Estimate"]
}
#obtain new baseline cumulative hazard
BH <- survival::basehaz(fit, centered = FALSE)
} else if(update_type == "refit") {
#Run model update
DM <- data.frame("outcome" = predictions$Outcomes,
new_data[,x$coef_names])
fit <- survival::coxph(outcome ~ ., data = DM)
param <- data.frame(cbind(fit$coefficients, sqrt(diag(stats::vcov(fit)))))
names(param) <- c("Estimate", "Std. Error")
#Update old coefficients
coef_table <- data.frame(t(param$Estimate))
names(coef_table) <- names(stats::coef(fit))
#obtain new baseline cumulative hazard
BH <- survival::basehaz(fit, centered = FALSE)
}
#Return results and set S3 class
update_results <- list("M" = 1,
"model_type" = x$model_type,
"coefs" = data.frame(as.list(coef_table)),
"coef_names" = names(coef_table),
"formula" = stats::as.formula(paste0(x$formula[1],
x$formula[2])),
"cum_hazard" = data.frame("time" = BH$time,
"hazard" = BH$hazard),
"model_info" = coef_table,
"model_update_results" = param,
"update_type" = update_type)
class(update_results) <- c("predUpdate", "predinfo_survival", "predinfo")
update_results
}
#' @export
summary.predUpdate <- function(object, ...) {
cat(paste("Original model was updated with type",
object$update_type, sep = " "))
if(object$model_type == "survival"){
if(object$update_type != "intercept_update"){
cat("\nThe model updating results are as follows: \n")
print(object$model_update_results)
}
cat("\nThe new model baseline cumulative hazard is: \n")
if(nrow(object$cum_hazard) > 6){
print(utils::head(object$cum_hazard, 6))
cat("...\n")
}else{
print((object$cum_hazard))
}
} else if (object$model_type == "logistic"){
cat("\nThe model updating results are as follows: \n")
print(object$model_update_results)
}
cat("\nUpdated Model Coefficients \n",
"================================= \n", sep = "")
print(object$coefs)
cat("\nModel Functional Form \n",
"================================= \n", sep = "")
cat(as.character(object$formula)[2])
}