/
PMcompare.R
418 lines (380 loc) · 17.9 KB
/
PMcompare.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
#' @title Compare NPAG or IT2B runs
#' @description
#' ` r lifecycle::badge("stable")`
#'
#' Compare parameters, convergence, -2*log likelihood, AIC and bias
#' and imprecision of population and posterior predictions.
#' @details
#' Objects can be specified separated by commas, e.g. `PM_compare(run1, run2, run3)`
#' followed by any arguments you wish to [plot.PMop], [mtsknn.eq].
#' P-values are based on comparison using the nearest neighbors
#' approach if all models are non-parametrics. Models may only be compared on
#' parameters that are included in the first model. The P-value is the
#' comparison between each model and the first model in
#' the list. Missing P-values are when a model has no parameter names
#' in common with the first model, and for the first model compared to itself,
#' or when models from IT2B runs are included. Significant P-values indicate
#' that the null hypothesis should be rejected, i.e. the joint distributions
#' between the two compared models for that parameter are significantly different.
#'
#' @param x The first [PM_result] object you wish to compare. Unlike the legacy
#' [PMcompare] this function only uses objects already loaded with [PM_load].
#' This will serve as the reference output for P-value testing (see details).
#' @param y The second [PM_result] object to compare.
#' @param ... Additional [PM_result] objects to compare. See details.
#' Also, parameters to be passed to [plot.PM_op]
#' if `plot` is true as well as to [mtsknn.eq]. Order does not matter.
#' @param icen Can be either "median" for the predictions based on medians of
#' `pred.type` parameter value distributions, or "mean". Default is "median".
#' @param outeq Number of the output equation to compare; default is 1
#' @param plot Boolean operator selecting whether to generate observed vs.
#' predicted plots for each data object as in [plot.PM_op].
#' @return A data frame with the following objects for each model to analyze:
#' \item{run }{The run number of the data}
#' \item{type }{NPAG or IT2B data}
#' \item{nsub }{Number of subjects in the model}
#' \item{nvar }{Number of random parameters in the model}
#' \item{par }{Names of random parameters}
#' \item{cycles }{Number of cycles run}
#' \item{converge }{Boolean value if convergence occurred.}
#' \item{ll }{Final cycle -2*Log-likelihood }
#' \item{aic }{Final cycle Akaike Information Criterion}
#' \item{bic }{Final cycle Bayesian (Schwartz) Information Criterion }
#' \item{popBias }{Bias, or mean weighted prediction error of predictions based on population parameters minus observations}
#' \item{popImp }{Imprecision, or bias-adjusted mean weighted squared error of predictions based on population parameters minus observations }
#' \item{popPerRMSE}{Percent root mean squared error of predictions based on population parameters minus observations}
#' \item{postBias }{Bias, or mean weighted prediction error of predictions - observations based on posterior parameters}
#' \item{postImp }{Imprecision, or bias-adjusted mean weighted squared error of predictions - observations based on posterior parameters}
#' \item{postPerRMSE}{Percent root mean squared error of predictions based on posterior parameters minus observations}
#' \item{pval }{P-value for each model compared to the first. See details.}
#' @author Michael Neely
#' @seealso [PM_load], [plot.PM_op], [mtsknn.eq]
#' @export
PM_compare <- function(x, y, ..., icen = "median", outeq = 1, plot = F) {
if (missing(x) | missing(y)) stop("You must specify at least two PM_result objects for PM_compare.\n")
stopifnot("Please specify your PM_result objects. See help." = inherits(x, "PM_result"))
# parse dots
arglist <- list(...)
namesPlot <- names(formals(plot.PM_op))
namesMTSKNN <- names(formals(mtsknn.eq))
# get the args to plot.PMop and set defaults if missing
plotArgs <- which(names(arglist) %in% namesPlot)
argsPlot <- arglist[plotArgs]
MTSKNNargs <- which(names(arglist) %in% namesMTSKNN)
argsMTSKNN <- arglist[MTSKNNargs]
if (!"k" %in% names(argsMTSKNN)) argsMTSKNN$k <- 3
if (!"print" %in% names(argsMTSKNN)) argsMTSKNN$print <- FALSE
# get the others if there and assume that they are PMdata objects for now
if ((length(arglist) - length(c(plotArgs, MTSKNNargs))) > 0) {
if (length(c(plotArgs, MTSKNNargs)) == 0) {
argsPM <- 1:length(arglist)
} else {
argsPM <- (1:length(arglist))[-c(plotArgs, MTSKNNargs)]
}
} else {
argsPM <- NULL
}
if (length(argsPM) == 0) obj <- list(x, y)
if (length(argsPM) >= 1) obj <- c(list(x, y), arglist[argsPM])
# declare global variables to avoid problems with R CMD Check
# NPAGout <- NULL
# get each obj
nobj <- length(obj)
allObj <- purrr::map(obj, function(x) {
if(!is.null(x$NPdata)){x$NPdata} else {x$ITdata}
})
objClass <- purrr::map(allObj, function(x) class(x)[1])
# check for non-Pmetrics data objects and remove them if found
yesPM <- which(objClass %in% c("NPAG", "IT2B"))
allObj <- allObj[yesPM]
objClass <- objClass[yesPM]
# check for zero cycle objects
cycles <- unlist(sapply(allObj, function(x) x$icyctot))
if (any(cycles == 0)) stop(paste("Do not include 0-cycle runs: item(s) ", paste(which(cycles == 0), collapse = ", "), "\n", sep = ""))
op <- purrr::map(obj, function(x) {
x$op$data
})
if (plot) {
if (!"resid" %in% names(argsPlot)) {
if (nobj <= 3) {
par(mfrow = c(nobj, 2))
} else {
par(mfrow = c(3, 2))
devAskNewPage(ask = T)
}
for (i in 1:length(op)) {
do.call(plot.PM_op, args = c(list(
x = op[[i]], pred.type = "pop", icen = icen, outeq = outeq,
title = paste("Model", i, "Population")
), argsPlot))
do.call(plot.PM_op, args = c(list(
x = op[[i]], pred.type = "post", icen = icen, outeq = outeq,
title = paste("Model", i, "Posterior")
), argsPlot))
}
} else {
devAskNewPage(ask = T)
for (i in 1:length(op)) {
do.call(plot.PM_op, args = c(list(
x = op[[i]], pred.type = "post", icen = icen, outeq = outeq,
title = paste("Model", i)
), argsPlot))
}
}
par(mfrow = c(1, 1))
devAskNewPage(ask = F)
}
# get summaries of op for outeq
sumobjPop <- mapply(summary.PMop, op, MoreArgs = list(outeq = outeq, pred.type = "pop", icen = icen), SIMPLIFY = F)
sumobjPost <- mapply(summary.PMop, op, MoreArgs = list(outeq = outeq, pred.type = "post", icen = icen), SIMPLIFY = F)
popBias <- sapply(sumobjPop, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$mwpe))
postBias <- sapply(sumobjPost, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$mwpe))
popImp <- sapply(sumobjPop, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$bamwspe))
postImp <- sapply(sumobjPost, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$bamwspe))
popPercent_RMSE <- sapply(sumobjPop, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$percent_rmse))
postPercent_RMSE <- sapply(sumobjPost, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$percent_rmse))
# get population points
final <- purrr::map(obj, function(x) {
x$final$data
})
# find intersecting parameters
popPointsRef <- final[[1]]$popPoints
namesRef <- names(popPointsRef)
popPointsOther <- lapply(2:nobj, function(x) final[[x]]$popPoints)
t <- sapply(2:nobj, function(x) {
thisPopPoints <- popPointsOther[[x - 1]]
namesThis <- names(thisPopPoints)
intersect <- namesRef[namesRef %in% namesThis]
if (length(intersect) > 0) {
popPoints1 <- popPointsRef[, intersect]
popPoints2 <- thisPopPoints[, intersect]
t <- do.call(mtsknn.eq, args = c(list(x = popPoints1, y = popPoints2), argsMTSKNN))$pval
} else {
t <- NA
}
signif(t, 3)
})
t <- c(NA, t)
results <- data.frame(
run = seq(1:nobj),
type = unlist(objClass),
nsub = unlist(purrr::map(obj,function(x){x$final$nsub})),
nvar = mapply(function(x) x$nvar, allObj),
par = mapply(function(x) paste(x$par, collapse = " "), allObj),
converge = mapply(function(x) x$converge == 1, allObj),
ll = mapply(function(x) -2 * x$ilog[length(x$ilog)], allObj),
aic = mapply(function(x) tail(x$iic[, 1], 1), allObj),
bic = mapply(function(x) tail(x$iic[, 2], 1), allObj),
popBias = popBias,
popImp = popImp,
popPer_RMSE = popPercent_RMSE,
postBias = postBias,
postImp = postImp,
postPer_RMSE = postPercent_RMSE,
pval = t
)
names(results)[7] <- "-2*LL"
results[, 7:15] <- format(results[, 7:15], digits = 4)
row.names(results) <- 1:nobj
results
}
#' @title Compare NPAG or IT2B runs
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' Compare NPAG or IT2B runs. his function is superseded by [PM_compare].
#' @details
#' For backwards compatibility, objects can be specified separated by commas, e.g. PMcompare(1,2,3) followed by
#' any arguments you wish to \code{\link{plot.PMop}}, \code{\link{mtsknn.eq}}. P-values are based on comparison using the nearest neighbors
#' approach if all models are non-parametrics. Models may only be compared on parameters that are included
#' in the first model. The P-value is the comparison between each model and the first model in
#' the list. Missing P-values are when a model has no parameter names in common with the first
#' model, and for the first model compared to itself, or when models from IT2B runs are included. Significant P-values indicate that the null
#' hypothesis should be rejected, i.e. the joint distributions between the two compared models are
#' significantly different.
#'
#' @param x The run number of the first object you wish to compare. This should be a folder in your
#' working directory. To avoid confusion, this function does not use objects
#' already loaded with \code{\link{PMload}}.
#' This will serve as the reference output for P-value testing (see details).
#' @param y The run number of the second object to compare.
#' @param \dots Additional run numbers to compare. See details. Also, parameters to be passed to \code{\link{plot.PMop}}
#' if \code{plot} is true as well as to \code{\link{mtsknn.eq}}. Order does not matter.
#' @param icen Can be either "median" for the predictions based on medians of \code{pred.type} parameter value
#' distributions, or "mean". Default is "median".
#' @param outeq Number of the output equation to compare; default is 1
#' @param plot Boolean operator selecting whether to generate observed vs. predicted plots for each data object
#' as in \code{\link{plot.PMop}}
#' @return A data frame with the following objects for each model to analyze:
#' \item{run }{The run number of the data}
#' \item{type }{NPAG or IT2B data}
#' \item{nsub }{Number of subjects in the model}
#' \item{nvar }{Number of random parameters in the model}
#' \item{par }{Names of random parameters}
#' \item{cycles }{Number of cycles run}
#' \item{converge }{Boolean value if convergence occurred.}
#' \item{ll }{Final cycle -2*Log-likelihood }
#' \item{aic }{Final cycle Akaike Information Criterion}
#' \item{bic }{Final cycle Bayesian (Schwartz) Information Criterion }
#' \item{popBias }{Bias, or mean weighted prediction error of predictions based on population parameters minus observations}
#' \item{popImp }{Imprecision, or bias-adjusted mean weighted squared error of predictions based on population parameters minus observations }
#' \item{popPerRMSE}{Percent root mean squared error of predictions based on population parameters minus observations}
#' \item{postBias }{Bias, or mean weighted prediction error of predictions - observations based on posterior parameters}
#' \item{postImp }{Imprecision, or bias-adjusted mean weighted squared error of predictions - observations based on posterior parameters}
#' \item{postPerRMSE}{Percent root mean squared error of predictions based on posterior parameters minus observations}
#' \item{pval }{P-value for each model compared to the first. See details.}
#' @author Michael Neely
#' @seealso \code{\link{PMload}}, \code{\link{plot.PMop}}, \code{\link{mtsknn.eq}}
#' @export
PMcompare <- function(x, y, ..., icen = "median", outeq = 1, plot = F) {
if (missing(x) | missing(y)) stop("You must specify at least two run numbers for PMcompare.\n")
if (inherits(x, c("NPdata", "ITdata"))) stop("You should specify your objects by run number. See help.\n")
# parse dots
arglist <- list(...)
namesPlot <- names(formals(plot.PMop))
namesMTSKNN <- names(formals(mtsknn.eq))
# get the args to plot.PMop and set defaults if missing
plotArgs <- which(names(arglist) %in% namesPlot)
argsPlot <- arglist[plotArgs]
if (!"cex.stat" %in% names(argsPlot)) argsPlot$cex.stat <- 0.8
if (!"x.stat" %in% names(argsPlot)) argsPlot$x.stat <- 0.5
# get the args to mtsknn.eq and set defaults if missing
MTSKNNargs <- which(names(arglist) %in% namesMTSKNN)
argsMTSKNN <- arglist[MTSKNNargs]
if (!"k" %in% names(argsMTSKNN)) argsMTSKNN$k <- 3
if (!"print" %in% names(argsMTSKNN)) argsMTSKNN$print <- FALSE
# get the others if there and assume that they are PMdata objects for now
if ((length(arglist) - length(c(plotArgs, MTSKNNargs))) > 0) {
if (length(c(plotArgs, MTSKNNargs)) == 0) {
argsPM <- 1:length(arglist)
} else {
argsPM <- (1:length(arglist))[-c(plotArgs, MTSKNNargs)]
}
} else {
argsPM <- NULL
}
if (length(argsPM) == 0) obj <- list(x, y)
if (length(argsPM) >= 1) obj <- c(list(x, y), arglist[argsPM])
# declare global variables to avoid problems with R CMD Check
NPAGout <- NULL
IT2Bout <- NULL
# get each obj
nobj <- length(obj)
allObj <- list()
for (thisobj in 1:nobj) {
# find objects
if (!file.exists(as.character(obj[thisobj]))) {
cat(paste(obj[thisobj], " is not a folder in the current working directory.\n", sep = ""))
} else {
ITfile <- list.files(paste(obj[thisobj], "outputs", sep = "/"), pattern = "IT2Bout.Rdata", full.names = T)
NPfile <- list.files(paste(obj[thisobj], "outputs", sep = "/"), pattern = "NPAGout.Rdata", full.names = T)
load(c(ITfile, NPfile))
if (length(ITfile) > 0) {
allObj[[thisobj]] <- IT2Bout$ITdata
} else {
allObj[[thisobj]] <- NPAGout$NPdata
}
}
}
objClass <- mapply(class, allObj)
# check for non-Pmetrics data objects and remove them if found
yesPM <- which(objClass %in% c("NPAG", "IT2B"))
allObj <- allObj[yesPM]
objClass <- objClass[yesPM]
# check for zero cycle objects
cycles <- unlist(sapply(allObj, function(x) x$icyctot))
if (any(cycles == 0)) stop(paste("Do not include 0-cycle runs: item(s) ", paste(which(cycles == 0), collapse = ", "), "\n", sep = ""))
op <- apply(mapply(makeOP, allObj), 2, function(x) {
data.frame(x)
})
op <- lapply(op, function(x) {
class(x) <- c("PMop", "data.frame")
x
})
if (plot) {
if (!"resid" %in% names(argsPlot)) {
if (nobj <= 3) {
par(mfrow = c(nobj, 2))
} else {
par(mfrow = c(3, 2))
devAskNewPage(ask = T)
}
for (i in 1:length(op)) {
do.call(plot.PMop, args = c(list(
x = op[[i]], pred.type = "pop", icen = icen, outeq = outeq,
main = paste("Model", i, "Population")
), argsPlot))
do.call(plot.PMop, args = c(list(
x = op[[i]], pred.type = "post", icen = icen, outeq = outeq,
main = paste("Model", i, "Posterior")
), argsPlot))
}
} else {
devAskNewPage(ask = T)
for (i in 1:length(op)) {
do.call(plot.PMop, args = c(list(
x = op[[i]], pred.type = "post", icen = icen, outeq = outeq,
main = paste("Model", i)
), argsPlot))
}
}
par(mfrow = c(1, 1))
devAskNewPage(ask = F)
}
# get summaries of op for outeq
sumobjPop <- mapply(summary.PMop, op, MoreArgs = list(outeq = outeq, pred.type = "pop", icen = icen), SIMPLIFY = F)
sumobjPost <- mapply(summary.PMop, op, MoreArgs = list(outeq = outeq, pred.type = "post", icen = icen), SIMPLIFY = F)
popBias <- sapply(sumobjPop, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$mwpe))
postBias <- sapply(sumobjPost, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$mwpe))
popImp <- sapply(sumobjPop, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$bamwspe))
postImp <- sapply(sumobjPost, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$bamwspe))
popPercent_RMSE <- sapply(sumobjPop, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$percent_rmse))
postPercent_RMSE <- sapply(sumobjPost, function(x) ifelse(is.na(x$pe[1]), NA, x$pe$percent_rmse))
# if all NPAG, calculate nearest neighbors p-value compared to first
if (all(sapply(allObj, function(x) inherits(x, "NPAG")))) {
# get population points
final <- mapply(makeFinal, allObj)
# find intersecting parameters
popPointsRef <- final[, 1]$popPoints
namesRef <- names(popPointsRef)
popPointsOther <- lapply(2:nobj, function(x) final[, x]$popPoints)
t <- sapply(2:nobj, function(x) {
thisPopPoints <- popPointsOther[[x - 1]]
namesThis <- names(thisPopPoints)
intersect <- namesRef[namesRef %in% namesThis]
if (length(intersect) > 0) {
popPoints1 <- popPointsRef[, intersect]
popPoints2 <- thisPopPoints[, intersect]
t <- do.call(mtsknn.eq, args = c(list(x = popPoints1, y = popPoints2), argsMTSKNN))$pval
} else {
t <- NA
}
signif(t, 3)
})
t <- c(NA, t)
} else {
t <- NA
}
results <- data.frame(
run = unlist(obj),
type = objClass,
nsub = mapply(function(x) x$nsub, allObj),
nvar = mapply(function(x) x$nvar, allObj),
par = mapply(function(x) paste(x$par, collapse = " "), allObj),
converge = mapply(function(x) x$converge == 1, allObj),
ll = mapply(function(x) -2 * x$ilog[length(x$ilog)], allObj),
aic = mapply(function(x) tail(x$iic[, 1], 1), allObj),
bic = mapply(function(x) tail(x$iic[, 2], 1), allObj),
popBias = popBias,
popImp = popImp,
popPer_RMSE = popPercent_RMSE,
postBias = postBias,
postImp = postImp,
postPer_RMSE = postPercent_RMSE,
pval = t
)
names(results)[7] <- "-2*LL"
results[, 7:15] <- format(results[, 7:15], digits = 4)
row.names(results) <- 1:nobj
results
}