/
strip_rawdata.R
executable file
·149 lines (129 loc) · 4.66 KB
/
strip_rawdata.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#' Strip rawdata from a generalized additive model
#'
#' This function removes all individual participant data from a generalized additive
#' model object, while keeping aggregated quantities. The resulting object can be
#' shared without exposing individual participant data.
#'
#' @param model A model fitted using \code{mgcv::gam}, \code{mgcv::bam},
#' \code{mgcv::gamm}, or \code{gamm4::gamm4}.
#' @param path Optional path in which to save the object as a \code{.rds} file.
#' @param save_ranges Logical specifying whether to save the ranges of each
#' variable used by the model. For numeric variables this amounts to the minimum
#' and maximum, and for factors all levels are saved. The values will be in the
#' list element \code{var.summary} of the returned object.
#' @param ... Other arguments (not used).
#'
#' @details
#'
#' Thin plate regression splines (\code{bs='tp'} and \code{bs='ts'}) and Duchon splines \code{bs='ds'}
#' are currently not supported, since for these splines \code{mgcv}
#' requires the unique values of the explanatory variables for each smooth term for the \code{predict}
#' method to work. Future updates to this package will fix this.
#'
#' @return Model object with individual participant data removed.
#' @export
#'
#' @example /inst/examples/metagam_examples.R
#'
strip_rawdata <- function(model, path = NULL, save_ranges = TRUE, ...){
UseMethod("strip_rawdata")
}
#' @describeIn strip_rawdata Strip rawdata from list object returned by gamm4
#' @export
strip_rawdata.list <- function(model, path = NULL, save_ranges = TRUE, ...){
model <- model$gam
strip_rawdata(model)
}
#' @describeIn strip_rawdata Strip rawdata from gamm object
#' @export
strip_rawdata.gamm <- function(model, path = NULL, save_ranges = TRUE, ...){
model <- model$gam
strip_rawdata(model)
}
#' @describeIn strip_rawdata Strip rawdata from gam object
#' @export
strip_rawdata.bam <- function(model, path = NULL, save_ranges = TRUE, ...){
# bam objects inherit from gam, and can be treated as them
NextMethod()
}
#' @describeIn strip_rawdata Strip rawdata from gam object
#' @export
strip_rawdata.gam <- function(model, path = NULL, save_ranges = TRUE, ...){
# Vector of mgcv smooth classes currently supported
supported_smooths <- c("pspline.smooth", "tensor.smooth", "Bspline.smooth",
"cp.smooth", "cr.smooth", "cyclic.smooth", "t2.smooth")
lapply(model$smooth, function(x){
check_smooths(x, supported_smooths)
if(inherits(x, c("tensor.smooth", "t2.smooth"))){
lapply(x$margin, check_smooths, supported_smooths)
}
})
# Original mgcv::gam output
mgcv_output <- utils::capture.output(summary(model))
# Terms with respective variables
term_list <- lapply(model$smooth, function(x) x[["term"]])
names(term_list) <- lapply(model$smooth, function(x) x[["label"]])
# Find p-values of smooth terms
s.table <- summary(model)$s.table
# Save only the parts of the model which contain aggregated data
obj <- list(
aic = model$aic,
assign = model$assign,
boundary = model$boundary,
call = model$call,
cmX = model$cmX,
coefficients = model$coefficients,
control = model$control,
converged = model$converged,
deviance = model$deviance,
df.null = model$df.null,
df.residual = model$df.residual,
edf = model$edf,
edf1 = model$edf1,
edf2 = model$edf2,
family = model$family,
formula = model$formula,
gcv.ubre = model$gcv.ubre,
iter = model$iter,
method = model$method,
mgcv.conv = model$mgcv.conv,
min.edf = model$min.edf,
nsdf = model$nsdf,
null.deviance = model$null.deviance,
offset = model$offset,
optimizer = model$optimizer,
pred.formula = model$pred.formula,
pterms = model$pterms,
rank = model$rank,
reml.scale = model$reml.scale,
rV = model$rV,
scale = model$scale,
scale.estimated = model$scale.estimated,
sig2 = model$sig2,
smooth = model$smooth,
sp = model$sp,
terms = model$terms,
var.summary = if(save_ranges) model$var.summary,
Ve = model$Ve,
Vp = model$Vp,
Vc = model$Vc,
n = nrow(model$model),
smooth_terms = names(term_list),
term_list = term_list,
s.table = s.table,
mgcv_output = mgcv_output
)
class(obj) <- c("striprawdata", class(model))
if(!is.null(path)) {
print(paste("Saving object to", path))
saveRDS(obj, file = path)
}
return(obj)
}
check_smooths <- function(x, supported_smooths){
if(!inherits(x, supported_smooths))
stop(paste0("metagam currently supports the following splines:\n",
paste(supported_smooths, collapse = "\n"),
"\n\nSee the Details section in the documentation for strip_rawdata()."))
0
}