-
Notifications
You must be signed in to change notification settings - Fork 23
/
ds.mean.R
233 lines (214 loc) · 13.1 KB
/
ds.mean.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#'
#' @title Computes server-side vector statistical mean
#' @description This function computes the statistical mean
#' of a given server-side vector.
#'
#' @details This function is similar to the R function \code{mean}.
#'
#' The function can carry out 3 types of analysis depending on
#' the argument \code{type}:\cr
#' (1) If \code{type} is set to \code{'combine'}, \code{'combined'},
#' \code{'combines'} or \code{'c'}, a global mean is calculated.\cr
#' (2) If \code{type} is set to \code{'split'}, \code{'splits'} or \code{'s'},
#' the mean is calculated separately for each study. \cr
#' (3) If \code{type} is set to \code{'both'} or \code{'b'},
#' both sets of outputs are produced.
#'
#' If the argument \code{save.mean.Nvalid} is set to TRUE
#' study-specific means and \code{Nvalids}
#' as well as the global equivalents across all studies combined
#' are saved in the server-side.
#' Once the estimated means and \code{Nvalids}
#' are written into the server-side R environments, they can be used directly to centralize
#' the variable of interest around its global mean or its study-specific means. Finally,
#' the \code{isDefined} internal function checks whether the key variables have been created.
#'
#' Server function called: \code{meanDS}
#' @param x a character specifying the name of a numerical vector.
#' @param type a character string that represents the type of analysis to carry out.
#' This can be set as \code{'combine'}, \code{'combined'}, \code{'combines'},
#' \code{'split'}, \code{'splits'}, \code{'s'},
#' \code{'both'} or \code{'b'}.
#' For more information see \strong{Details}.
#' @param checks logical. If TRUE optional checks of model
#' components will be undertaken. Default is FALSE to save time.
#' It is suggested that checks
#' should only be undertaken once the function call has failed.
#' @param save.mean.Nvalid logical. If TRUE generated values of the mean and
#' the number of valid (non-missing) observations will be saved on the data servers.
#' Default FALSE.
#' For more information see \strong{Details}.
#' @param datasources a list of \code{\link{DSConnection-class}}
#' objects obtained after login. If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link{datashield.connections_default}}.
#' @return \code{ds.mean} returns to the client-side a list including: \cr
#'
#' \code{Mean.by.Study}: estimated mean, \code{Nmissing}
#' (number of missing observations), \code{Nvalid} (number of valid observations) and
#' \code{Ntotal} (sum of missing and valid observations)
#' separately for each study (if \code{type = split} or \code{type = both}). \cr
#' \code{Global.Mean}: estimated mean, \code{Nmissing}, \code{Nvalid} and \code{Ntotal}
#' across all studies combined (if \code{type = combine} or \code{type = both}). \cr
#' \code{Nstudies}: number of studies being analysed. \cr
#' \code{ValidityMessage}: indicates if the analysis was possible. \cr
#'
#' If \code{save.mean.Nvalid} is set as TRUE, the objects
#' \code{Nvalid.all.studies}, \code{Nvalid.study.specific},
#' \code{mean.all.studies} and \code{mean.study.specific} are written to the server-side.
#'
#' @author DataSHIELD Development Team
#' @seealso \code{ds.quantileMean} to compute quantiles.
#' @seealso \code{ds.summary} to generate the summary of a variable.
#' @export
#' @examples
#' \dontrun{
#'
#' ## Version 6, for version 5 see the Wiki
#'
#' # connecting to the Opal servers
#'
#' require('DSI')
#' require('DSOpal')
#' require('dsBaseClient')
#'
#' builder <- DSI::newDSLoginBuilder()
#' builder$append(server = "study1",
#' url = "http://192.168.56.100:8080/",
#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM1", driver = "OpalDriver")
#' builder$append(server = "study2",
#' url = "http://192.168.56.100:8080/",
#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM2", driver = "OpalDriver")
#' builder$append(server = "study3",
#' url = "http://192.168.56.100:8080/",
#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM3", driver = "OpalDriver")
#' logindata <- builder$build()
#'
#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
#'
#' #Calculate the mean of a vector in the server-side
#'
#' ds.mean(x = "D$LAB_TSC",
#' type = "split",
#' checks = FALSE,
#' save.mean.Nvalid = FALSE,
#' datasources = connections)
#'
#' # clear the Datashield R sessions and logout
#' datashield.logout(connections)
#' }
#'
ds.mean <- function(x=NULL, type='split', checks=FALSE, save.mean.Nvalid=FALSE, datasources=NULL){
#####################################################################################
#MODULE 1: IDENTIFY DEFAULT DS CONNECTIONS #
# look for DS connections #
if(is.null(datasources)){ #
datasources <- datashield.connections_find() #
} #
#####################################################################################
#####################################################################################
#MODULE 2: SET UP KEY VARIABLES ALLOWING FOR DIFFERENT INPUT FORMATS #
if(is.null(x)){ #
stop("Please provide the name of the input vector!", call.=FALSE) #
} #
# the input variable might be given as a variable in a data frame (i.e. D$x) #
# or just as a vector not attached to a table (i.e. x) #
# we have to make sure the function deals with each case #
xnames <- extract(x) #
varname <- xnames$elements #
obj2lookfor <- xnames$holders #
#####################################################################################
###############################################################################################
#MODULE 3: GENERIC OPTIONAL CHECKS TO ENSURE CONSISTENT STRUCTURE OF KEY VARIABLES #
#IN DIFFERENT SOURCES #
# beginning of optional checks - the process stops and reports as soon as one #
#check fails #
#
if(checks){ #
message(" -- Verifying the variables in the model") #
#
# check if the input object(s) is(are) defined in all the studies #
if(is.na(obj2lookfor)){ #
defined <- isDefined(datasources, varname) #
}else{ #
defined <- isDefined(datasources, obj2lookfor) #
} #
#
# call the internal function that checks the input object is suitable in all studies #
varClass <- checkClass(datasources, x) #
# the input object must be a numeric or an integer vector #
if(!('integer' %in% varClass) & !('numeric' %in% varClass)){ #
stop("The input object must be an integer or a numeric vector.", call.=FALSE) #
} #
} #
###############################################################################################
###################################################################################################
#MODULE 4: EXTEND "type" argument to include "both" and enable valid alisases #
if(type == 'combine' | type == 'combined' | type == 'combines' | type == 'c') type <- 'combine' #
if(type == 'split' | type == 'splits' | type == 's') type <- 'split' #
if(type == 'both' | type == 'b' ) type <- 'both' #
if(type != 'combine' & type != 'split' & type != 'both') #
stop('Function argument "type" has to be either "both", "combine" or "split"', call.=FALSE) #
#
#MODIFY FUNCTION CODE TO DEAL WITH ALL THREE TYPES #
###################################################################################################
cally <- paste0("meanDS(", x, ")")
ss.obj <- DSI::datashield.aggregate(datasources, as.symbol(cally))
Nstudies <- length(datasources)
ss.mat <- matrix(as.numeric(matrix(unlist(ss.obj),nrow=Nstudies,byrow=TRUE)[,1:4]),nrow=Nstudies)
dimnames(ss.mat) <- c(list(names(ss.obj),names(ss.obj[[1]])[1:4]))
ValidityMessage.mat <- matrix(matrix(unlist(ss.obj),nrow=Nstudies,byrow=TRUE)[,5],nrow=Nstudies)
dimnames(ValidityMessage.mat) <- c(list(names(ss.obj),names(ss.obj[[1]])[5]))
ss.mat.combined <- t(matrix(ss.mat[1,]))
ss.mat.combined[1,1] <- (t(matrix(ss.mat[,3]))%*%ss.mat[,1])/sum(ss.mat[,3])
ss.mat.combined[1,2] <- sum(ss.mat[,2])
ss.mat.combined[1,3] <- sum(ss.mat[,3])
ss.mat.combined[1,4] <- sum(ss.mat[,4])
dimnames(ss.mat.combined) <- c(list("studiesCombined"),list(names(ss.obj[[1]])[1:4]))
# IF save.mean.Nvalid==TRUE - KEY STUDY SPECIFIC STATISTICS ON APPROPRIATE DATA REPOSITORY SERVERS WITH ASSIGN FUNCTION
if(save.mean.Nvalid==TRUE){
for(j in 1:Nstudies){
selected.conn <- datasources[j]
mean.study.specific <- ss.mat[j,1]
Nvalid.study.specific <- ss.mat[j,3]
# SAVE VALIDITY MESSAGE
DSI::datashield.assign(selected.conn, "mean.study.specific", as.symbol(mean.study.specific))
DSI::datashield.assign(selected.conn, "Nvalid.study.specific", as.symbol(Nvalid.study.specific))
}
# SAVE KEY GLOBAL STATISTICS ON ALL DATA REPOSITORY SERVERS WITH ASSIGN FUNCTION
mean.all.studies <- ss.mat.combined[1,1]
Nvalid.all.studies <- ss.mat.combined[1,3]
DSI::datashield.assign(datasources, "mean.all.studies", as.symbol(mean.all.studies))
DSI::datashield.assign(datasources, "Nvalid.all.studies", as.symbol(Nvalid.all.studies))
#############################################################################
# MODULE 5: CHECK DATA OBJECTS SUCCESSFULLY CREATED #
key.names <- extract("mean.all.studies") #
key.varname <- key.names$elements #
key.obj2lookfor <- key.names$holders #
#
if(is.na(key.obj2lookfor)){ #
key.defined <- isDefined(datasources, key.varname) #
}else{ #
key.defined <- isDefined(datasources, key.obj2lookfor) #
} #
#
#if(key.defined==TRUE){ #
#print("Data object <mean.all.studies> created successfully in all sources") #
#} #
#############################################################################
}
#PRIMARY FUNCTION OUTPUT SUMMARISE RESULTS FROM
#AGGREGATE FUNCTION AND RETURN TO CLIENT-SIDE
if (type=='split'){
return(list(Mean.by.Study=ss.mat,Nstudies=Nstudies,ValidityMessage=ValidityMessage.mat))
}
if (type=="combine") {
return(list(Global.Mean=ss.mat.combined,Nstudies=Nstudies,ValidityMessage=ValidityMessage.mat))
}
if (type=="both") {
return(list(Mean.by.Study=ss.mat,Global.Mean=ss.mat.combined,Nstudies=Nstudies,ValidityMessage=ValidityMessage.mat))
}
}
#ds.mean