-
Notifications
You must be signed in to change notification settings - Fork 23
/
ds.quantileMean.R
134 lines (124 loc) · 5.07 KB
/
ds.quantileMean.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#'
#' @title Computes the quantiles of a server-side variable
#' @description This function calculates the mean and quantile values of a
#' server-side quantitative variable.
#' @details This function does not return the minimum and maximum values
#' because they are potentially disclosive.
#'
#' Depending on the argument \code{type}, two types of analysis can be carried out: \cr
#' (1) \code{type = 'combine'} pooled values are displayed \cr
#' (2) \code{type = 'split'} summaries are
#' returned for each study.
#'
#' Server functions called: \code{quantileMeanDS}, \code{lengthDS} and \code{numNaDS}
#' @param x a character string specifying the name of the numeric vector.
#' @param type a character string that specifies the type of analysis to carry out.
#' This can be set as \code{'combine'} or \code{'split'}.
#' For more information see \strong{Details}.
#' @param datasources a list of \code{\link{DSConnection-class}}
#' objects obtained after login. If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link{datashield.connections_default}}.
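#' @return \code{ds.quantileMean} returns to the client-side the quantiles and statistical mean
#' of a server-side numeric vector: the pooled values if \code{type = 'combine'}, or a list
#' with the values of each study if \code{type = 'split'}.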
#' @author DataSHIELD Development Team
#' @seealso \code{\link{ds.mean}} to compute the statistical mean.
#' @seealso \code{\link{ds.summary}} to generate the summary of a variable.
#' @export
#' @examples
#' \dontrun{
#'
#' ## Version 6, for version 5 see the Wiki
#'
#' # connecting to the Opal servers
#'
#' require('DSI')
#' require('DSOpal')
#' require('dsBaseClient')
#'
#' builder <- DSI::newDSLoginBuilder()
#' builder$append(server = "study1",
#' url = "http://192.168.56.100:8080/",
#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM1", driver = "OpalDriver")
#' builder$append(server = "study2",
#' url = "http://192.168.56.100:8080/",
#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM2", driver = "OpalDriver")
#' builder$append(server = "study3",
#' url = "http://192.168.56.100:8080/",
#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM3", driver = "OpalDriver")
#' logindata <- builder$build()
#'
#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
#'
#' #Get the quantiles and mean of a server-side variable
#'
#' ds.quantileMean(x = "D$LAB_TRIG",
#' type = "combine",
#' datasources = connections)
#'
#'
#' # clear the Datashield R sessions and logout
#' datashield.logout(connections)
#'
#'
#' }
#'
ds.quantileMean <- function(x=NULL, type='combine', datasources=NULL){

  # look for DS connections
  if(is.null(datasources)){
    datasources <- datashield.connections_find()
  }

  if(is.null(x)){
    stop("Please provide the name of the input vector!", call.=FALSE)
  }

  # validate 'type' up front so that an invalid value fails immediately,
  # before any request is sent to the servers
  if(!is.character(type) || length(type) != 1L || !(type %in% c("combine", "split"))){
    stop('Function argument "type" has to be either "combine" or "split"', call.=FALSE)
  }

  # the input variable might be given as column table (i.e. D$x)
  # or just as a vector not attached to a table (i.e. x)
  # we have to make sure the function deals with each case
  xnames <- extract(x)
  varname <- xnames$elements
  obj2lookfor <- xnames$holders

  # check if the input object(s) is(are) defined in all the studies:
  # look for the holder (e.g. the table) when there is one, otherwise for the vector itself.
  # The return value is not used here.
  # NOTE(review): presumably isDefined() raises an error itself when the object is missing - confirm
  if(is.na(obj2lookfor)){
    isDefined(datasources, varname)
  }else{
    isDefined(datasources, obj2lookfor)
  }

  # call the internal function that checks the input object is of the same class in all studies.
  typ <- checkClass(datasources, x)

  # the input object must be a numeric or an integer vector
  if(!('integer' %in% typ) && !('numeric' %in% typ)){
    message(paste0(x, " is of type ", typ, "!"))
    stop("The input object must be an integer or numeric vector.", call.=FALSE)
  }

  # get the server function that produces the quantiles (one result per study)
  cally1 <- paste0('quantileMeanDS(', x, ')')
  quants <- DSI::datashield.aggregate(datasources, as.symbol(cally1))

  # the study-level results are all that is needed for type 'split':
  # return them without requesting the lengths and NA counts used only for pooling
  if(type == "split"){
    return(quants)
  }

  # combine the vector of quantiles - using weighted sum
  # the weight of each study is its number of non-missing values (length minus number of NAs)
  cally2 <- call('lengthDS', x)
  lengths <- DSI::datashield.aggregate(datasources, cally2)
  cally3 <- paste0("numNaDS(", x, ")")
  numNAs <- DSI::datashield.aggregate(datasources, cally3)

  # elements 1 to 7 of each study result are the quantiles and element 8 is the mean
  # (see the names assigned to the output below); all eight are pooled in the same way
  weighted.sum <- rep(0, 8)
  for(i in seq_along(datasources)){
    n.valid <- lengths[[i]] - numNAs[[i]]
    weighted.sum <- weighted.sum + quants[[i]][1:8] * n.valid
  }
  total.valid <- sum(unlist(lengths)) - sum(unlist(numNAs))
  output <- weighted.sum / total.valid
  names(output) <- c("5%","10%","25%","50%","75%","90%","95%","Mean")

  message(" Quantiles of the pooled data")
  return(output)
}