/
dataFrameDS.R
103 lines (91 loc) · 4.8 KB
/
dataFrameDS.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#' @title dataFrameDS called by ds.dataFrame
#' @description The serverside function that creates a data frame from
#' its elemental components. That is: pre-existing data frames;
#' single variables; and/or matrices
#' @details A data frame is a list of variables all with the same number of rows with unique row
#' names, which is of class 'data.frame'. ds.dataFrame will create a data frame by combining
#' a series of elemental components which may be pre-existing data.frames, matrices or variables.
#' A critical requirement is that the length of all component variables, and the
#' number of rows of the component data.frames or matrices must all be the same. The output
#' data.frame will then have this same number of rows. The serverside
#' function dataFrameDS() calls the native R function data.frame()
#' and several of its arguments are precisely the same as for data.frame().
#' In consequence, additional information can be sought from the help()
#' for data.frame().
#' @param vectors a list which contains the elemental components to combine.
#' These correspond to the vector of character strings specified in argument x
#' of the clientside function ds.dataFrame()
#' @param r.names NULL or a character vector specifying the names of the rows. Default NULL.
#' @param ch.rows logical, if TRUE then the rows are checked for consistency of length and names.
#' Default FALSE.
#' @param ch.names logical, if TRUE then the names of the variables in the data frame
#' are checked to ensure that they are syntactically valid variable names and are not duplicated.
#' Default TRUE. In fact, the clientside function ensures no duplicated names can
#' be presented to dataFrameDS
#' but this argument is kept to check for other forms of syntactic validity.
#' @param clnames a list of characters, the column names of the output data frame.
#' These are generated by the clientside function from the names of vectors, and
#' the column names of data.frames and matrices being combined in producing
#' the output data.frame
#' @param strAsFactors logical, if TRUE character vectors are automatically
#' converted to factors. Default TRUE.
#' @param completeCases logical. If TRUE indicates that only complete cases should be
#' included: any rows with missing values in any component will be excluded. Default FALSE.
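#' @return a dataframe composed of the specified elemental components will be created on the
#' serverside and named according to the <newobj> argument of the clientside
#' function ds.dataFrame(). If the resulting data frame has fewer rows than
#' nfilter.subset, a list containing a studysideMessage is returned instead.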
#' @author DataSHIELD Development Team
#' @export
#'
dataFrameDS <- function(vectors=NULL, r.names=NULL, ch.rows=FALSE, ch.names=TRUE, clnames=NULL, strAsFactors=TRUE, completeCases=FALSE){
  # Builds a data.frame from the components named in `vectors`, renames its
  # columns from `clnames`, optionally keeps only complete rows, and applies
  # the nfilter.subset row-count check before returning.
  #
  # Returns the data.frame, or list(studysideMessage=...) when the result has
  # fewer rows than nfilter.subset.

  #########################################################################
  # DataSHIELD MODULE: CAPTURE THE nfilter SETTINGS
  # Only nfilter.subset is used by this function.
  thr <- listDisclosureSettingsDS()
  nfilter.subset <- as.numeric(thr$nfilter.subset)
  #########################################################################

  # strAsFactors is forwarded exactly as supplied. The previous code replaced
  # TRUE with default.stringsAsFactors(), a helper that recent R releases have
  # deprecated and then made defunct, so the default call path warned/failed.

  # Row names may arrive wrapped in a list; flatten to a plain vector.
  if(!is.null(r.names)){
    r.names <- unlist(r.names)
  }

  # NOTE(review): `vectors` is spliced into R code and evaluated in the
  # caller's frame, so this relies on the text having been validated upstream.
  # The eval stays in this function body so that parent.frame() still refers
  # to the environment in which the component objects live.
  eval.code.vectors.names <- paste0("data.frame(", vectors, ")")
  dtemp0 <- eval(parse(text=eval.code.vectors.names), envir = parent.frame())

  # NOTE(review): dtemp0 is already a data.frame, and base R's as.data.frame()
  # method for data frames does not appear to re-convert existing character
  # columns, so stringsAsFactors may have little effect here - confirm.
  dtemp <- data.frame(dtemp0, row.names=r.names, check.rows=ch.rows, check.names=ch.names,
                      stringsAsFactors=strAsFactors)

  # Rename the columns from the comma-separated names supplied by the caller.
  # With clnames left at its NULL default the names produced by data.frame()
  # are kept (previously strsplit(NULL) raised "non-character argument").
  if(!is.null(clnames)){
    col.names <- unlist(strsplit(clnames, split=","))

    # For names of the form 'D$var' keep the piece that follows the first '$'.
    # vapply keeps col.names a character vector instead of coercing it to a list.
    dollar.idx <- grep("$", col.names, fixed=TRUE)
    if(length(dollar.idx) > 0){
      pieces <- strsplit(col.names[dollar.idx], "\\$", perl=TRUE)
      col.names[dollar.idx] <- vapply(pieces, function(p) p[2], character(1))
    }

    # Make the names syntactically valid and unique (a duplicate gets a
    # numeric suffix such as '.1').
    colnames(dtemp) <- make.names(col.names, unique=TRUE)
  }

  # Remove any rows with missing values if completeCases is TRUE.
  # drop=FALSE keeps a one-column result as a data.frame; without it the
  # subset collapsed to a vector and the row-count check below failed.
  if(completeCases){
    dt <- dtemp[stats::complete.cases(dtemp), , drop=FALSE]
  }else{
    dt <- dtemp
  }

  # Disclosure check: return only a message when there are too few rows.
  if(nrow(dt) < nfilter.subset){
    studysideMessage <- "nfilter.trap: dataframe has less than nfilter.subset rows"
    return(list(studysideMessage=studysideMessage))
  }

  return(dt)
}
# ASSIGN FUNCTION
# dataFrameDS