R/dataFrameDS.R

#' @title dataFrameDS called by ds.dataFrame
#' @description The serverside function that creates a data frame from
#' its elemental components. That is: pre-existing data frames;
#' single variables; and/or matrices
#' @details A data frame is a list of variables all with the same number of rows with unique row
#' names, which is of class 'data.frame'. ds.dataFrame will create a data frame by combining
#' a series of elemental components which may be pre-existing data.frames, matrices or variables.
#' A critical requirement is that the length of all component variables, and the
#' number of rows of the component data.frames or matrices must all be the same. The output
#' data.frame will then have this same number of rows. The serverside
#' function dataFrameDS() calls the native R function data.frame()
#' and several of its arguments are precisely the same as for data.frame().
#' In consequence, additional information can be sought from the help()
#' for data.frame().
#' @param vectors a list which contains the elemental components to combine.
#' These correspond to the vector of character strings specified in argument x
#' of the clientside function ds.dataFrame()
#' @param r.names NULL or a character vector specifying the names of the rows. Default NULL.
#' @param ch.rows logical, if TRUE then the rows are checked for consistency of length and names.
#' Default FALSE.
#' @param ch.names logical, if TRUE then the names of the variables in the data frame
#' are checked to ensure that they are syntactically valid variable names and are not duplicated.
#' Default TRUE. In fact, the clientside function ensures no duplicated names can
#' be presented to dataFrameDS
#' but this argument is kept to check for other forms of syntactic validity.
#' @param clnames a list of characters, the column names of the output data frame.
#' These are generated by the clientside function from the names of vectors, and
#' the column names of data.frames and matrices being combined in producing
#' the output data.frame
#' @param strAsFactors logical, if TRUE determines whether character vectors should automatically be
#' converted to factors? Default TRUE.
#' @param completeCases logical. If TRUE indicates that only complete cases should be
#' included: any rows with missing values in any component will be excluded. Default FALSE.
#' @return a dataframe composed of the specified elemental components will be created on the
#' serverside and named according to the <newobj> argument of the clientside
#' function ds.dataFrame()
#' @author DataSHIELD Development Team
#' @export
#'
dataFrameDS <- function(vectors=NULL, r.names=NULL, ch.rows=FALSE, ch.names=TRUE, clnames=NULL, strAsFactors=TRUE, completeCases=FALSE){

  #########################################################################
  # DataSHIELD MODULE: CAPTURE THE nfilter SETTINGS
  thr <- listDisclosureSettingsDS()
  #nfilter.tab<-as.numeric(thr$nfilter.tab)
  #nfilter.glm<-as.numeric(thr$nfilter.glm)
  nfilter.subset <- as.numeric(thr$nfilter.subset)
  #nfilter.string<-as.numeric(thr$nfilter.string)
  #nfilter.stringShort<-as.numeric(thr$nfilter.stringShort)
  #nfilter.kNN<-as.numeric(thr$nfilter.kNN)
  #datashield.privacyLevel<-as.numeric(thr$datashield.privacyLevel)
  #########################################################################

  if(strAsFactors){
    strAsFactors <- default.stringsAsFactors()
  }
  if(!(is.null(r.names))){
    r.names <- unlist(r.names)
  }
  
  eval.code.vectors.names <- paste0("data.frame(", vectors, ")")
  dtemp0 <- eval(parse(text=eval.code.vectors.names), envir = parent.frame())
  
  dtemp <- data.frame(dtemp0, row.names=r.names, check.rows=ch.rows, check.names=ch.names,
                    stringsAsFactors=strAsFactors)
  
  colnames.act1 <- unlist(strsplit(clnames, split=","))
  
  # Detects which column names (if any) have the '$' in their string and detach 
  # the '$' sign and any characters before that 
  detect.idx <- grep('[$]', colnames.act1)
  if(length(detect.idx) > 0){
    detach.names <- strsplit(colnames.act1[detect.idx], "\\$", perl=TRUE)
    for(i in 1:length(detach.names)){
      detach.names[i] <- detach.names[[i]][2]
    }
    colnames.act1[detect.idx] <- detach.names
  }
  
  # Check if any column names are duplicated and add a suffix ".k" to the kth replicate
  colnames.act1 <- make.names(colnames.act1, unique=TRUE)
  
  colnames(dtemp) <- colnames.act1
  
  # remove any rows with missing values if completeCases is TRUE
  if(completeCases){
    dt <- dtemp[stats::complete.cases(dtemp),]
  }else{
    dt <- dtemp
  }

  # check if the resulting dataframe is of valid length and output accordingly
  if(dim(dt)[1] < nfilter.subset){
    dt[] <- NA
    studysideMessage <- "nfilter.trap: dataframe has less than nfilter.subset rows"
    return(list(studysideMessage=studysideMessage))
  }

  return(dt)

}
# ASSIGN FUNCTION
# dataFrameDS