Remove sysdata.rda

- removing the internal sysdata.rda object and replacing those functions with actual `R/*.R` files - there is no need to keep these internal functions hidden - newly exported functions: - `addAttributes()` - `addClass()` - `cleanNames()` - `getAdatVersion()` - new internal functions: - `convertColMeta()` - `genRowNames()` - `syncColMeta()` - `parseCheck()` - `scaleAnalytes()` - `Makefile` no longer has recipe for generating sysdata.rda - minor tweaks to some unit tests during porting over from internal code base - fixes #59
SomaLogic · Sep 8, 2023 · 647e65a · 647e65a
1 parent df09a9c
commit 647e65a
Show file tree

Hide file tree

Showing 20 changed files with 787 additions and 36 deletions.
diff --git a/Makefile b/Makefile
@@ -23,7 +23,7 @@ SYSFILES := $(addprefix $(DIR)/R/, \
 	syncColMeta.R)
 
 all: check clean
-update: sync sysdata objects
+update: sync objects
 
 roxygen:
 	@ $(RSCRIPT) -e "roxygen2::roxygenise()"
@@ -75,22 +75,6 @@ objects:
 	@ echo "Saving objects to 'data/*.rda' ..."
 	@ $(RM) example_data.adat
 
-# necessary to decouple the function from the namespace
-# avoids loading of source package when 'sysdata.rda' is loaded
-# a bit hacky and could probably be improved (sgf)
-sysdata:
-	@ echo "Creating 'R/sysdata.rda' ..."
-	@ git clone --depth=1 ssh://git@bitbucket.sladmin.com:7999/sv/somareadr.git $(DIR)
-	@ git archive --format=tar --remote=ssh://git@bitbucket.sladmin.com:7999/sv/somaplyr \
-		master R/scaleAnalytes.R | tar -xf - -C $(DIR)
-	@ $(RSCRIPT) \
-	-e "files <- commandArgs(TRUE)" \
-	-e ".__IO__env <- new.env()" \
-	-e "invisible(lapply(files, sys.source, envir = .__IO__env, keep.source = TRUE))" \
-	-e "save(.__IO__env, file = 'R/sysdata.rda')" $(SYSFILES)
-	@ $(RM) $(DIR)
-	@ echo "Saving 'R/sysdata.rda' ..."
-
 check_versions:
 	@ $(RSCRIPT) inst/check-pkg-versions.R
 

diff --git a/NAMESPACE b/NAMESPACE
@@ -57,17 +57,21 @@ S3method(ungroup,soma_adat)
 S3method(unite,soma_adat)
 export("%>%")
 export(adat2eSet)
+export(addAttributes)
+export(addClass)
 export(add_rowid)
 export(anti_join)
 export(antilog)
 export(apt2seqid)
 export(arrange)
+export(cleanNames)
 export(col2rn)
 export(collapseAdats)
 export(count)
 export(diffAdats)
 export(filter)
 export(full_join)
+export(getAdatVersion)
 export(getAnalyteInfo)
 export(getAnalytes)
 export(getFeatureData)

diff --git a/R/addAttributes.R b/R/addAttributes.R
@@ -0,0 +1,33 @@
+#' Add Attributes to `soma_adat` Objects
+#'
+#' Adds a set of attributes, typically "Header.Meta" and "Col.Meta",
+#' to a `data.frame`, `tibble`, `soma_adat` or similar tabular object.
+#' Existing attributes `data` are _not_ over-written.
+#' Typically untouched are:
+#'   \itemize{
+#'     \item `names`
+#'     \item `class`
+#'     \item `row.names`
+#'   }
+#'
+#' @param data The _receiving_ `data.frame` object for new attributes.
+#' @param new.atts A _named_ `list` object containing new attributes
+#'   to add to the existing ones.
+#' @return A data frame object corresponding to `data` but with the
+#'   attributes of `new.atts` grafted on to it.
+#'   Existing attribute names are _not_ over-written.
+#' @author Stu Field
+#' @seealso [attr()], [setdiff()]
+#' @export
+addAttributes <- function(data, new.atts) {
+  stopifnot(
+    "`data` must be a data frame, tibble, or similar." = inherits(data, "data.frame"),
+    "`new.atts` must be a *named* list." = inherits(new.atts, "list"),
+    "`new.atts` must be a *named* list." = !is.null(names(new.atts))
+  )
+  attrs <- setdiff(names(new.atts), names(attributes(data)))
+  for ( i in attrs ) {
+    attr(data, i) <- new.atts[[i]]
+  }
+  data
+}
diff --git a/R/addClass.R b/R/addClass.R
@@ -0,0 +1,30 @@
+#' Add a Class to an Object
+#'
+#' Utility to add (prepend) a class(es) to existing objects.
+#'
+#' @param x The object to receive new class(es).
+#' @param class Character. The name of additional class(es).
+#' @return An object with new classes.
+#' @author Stu Field
+#' @seealso [class()], [typeof()], [structure()]
+#' @examples
+#' class(iris)
+#'
+#' addClass(iris, "new") |> class()
+#'
+#' addClass(iris, c("A", "B")) |> class()    # 2 classes
+#'
+#' addClass(iris, c("A", "data.frame")) |> class()    # no duplicates
+#'
+#' addClass(iris, c("data.frame", "A")) |> class()    # re-orders if exists
+#' @export
+addClass <- function(x, class) {
+  if ( is.null(class) ) {
+    warning("Passing `class = NULL` leaves class(x) unchanged.", call. = FALSE)
+  }
+  if ( any(is.na(class)) ) {
+    stop("The `class` param cannot contain `NA`: ", .value(class), call. = FALSE)
+  }
+  new_class <- union(class, class(x))
+  structure(x, class = new_class)
+}
diff --git a/R/cleanNames.R b/R/cleanNames.R
@@ -0,0 +1,34 @@
+#' Clean Up Character String
+#'
+#' Often the names, particularly within `soma_adat` objects,
+#' are messy due to varying inputs, this function attempts to remedy this by
+#' removing the following:
+#'   \itemize{
+#'     \item trailing/leading/internal whitespace
+#'     \item non-alphanumeric strings (except underscores)
+#'     \item duplicated internal dots (`..`), (`...`), etc.
+#'     \item SomaScan normalization scale factor format
+#'   }
+#'
+#' @param x Character. String to clean up.
+#' @return A cleaned up character string.
+#' @seealso [trimws()], [gsub()], [sub()]
+#' @author Stu Field
+#' @examples
+#' cleanNames("    sdkfj...sdlkfj.sdfii4994### ")
+#'
+#' cleanNames("Hyb..Scale")
+#' @export
+cleanNames <- function(x) {
+  y <- squish(x)                        # zap leading/trailing/internal whitespace
+  y <- gsub("[^A-Za-z0-9_]", ".", y)    # zap non-alphanum (keep '_')
+  y <- gsub("\\.+", ".", y)             # zap multiple dots
+  y <- gsub("^\\.|\\.$", "", y)         # zap leading/trailing dots
+  y <- sub("^Hyb[.]Scale", "HybControlNormScale", y)
+  sub("^Med[.]Scale", "NormScale", y)
+}
+
+squish <- function(x) {
+  # zap leading/trailing whitespace & extra internal whitespace
+  gsub("[[:space:]]+", " ", trimws(x))
+}
diff --git a/R/convertColMeta.R b/R/convertColMeta.R
@@ -0,0 +1,35 @@
+#' Create Annotations Table
+#'
+#' Uses the column meta data (Col.Meta; feature data that appears above
+#' the protein measurements in the adat file)
+#' and compiles them into a grouped tibble lookup table
+#' for simple manipulation and indexing.
+#'
+#' @param x The "Col.Meta" element from an adat attributes.
+#' @return A `tibble` object with columns corresponding
+#'   to the column meta data entries in the adat.
+#' @author Stu Field
+#' @importFrom tibble as_tibble
+#' @noRd
+convertColMeta <- function(x) {
+  # conversion fails if un-equal length columns
+  tbl <- setNames(as_tibble(x), cleanNames(names(x)))
+
+  if ( !is.null(tbl$Dilution) ) {
+    tbl$Dilution2 <- as.numeric(gsub("%$|Mix ", "", tbl$Dilution)) / 100
+  }
+
+  convert_lgl <- function(.x) {
+    w <- tryCatch(as.numeric(.x), warning = function(w) w)
+    is_warn <- inherits(w, "simpleWarning")
+    # NA warning tripped?
+    na_warn <- is_warn && identical(w$message, "NAs introduced by coercion")
+    num_ok  <- !na_warn
+    num_ok && !inherits(.x, c("factor", "integer")) # don't touch factors/integers
+  }
+  idx <- which(vapply(tbl, convert_lgl, NA))
+  for ( i in idx ) tbl[[i]] <- as.numeric(tbl[[i]])
+  tbl$Dilution <- as.character(tbl$Dilution)    # keep character
+  tbl$SeqId    <- getSeqId(tbl$SeqId, TRUE)     # rm versions; safety
+  tbl
+}
diff --git a/R/genRowNames.R b/R/genRowNames.R
@@ -0,0 +1,39 @@
+#' Generates row names for a `soma_adat` object
+#'
+#' @param adat A data frame representing an ADAT.
+#' @return A vector of row names derived from `SlideId_Subarray`.
+#' @noRd
+genRowNames <- function(adat) {
+
+  checkDups <- function(x) any(duplicated(x))   # internal
+
+  if ( all(c("Subarray", "SlideId") %in% names(adat)) ) {
+
+    adat_rn <- paste0(adat$SlideId, "_", adat$Subarray)
+
+    # Added for datasets with same slide_id sub-scanned with different software
+    # nocov start
+    if ( checkDups(adat_rn) ) {
+      if ( "PlateId" %in% names(adat) ) {
+        adat_rn <- paste0(adat$PlateId, "_", adat_rn)
+      } else if ( "DatasetId" %in% names(adat) ) {
+        adat_rn <- paste0(adat$DatasetId, "_", adat_rn)
+      }
+      if ( checkDups(adat_rn) ) {
+        warning(
+          "Found duplicate row names, i.e. `SlideId_Subarray` non-unique. ",
+          "They will be numbered sequentially.", call. = FALSE
+        )
+        adat_rn <- seq_len(nrow(adat))
+      }
+    }
+    # nocov end
+  } else {
+    warning(
+      "No SlideId_Subarray found in ADAT. ",
+      "Rows numbered sequentially.", call. = FALSE
+    )
+    adat_rn <- as.character(seq_len(nrow(adat)))
+  }
+  adat_rn
+}
diff --git a/R/getAdatVersion.R b/R/getAdatVersion.R
@@ -0,0 +1,43 @@
+#' Get ADAT Version
+#'
+#' Get the ADAT version number from a parsed ADAT header.
+#'
+#' @param x The "Header.Meta" list of an ADAT object attributes.
+#' @return Returns the key-value of the ADAT version as a string.
+#' @author Stu Field
+#' @examples
+#' header <- attributes(example_data)$Header.Meta
+#' getAdatVersion(header)
+#'
+#' header$HEADER$Version <- "1.0"
+#' getAdatVersion(header)
+#' @export
+getAdatVersion <- function(x) {
+
+  vidx <- grep("^Version$|^AdatVersion$", names(x$HEADER))
+
+  if ( length(vidx) == 0L ) {
+    stop(
+      "Unable to identify ADAT Version from Header information. ",
+      "Please check 'Header.Meta'.", call. = FALSE
+    )
+  }
+
+  version <- x$HEADER[[vidx]]
+
+  if ( length(version) > 1L ) {
+    warning(
+      "Version length > 1 ... there may be empty tabs in ",
+      "the header block above the data matrix.", call. = FALSE
+    )
+    version <- version[1L]
+  }
+
+  if ( identical(version, "1.01") ) {
+    stop(
+      "Invalid Version (", .value("1.01"), "). Please modify to `1.0.1`.",
+      call. = FALSE
+    )
+  }
+  version
+}