Skip to content

Commit

Permalink
Remove sysdata.rda
Browse files Browse the repository at this point in the history
- removing the internal sysdata.rda object
  and replacing those functions with actual `R/*.R` files
- there is no need to keep these internal functions hidden
- newly exported functions:
  - `addAttributes()`
  - `addClass()`
  - `cleanNames()`
  - `getAdatVersion()`
- new internal functions:
  - `convertColMeta()`
  - `genRowNames()`
  - `syncColMeta()`
  - `parseCheck()`
  - `scaleAnalytes()`
- `Makefile` no longer has recipe for generating sysdata.rda
- minor tweaks to some unit tests during porting over from
  internal code base
- fixes #59
  • Loading branch information
stufield committed Sep 8, 2023
1 parent df09a9c commit 647e65a
Show file tree
Hide file tree
Showing 20 changed files with 787 additions and 36 deletions.
18 changes: 1 addition & 17 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ SYSFILES := $(addprefix $(DIR)/R/, \
syncColMeta.R)

all: check clean
update: sync sysdata objects
update: sync objects

roxygen:
@ $(RSCRIPT) -e "roxygen2::roxygenise()"
Expand Down Expand Up @@ -75,22 +75,6 @@ objects:
@ echo "Saving objects to 'data/*.rda' ..."
@ $(RM) example_data.adat

# necessary to decouple the function from the namespace
# avoids loading of source package when 'sysdata.rda' is loaded
# a bit hacky and could probably be improved (sgf)
sysdata:
@ echo "Creating 'R/sysdata.rda' ..."
@ git clone --depth=1 ssh://git@bitbucket.sladmin.com:7999/sv/somareadr.git $(DIR)
@ git archive --format=tar --remote=ssh://git@bitbucket.sladmin.com:7999/sv/somaplyr \
master R/scaleAnalytes.R | tar -xf - -C $(DIR)
@ $(RSCRIPT) \
-e "files <- commandArgs(TRUE)" \
-e ".__IO__env <- new.env()" \
-e "invisible(lapply(files, sys.source, envir = .__IO__env, keep.source = TRUE))" \
-e "save(.__IO__env, file = 'R/sysdata.rda')" $(SYSFILES)
@ $(RM) $(DIR)
@ echo "Saving 'R/sysdata.rda' ..."

check_versions:
@ $(RSCRIPT) inst/check-pkg-versions.R

Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,21 @@ S3method(ungroup,soma_adat)
S3method(unite,soma_adat)
export("%>%")
export(adat2eSet)
export(addAttributes)
export(addClass)
export(add_rowid)
export(anti_join)
export(antilog)
export(apt2seqid)
export(arrange)
export(cleanNames)
export(col2rn)
export(collapseAdats)
export(count)
export(diffAdats)
export(filter)
export(full_join)
export(getAdatVersion)
export(getAnalyteInfo)
export(getAnalytes)
export(getFeatureData)
Expand Down
33 changes: 33 additions & 0 deletions R/addAttributes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#' Add Attributes to `soma_adat` Objects
#'
#' Adds a set of attributes, typically "Header.Meta" and "Col.Meta",
#' to a `data.frame`, `tibble`, `soma_adat` or similar tabular object.
#' Existing attributes `data` are _not_ over-written.
#' Typically untouched are:
#' \itemize{
#' \item `names`
#' \item `class`
#' \item `row.names`
#' }
#'
#' @param data The _receiving_ `data.frame` object for new attributes.
#' @param new.atts A _named_ `list` object containing new attributes
#' to add to the existing ones.
#' @return A data frame object corresponding to `data` but with the
#' attributes of `new.atts` grafted on to it.
#' Existing attribute names are _not_ over-written.
#' @author Stu Field
#' @seealso [attr()], [setdiff()]
#' @export
addAttributes <- function(data, new.atts) {
stopifnot(
"`data` must be a data frame, tibble, or similar." = inherits(data, "data.frame"),
"`new.atts` must be a *named* list." = inherits(new.atts, "list"),
"`new.atts` must be a *named* list." = !is.null(names(new.atts))
)
attrs <- setdiff(names(new.atts), names(attributes(data)))
for ( i in attrs ) {
attr(data, i) <- new.atts[[i]]
}
data
}
30 changes: 30 additions & 0 deletions R/addClass.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#' Add a Class to an Object
#'
#' Utility to add (prepend) a class(es) to existing objects.
#'
#' @param x The object to receive new class(es).
#' @param class Character. The name of additional class(es).
#' @return An object with new classes.
#' @author Stu Field
#' @seealso [class()], [typeof()], [structure()]
#' @examples
#' class(iris)
#'
#' addClass(iris, "new") |> class()
#'
#' addClass(iris, c("A", "B")) |> class() # 2 classes
#'
#' addClass(iris, c("A", "data.frame")) |> class() # no duplicates
#'
#' addClass(iris, c("data.frame", "A")) |> class() # re-orders if exists
#' @export
addClass <- function(x, class) {
if ( is.null(class) ) {
warning("Passing `class = NULL` leaves class(x) unchanged.", call. = FALSE)
}
if ( any(is.na(class)) ) {
stop("The `class` param cannot contain `NA`: ", .value(class), call. = FALSE)
}
new_class <- union(class, class(x))
structure(x, class = new_class)
}
34 changes: 34 additions & 0 deletions R/cleanNames.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#' Clean Up Character String
#'
#' Often the names, particularly within `soma_adat` objects,
#' are messy due to varying inputs, this function attempts to remedy this by
#' removing the following:
#' \itemize{
#' \item trailing/leading/internal whitespace
#' \item non-alphanumeric strings (except underscores)
#' \item duplicated internal dots (`..`), (`...`), etc.
#' \item SomaScan normalization scale factor format
#' }
#'
#' @param x Character. String to clean up.
#' @return A cleaned up character string.
#' @seealso [trimws()], [gsub()], [sub()]
#' @author Stu Field
#' @examples
#' cleanNames(" sdkfj...sdlkfj.sdfii4994### ")
#'
#' cleanNames("Hyb..Scale")
#' @export
cleanNames <- function(x) {
y <- squish(x) # zap leading/trailing/internal whitespace
y <- gsub("[^A-Za-z0-9_]", ".", y) # zap non-alphanum (keep '_')
y <- gsub("\\.+", ".", y) # zap multiple dots
y <- gsub("^\\.|\\.$", "", y) # zap leading/trailing dots
y <- sub("^Hyb[.]Scale", "HybControlNormScale", y)
sub("^Med[.]Scale", "NormScale", y)
}

squish <- function(x) {
# zap leading/trailing whitespace & extra internal whitespace
gsub("[[:space:]]+", " ", trimws(x))
}
35 changes: 35 additions & 0 deletions R/convertColMeta.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#' Create Annotations Table
#'
#' Uses the column meta data (Col.Meta; feature data that appears above
#' the protein measurements in the adat file)
#' and compiles them into a grouped tibble lookup table
#' for simple manipulation and indexing.
#'
#' @param x The "Col.Meta" element from an adat attributes.
#' @return A `tibble` object with columns corresponding
#' to the column meta data entries in the adat.
#' @author Stu Field
#' @importFrom tibble as_tibble
#' @noRd
convertColMeta <- function(x) {
# conversion fails if un-equal length columns
tbl <- setNames(as_tibble(x), cleanNames(names(x)))

if ( !is.null(tbl$Dilution) ) {
tbl$Dilution2 <- as.numeric(gsub("%$|Mix ", "", tbl$Dilution)) / 100
}

convert_lgl <- function(.x) {
w <- tryCatch(as.numeric(.x), warning = function(w) w)
is_warn <- inherits(w, "simpleWarning")
# NA warning tripped?
na_warn <- is_warn && identical(w$message, "NAs introduced by coercion")
num_ok <- !na_warn
num_ok && !inherits(.x, c("factor", "integer")) # don't touch factors/integers
}
idx <- which(vapply(tbl, convert_lgl, NA))
for ( i in idx ) tbl[[i]] <- as.numeric(tbl[[i]])
tbl$Dilution <- as.character(tbl$Dilution) # keep character
tbl$SeqId <- getSeqId(tbl$SeqId, TRUE) # rm versions; safety
tbl
}
39 changes: 39 additions & 0 deletions R/genRowNames.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#' Generates row names for a `soma_adat` object
#'
#' @param adat A data frame representing an ADAT.
#' @return A vector of row names derived from `SlideId_Subarray`.
#' @noRd
genRowNames <- function(adat) {

checkDups <- function(x) any(duplicated(x)) # internal

if ( all(c("Subarray", "SlideId") %in% names(adat)) ) {

adat_rn <- paste0(adat$SlideId, "_", adat$Subarray)

# Added for datasets with same slide_id sub-scanned with different software
# nocov start
if ( checkDups(adat_rn) ) {
if ( "PlateId" %in% names(adat) ) {
adat_rn <- paste0(adat$PlateId, "_", adat_rn)
} else if ( "DatasetId" %in% names(adat) ) {
adat_rn <- paste0(adat$DatasetId, "_", adat_rn)
}
if ( checkDups(adat_rn) ) {
warning(
"Found duplicate row names, i.e. `SlideId_Subarray` non-unique. ",
"They will be numbered sequentially.", call. = FALSE
)
adat_rn <- seq_len(nrow(adat))
}
}
# nocov end
} else {
warning(
"No SlideId_Subarray found in ADAT. ",
"Rows numbered sequentially.", call. = FALSE
)
adat_rn <- as.character(seq_len(nrow(adat)))
}
adat_rn
}
43 changes: 43 additions & 0 deletions R/getAdatVersion.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' Get ADAT Version
#'
#' Get the ADAT version number from a parsed ADAT header.
#'
#' @param x The "Header.Meta" list of an ADAT object attributes.
#' @return Returns the key-value of the ADAT version as a string.
#' @author Stu Field
#' @examples
#' header <- attributes(example_data)$Header.Meta
#' getAdatVersion(header)
#'
#' header$HEADER$Version <- "1.0"
#' getAdatVersion(header)
#' @export
getAdatVersion <- function(x) {

vidx <- grep("^Version$|^AdatVersion$", names(x$HEADER))

if ( length(vidx) == 0L ) {
stop(
"Unable to identify ADAT Version from Header information. ",
"Please check 'Header.Meta'.", call. = FALSE
)
}

version <- x$HEADER[[vidx]]

if ( length(version) > 1L ) {
warning(
"Version length > 1 ... there may be empty tabs in ",
"the header block above the data matrix.", call. = FALSE
)
version <- version[1L]
}

if ( identical(version, "1.01") ) {
stop(
"Invalid Version (", .value("1.01"), "). Please modify to `1.0.1`.",
call. = FALSE
)
}
version
}

0 comments on commit 647e65a

Please sign in to comment.