From 8d6484e66b00bf291395ac87d08c3735d23f07ce Mon Sep 17 00:00:00 2001 From: cpauvert Date: Mon, 4 Dec 2023 16:52:48 +0100 Subject: [PATCH] add default spectra naming in the `process_spectra()` this fixes #37 by evaluating the uniqueness of names before assigning and giving clues on how to make them unique Further tests to downstream functions as `get_spectra_names()` does not allow empty peaks --- R/get_spectra_names.R | 18 +++++- R/process_spectra.R | 19 ++++--- dev/dereplicate-spectra.Rmd | 57 +++++++++++++------ dev/flat_utils.Rmd | 24 +++++++- man/process_spectra.Rd | 8 ++- tests/testthat/test-get_spectra_names.R | 6 ++ tests/testthat/test-merge_processed_spectra.R | 14 ++--- tests/testthat/test-process_spectra.R | 24 +++++++- 8 files changed, 131 insertions(+), 39 deletions(-) diff --git a/R/get_spectra_names.R b/R/get_spectra_names.R index 91157ee..f3a14be 100644 --- a/R/get_spectra_names.R +++ b/R/get_spectra_names.R @@ -36,7 +36,21 @@ get_spectra_names <- function(spectra_list){ ) } - spectra_names <- lapply(spectra_list, function(spectra) { + # Need to make sure that the spectra are not empty here to avoid + # a tibble issue like: + # Error in `tibble::as_tibble_row()`: + # ! Columns 1, 2, and 3 must be named. + # + # Therefore, raise an error if any of the spectra are empty + empty_spectra <- vapply(spectra_list, MALDIquant::isEmpty, FUN.VALUE = logical(1)) + + if(any(empty_spectra)){ + stop( + "Empty spectra detected!
Preprocess the data accordingly using `check_spectra()`" + ) + } + + spectra_names <- lapply(spectra_list, function(spectra){ MALDIquant::metaData(spectra)[c("name", "fullName", "file")] %>% tibble::as_tibble_row() }) %>% @@ -46,7 +60,7 @@ get_spectra_names <- function(spectra_list){ ) %>% dplyr::relocate("sanitized_name") - if( nrow(spectra_names) > dplyr::n_distinct(spectra_names$sanitized_name)){ + if( nrow(spectra_names) > dplyr::n_distinct(spectra_names[["sanitized_name"]])){ warning( "Non-unique values in spectra names!", "\n\nQuickfix: use `dplyr::mutate(sanitized_name = base::make.unique(sanitized_name))`" diff --git a/R/process_spectra.R b/R/process_spectra.R index 21119eb..231339d 100644 --- a/R/process_spectra.R +++ b/R/process_spectra.R @@ -15,6 +15,7 @@ #' #' #' @param spectra_list A list of [MALDIquant::MassSpectrum] objects. +#' @param spectra_names A [tibble::tibble] (or [data.frame]) of sanitized spectra names by default from [get_spectra_names]. If provided manually, the column `sanitized_name` will be used to name the spectra. #' @param rds_prefix A character indicating the prefix for the `.RDS` output files to be written in the `processed` directory. By default, no prefix are given and thus no files are written. #' #' @return A named list of three objects: @@ -45,7 +46,9 @@ #' # A detailed view of the metadata with the median signal-to-noise #' # ratio (SNR) and the number of peaks #' processed$metadata -process_spectra <- function(spectra_list, rds_prefix = NULL) { +process_spectra <- function(spectra_list, + spectra_names = get_spectra_names(spectra_list), + rds_prefix = NULL) { # It returns the list and write it for future processing as an RDS file. # 1. 
SQRT transformation @@ -81,15 +84,15 @@ process_spectra <- function(spectra_list, rds_prefix = NULL) { "peaks" = lengths(snr_list) ) - # Add the spectra identifiers to all objects - rownames(metadata) <- names(spectra) <- names(peaks) <- sapply(spectra, function(x) { - # e.g., 230117_1750_1_B1 - gsub( - "[-\\.]", "_", - MALDIquant::metaData(x)[["fullName"]] + if(! "sanitized_name" %in% colnames(spectra_names)){ + stop( + "Missing 'sanitized_name' column in the provided 'spectra_names' tibble!", + "\n\nTip: Use the `get_spectra_names()` for default and compliant names." ) - }) + } + rownames(metadata) <- names(spectra) <- names(peaks) <- spectra_names[["sanitized_name"]] + # Aggregate the objects to a list processed_list <- list( "spectra" = spectra, diff --git a/dev/dereplicate-spectra.Rmd b/dev/dereplicate-spectra.Rmd index 272c55d..2d80ec5 100644 --- a/dev/dereplicate-spectra.Rmd +++ b/dev/dereplicate-spectra.Rmd @@ -56,6 +56,7 @@ The full procedure is illustrated in the example below. While in this case, all #' #' #' @param spectra_list A list of [MALDIquant::MassSpectrum] objects. +#' @param spectra_names A [tibble::tibble] (or [data.frame]) of sanitized spectra names by default from [get_spectra_names]. If provided manually, the column `sanitized_name` will be used to name the spectra. #' @param rds_prefix A character indicating the prefix for the `.RDS` output files to be written in the `processed` directory. By default, no prefix are given and thus no files are written. #' #' @return A named list of three objects: @@ -70,7 +71,9 @@ The full procedure is illustrated in the example below. While in this case, all #' #' @note The original R code on which this function is based is accessible at: #' @examples -process_spectra <- function(spectra_list, rds_prefix = NULL) { +process_spectra <- function(spectra_list, + spectra_names = get_spectra_names(spectra_list), + rds_prefix = NULL) { # It returns the list and write it for future processing as an RDS file. # 1. 
SQRT transformation @@ -106,15 +109,15 @@ process_spectra <- function(spectra_list, rds_prefix = NULL) { "peaks" = lengths(snr_list) ) - # Add the spectra identifiers to all objects - rownames(metadata) <- names(spectra) <- names(peaks) <- sapply(spectra, function(x) { - # e.g., 230117_1750_1_B1 - gsub( - "[-\\.]", "_", - MALDIquant::metaData(x)[["fullName"]] + if(! "sanitized_name" %in% colnames(spectra_names)){ + stop( + "Missing 'sanitized_name' column in the provided 'spectra_names' tibble!", + "\n\nTip: Use the `get_spectra_names()` for default and compliant names." ) - }) + } + rownames(metadata) <- names(spectra) <- names(peaks) <- spectra_names[["sanitized_name"]] + # Aggregate the objects to a list processed_list <- list( "spectra" = spectra, @@ -173,12 +176,32 @@ test_that("process_spectra works", { ) ) }) -test_that("process_spectra warns on empty spectra", { - expect_warning( +test_that("process_spectra with automatic names fails on empty spectra with maldipickr functions", { + expect_error( process_spectra(c(MALDIquant::createMassSpectrum(0, 0))), + "Empty spectra detected!" 
+ ) +}) +test_that("process_spectra with manual names warns on empty spectra with MALDIquant functions", { + expect_warning( + process_spectra( + c(MALDIquant::createMassSpectrum(0, 0)), + spectra_names = tibble::tibble(sanitized_name = "Dummy_name") + ), "MassSpectrum object is empty" ) }) +test_that("process_spectra with manual names fails if wrong column", { + expect_error( + process_spectra( + spectra_list_test, + spectra_names = tibble::tibble( + sanitized_wrong_column = c("spectra1","spectra2") + ) + ), + "Missing 'sanitized_name' column" + ) +}) ``` ## Merge multiple processed spectra @@ -354,13 +377,13 @@ test_that("merge_processed_spectra fails with the wrong input", { test_that("merge_processed_spectra fails with only empty peaks", { expect_warning( - empty_peaks <- list( - createMassSpectrum( - mass = 4500:5000, - intensity = rep(0, 501), - metaData = list(fullName = "foo") - ) - ) %>% process_spectra(), + empty_peaks <- list( + createMassSpectrum( + mass = 4500:5000, + intensity = rep(0, 501), + metaData = list(fullName = "foo") + ) + ) %>% process_spectra(spectra_names = tibble::tibble(sanitized_name = "foo")), "MassSpectrum object is empty!" ) expect_warning( diff --git a/dev/flat_utils.Rmd b/dev/flat_utils.Rmd index 2c2f3aa..0eb8e8d 100644 --- a/dev/flat_utils.Rmd +++ b/dev/flat_utils.Rmd @@ -361,7 +361,21 @@ get_spectra_names <- function(spectra_list){ ) } - spectra_names <- lapply(spectra_list, function(spectra) { + # Need to make sure that the spectra are not empty here to avoid + # a tibble issue like: + # Error in `tibble::as_tibble_row()`: + # ! Columns 1, 2, and 3 must be named. + # + # Therefore, raise an error if any of the spectra are empty + empty_spectra <- vapply(spectra_list, MALDIquant::isEmpty, FUN.VALUE = logical(1)) + + if(any(empty_spectra)){ + stop( + "Empty spectra detected!
Preprocess the data accordingly using `check_spectra()`" + ) + } + + spectra_names <- lapply(spectra_list, function(spectra){ MALDIquant::metaData(spectra)[c("name", "fullName", "file")] %>% tibble::as_tibble_row() }) %>% @@ -371,7 +385,7 @@ get_spectra_names <- function(spectra_list){ ) %>% dplyr::relocate("sanitized_name") - if( nrow(spectra_names) > dplyr::n_distinct(spectra_names$sanitized_name)){ + if( nrow(spectra_names) > dplyr::n_distinct(spectra_names[["sanitized_name"]])){ warning( "Non-unique values in spectra names!", "\n\nQuickfix: use `dplyr::mutate(sanitized_name = base::make.unique(sanitized_name))`" @@ -422,6 +436,12 @@ test_that("get_spectra_names warns when duplicate", { out$sanitized_name, c("species1_G2", "species1_G2") ) }) +test_that("get_spectra_names fails on empty spectra", { + expect_error( + get_spectra_names(c(MALDIquant::createMassSpectrum(0, 0))), + "Empty spectra detected!" + ) +}) ``` diff --git a/man/process_spectra.Rd b/man/process_spectra.Rd index 162dde0..e018d25 100644 --- a/man/process_spectra.Rd +++ b/man/process_spectra.Rd @@ -4,11 +4,17 @@ \alias{process_spectra} \title{Process Bruker MALDI Biotyper spectra \emph{à la} Strejcek et al. (2018)} \usage{ -process_spectra(spectra_list, rds_prefix = NULL) +process_spectra( + spectra_list, + spectra_names = get_spectra_names(spectra_list), + rds_prefix = NULL +) } \arguments{ \item{spectra_list}{A list of \link[MALDIquant:MassSpectrum-class]{MALDIquant::MassSpectrum} objects.} +\item{spectra_names}{A \link[tibble:tibble]{tibble::tibble} (or \link{data.frame}) of sanitized spectra names by default from \link{get_spectra_names}. If provided manually, the column \code{sanitized_name} will be used to name the spectra.} + \item{rds_prefix}{A character indicating the prefix for the \code{.RDS} output files to be written in the \code{processed} directory. 
By default, no prefix are given and thus no files are written.} } \value{ diff --git a/tests/testthat/test-get_spectra_names.R b/tests/testthat/test-get_spectra_names.R index 855ce4a..5811157 100644 --- a/tests/testthat/test-get_spectra_names.R +++ b/tests/testthat/test-get_spectra_names.R @@ -25,3 +25,9 @@ test_that("get_spectra_names warns when duplicate", { out$sanitized_name, c("species1_G2", "species1_G2") ) }) +test_that("get_spectra_names fails on empty spectra", { + expect_error( + get_spectra_names(c(MALDIquant::createMassSpectrum(0, 0))), + "Empty spectra detected!" + ) +}) diff --git a/tests/testthat/test-merge_processed_spectra.R b/tests/testthat/test-merge_processed_spectra.R index f13c0c6..d9e9817 100644 --- a/tests/testthat/test-merge_processed_spectra.R +++ b/tests/testthat/test-merge_processed_spectra.R @@ -41,13 +41,13 @@ test_that("merge_processed_spectra fails with the wrong input", { test_that("merge_processed_spectra fails with only empty peaks", { expect_warning( - empty_peaks <- list( - createMassSpectrum( - mass = 4500:5000, - intensity = rep(0, 501), - metaData = list(fullName = "foo") - ) - ) %>% process_spectra(), + empty_peaks <- list( + createMassSpectrum( + mass = 4500:5000, + intensity = rep(0, 501), + metaData = list(fullName = "foo") + ) + ) %>% process_spectra(spectra_names = tibble::tibble(sanitized_name = "foo")), "MassSpectrum object is empty!" ) expect_warning( diff --git a/tests/testthat/test-process_spectra.R b/tests/testthat/test-process_spectra.R index ebeeedd..15fd69e 100644 --- a/tests/testthat/test-process_spectra.R +++ b/tests/testthat/test-process_spectra.R @@ -16,9 +16,29 @@ test_that("process_spectra works", { ) ) }) -test_that("process_spectra warns on empty spectra", { - expect_warning( +test_that("process_spectra with automatic names fails on empty spectra with maldipickr functions", { + expect_error( process_spectra(c(MALDIquant::createMassSpectrum(0, 0))), + "Empty spectra detected!" 
+ ) +}) +test_that("process_spectra with manual names warns on empty spectra with MALDIquant functions", { + expect_warning( + process_spectra( + c(MALDIquant::createMassSpectrum(0, 0)), + spectra_names = tibble::tibble(sanitized_name = "Dummy_name") + ), "MassSpectrum object is empty" ) }) +test_that("process_spectra with manual names fails if wrong column", { + expect_error( + process_spectra( + spectra_list_test, + spectra_names = tibble::tibble( + sanitized_wrong_column = c("spectra1","spectra2") + ) + ), + "Missing 'sanitized_name' column" + ) +})