Skip to content

Commit

Permalink
Add default spectra naming in `process_spectra()`
Browse files Browse the repository at this point in the history
This fixes #37 by checking that the names are unique before assigning them,
and by giving hints on how to make them unique.

Also update the tests of downstream functions, as `get_spectra_names()` does
not allow empty spectra.
  • Loading branch information
cpauvert committed Dec 4, 2023
1 parent f445409 commit 8d6484e
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 39 deletions.
18 changes: 16 additions & 2 deletions R/get_spectra_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,21 @@ get_spectra_names <- function(spectra_list){
)
}

spectra_names <- lapply(spectra_list, function(spectra) {
# Need to make sure that the spectra are not empty here to avoid
# a tibble issue like:
# Error in `tibble::as_tibble_row()`:
# ! Columns 1, 2, and 3 must be named.
#
# Therefore, raise an error if any of the spectra is empty
empty_spectra <- vapply(spectra_list, MALDIquant::isEmpty, FUN.VALUE = logical(1))

if(any(empty_spectra)){
stop(
"Empty spectra detected! Preprocess the data accordingly using `check_spectra()`"
)
}

spectra_names <- lapply(spectra_list, function(spectra){
MALDIquant::metaData(spectra)[c("name", "fullName", "file")] %>%
tibble::as_tibble_row()
}) %>%
Expand All @@ -46,7 +60,7 @@ get_spectra_names <- function(spectra_list){
) %>%
dplyr::relocate("sanitized_name")

if( nrow(spectra_names) > dplyr::n_distinct(spectra_names$sanitized_name)){
if( nrow(spectra_names) > dplyr::n_distinct(spectra_names[["sanitized_name"]])){
warning(
"Non-unique values in spectra names!",
"\n\nQuickfix: use `dplyr::mutate(sanitized_name = base::make.unique(sanitized_name))`"
Expand Down
19 changes: 11 additions & 8 deletions R/process_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#'
#'
#' @param spectra_list A list of [MALDIquant::MassSpectrum] objects.
#' @param spectra_names A [tibble::tibble] (or [data.frame]) of sanitized spectra names, by default from [get_spectra_names]. If provided manually, the column `sanitized_name` will be used to name the spectra.
#' @param rds_prefix A character indicating the prefix for the `.RDS` output files to be written in the `processed` directory. By default, no prefix is given and thus no files are written.
#'
#' @return A named list of three objects:
Expand Down Expand Up @@ -45,7 +46,9 @@
#' # A detailed view of the metadata with the median signal-to-noise
#' # ratio (SNR) and the number of peaks
#' processed$metadata
process_spectra <- function(spectra_list, rds_prefix = NULL) {
process_spectra <- function(spectra_list,
spectra_names = get_spectra_names(spectra_list),
rds_prefix = NULL) {
# It returns the list and write it for future processing as an RDS file.

# 1. SQRT transformation
Expand Down Expand Up @@ -81,15 +84,15 @@ process_spectra <- function(spectra_list, rds_prefix = NULL) {
"peaks" = lengths(snr_list)
)


# Add the spectra identifiers to all objects
rownames(metadata) <- names(spectra) <- names(peaks) <- sapply(spectra, function(x) {
# e.g., 230117_1750_1_B1
gsub(
"[-\\.]", "_",
MALDIquant::metaData(x)[["fullName"]]
if(! "sanitized_name" %in% colnames(spectra_names)){
stop(
"Missing 'sanitized_name' column in the provided 'spectra_names' tibble!",
"\n\nTip: Use the `get_spectra_names()` for default and compliant names."
)
})
}
rownames(metadata) <- names(spectra) <- names(peaks) <- spectra_names[["sanitized_name"]]

# Aggregate the objects to a list
processed_list <- list(
"spectra" = spectra,
Expand Down
57 changes: 40 additions & 17 deletions dev/dereplicate-spectra.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ The full procedure is illustrated in the example below. While in this case, all
#'
#'
#' @param spectra_list A list of [MALDIquant::MassSpectrum] objects.
#' @param spectra_names A [tibble::tibble] (or [data.frame]) of sanitized spectra names, by default from [get_spectra_names]. If provided manually, the column `sanitized_name` will be used to name the spectra.
#' @param rds_prefix A character indicating the prefix for the `.RDS` output files to be written in the `processed` directory. By default, no prefix is given and thus no files are written.
#'
#' @return A named list of three objects:
Expand All @@ -70,7 +71,9 @@ The full procedure is illustrated in the example below. While in this case, all
#'
#' @note The original R code on which this function is based is accessible at: <https://github.com/strejcem/MALDIvs16S>
#' @examples
process_spectra <- function(spectra_list, rds_prefix = NULL) {
process_spectra <- function(spectra_list,
spectra_names = get_spectra_names(spectra_list),
rds_prefix = NULL) {
# It returns the list and write it for future processing as an RDS file.
# 1. SQRT transformation
Expand Down Expand Up @@ -106,15 +109,15 @@ process_spectra <- function(spectra_list, rds_prefix = NULL) {
"peaks" = lengths(snr_list)
)
# Add the spectra identifiers to all objects
rownames(metadata) <- names(spectra) <- names(peaks) <- sapply(spectra, function(x) {
# e.g., 230117_1750_1_B1
gsub(
"[-\\.]", "_",
MALDIquant::metaData(x)[["fullName"]]
if(! "sanitized_name" %in% colnames(spectra_names)){
stop(
"Missing 'sanitized_name' column in the provided 'spectra_names' tibble!",
"\n\nTip: Use the `get_spectra_names()` for default and compliant names."
)
})
}
rownames(metadata) <- names(spectra) <- names(peaks) <- spectra_names[["sanitized_name"]]
# Aggregate the objects to a list
processed_list <- list(
"spectra" = spectra,
Expand Down Expand Up @@ -173,12 +176,32 @@ test_that("process_spectra works", {
)
)
})
test_that("process_spectra warns on empty spectra", {
expect_warning(
test_that("process_spectra with automatic names fails on empty spectra with maldipickr functions", {
  # No `spectra_names` is supplied here, so the names come from the default
  # `get_spectra_names(spectra_list)`, which stops on empty spectra.
  empty_error <- "Empty spectra detected!"
  expect_error(
    process_spectra(c(MALDIquant::createMassSpectrum(0, 0))),
    regexp = empty_error
  )
})
test_that("process_spectra with manual names warns on empty spectra with MALDIquant functions", {
  # Names are supplied by hand, so the default `get_spectra_names()` call
  # (and its empty-spectra error) is not used; a warning is expected instead.
  manual_names <- tibble::tibble(sanitized_name = "Dummy_name")
  expect_warning(
    process_spectra(
      c(MALDIquant::createMassSpectrum(0, 0)),
      spectra_names = manual_names
    ),
    regexp = "MassSpectrum object is empty"
  )
})
test_that("process_spectra with manual names fails if wrong column", {
  # The supplied table lacks the `sanitized_name` column that
  # `process_spectra()` requires to name its outputs.
  misnamed <- tibble::tibble(
    sanitized_wrong_column = c("spectra1", "spectra2")
  )
  expect_error(
    process_spectra(spectra_list_test, spectra_names = misnamed),
    regexp = "Missing 'sanitized_name' column"
  )
})
```

## Merge multiple processed spectra
Expand Down Expand Up @@ -354,13 +377,13 @@ test_that("merge_processed_spectra fails with the wrong input", {
test_that("merge_processed_spectra fails with only empty peaks", {
expect_warning(
empty_peaks <- list(
createMassSpectrum(
mass = 4500:5000,
intensity = rep(0, 501),
metaData = list(fullName = "foo")
)
) %>% process_spectra(),
empty_peaks <- list(
createMassSpectrum(
mass = 4500:5000,
intensity = rep(0, 501),
metaData = list(fullName = "foo")
)
) %>% process_spectra(spectra_names = tibble::tibble(sanitized_name = "foo")),
"MassSpectrum object is empty!"
)
expect_warning(
Expand Down
24 changes: 22 additions & 2 deletions dev/flat_utils.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,21 @@ get_spectra_names <- function(spectra_list){
)
}
spectra_names <- lapply(spectra_list, function(spectra) {
# Need to make sure that the spectra are not empty here to avoid
# a tibble issue like:
# Error in `tibble::as_tibble_row()`:
# ! Columns 1, 2, and 3 must be named.
#
# Therefore, raise an error if any of the spectra is empty
empty_spectra <- vapply(spectra_list, MALDIquant::isEmpty, FUN.VALUE = logical(1))
if(any(empty_spectra)){
stop(
"Empty spectra detected! Preprocess the data accordingly using `check_spectra()`"
)
}
spectra_names <- lapply(spectra_list, function(spectra){
MALDIquant::metaData(spectra)[c("name", "fullName", "file")] %>%
tibble::as_tibble_row()
}) %>%
Expand All @@ -371,7 +385,7 @@ get_spectra_names <- function(spectra_list){
) %>%
dplyr::relocate("sanitized_name")
if( nrow(spectra_names) > dplyr::n_distinct(spectra_names$sanitized_name)){
if( nrow(spectra_names) > dplyr::n_distinct(spectra_names[["sanitized_name"]])){
warning(
"Non-unique values in spectra names!",
"\n\nQuickfix: use `dplyr::mutate(sanitized_name = base::make.unique(sanitized_name))`"
Expand Down Expand Up @@ -422,6 +436,12 @@ test_that("get_spectra_names warns when duplicate", {
out$sanitized_name, c("species1_G2", "species1_G2")
)
})
test_that("get_spectra_names fails on empty spectra", {
  # `get_spectra_names()` stops as soon as any spectrum in the list is
  # flagged by `MALDIquant::isEmpty()`.
  empty_error <- "Empty spectra detected!"
  expect_error(
    get_spectra_names(c(MALDIquant::createMassSpectrum(0, 0))),
    regexp = empty_error
  )
})
```


Expand Down
8 changes: 7 additions & 1 deletion man/process_spectra.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions tests/testthat/test-get_spectra_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,9 @@ test_that("get_spectra_names warns when duplicate", {
out$sanitized_name, c("species1_G2", "species1_G2")
)
})
test_that("get_spectra_names fails on empty spectra", {
  # `get_spectra_names()` stops as soon as any spectrum in the list is
  # flagged by `MALDIquant::isEmpty()`.
  empty_error <- "Empty spectra detected!"
  expect_error(
    get_spectra_names(c(MALDIquant::createMassSpectrum(0, 0))),
    regexp = empty_error
  )
})
14 changes: 7 additions & 7 deletions tests/testthat/test-merge_processed_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ test_that("merge_processed_spectra fails with the wrong input", {

test_that("merge_processed_spectra fails with only empty peaks", {
expect_warning(
empty_peaks <- list(
createMassSpectrum(
mass = 4500:5000,
intensity = rep(0, 501),
metaData = list(fullName = "foo")
)
) %>% process_spectra(),
empty_peaks <- list(
createMassSpectrum(
mass = 4500:5000,
intensity = rep(0, 501),
metaData = list(fullName = "foo")
)
) %>% process_spectra(spectra_names = tibble::tibble(sanitized_name = "foo")),
"MassSpectrum object is empty!"
)
expect_warning(
Expand Down
24 changes: 22 additions & 2 deletions tests/testthat/test-process_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,29 @@ test_that("process_spectra works", {
)
)
})
test_that("process_spectra warns on empty spectra", {
expect_warning(
test_that("process_spectra with automatic names fails on empty spectra with maldipickr functions", {
  # No `spectra_names` is supplied here, so the names come from the default
  # `get_spectra_names(spectra_list)`, which stops on empty spectra.
  empty_error <- "Empty spectra detected!"
  expect_error(
    process_spectra(c(MALDIquant::createMassSpectrum(0, 0))),
    regexp = empty_error
  )
})
test_that("process_spectra with manual names warns on empty spectra with MALDIquant functions", {
  # Names are supplied by hand, so the default `get_spectra_names()` call
  # (and its empty-spectra error) is not used; a warning is expected instead.
  manual_names <- tibble::tibble(sanitized_name = "Dummy_name")
  expect_warning(
    process_spectra(
      c(MALDIquant::createMassSpectrum(0, 0)),
      spectra_names = manual_names
    ),
    regexp = "MassSpectrum object is empty"
  )
})
test_that("process_spectra with manual names fails if wrong column", {
  # The supplied table lacks the `sanitized_name` column that
  # `process_spectra()` requires to name its outputs.
  misnamed <- tibble::tibble(
    sanitized_wrong_column = c("spectra1", "spectra2")
  )
  expect_error(
    process_spectra(spectra_list_test, spectra_names = misnamed),
    regexp = "Missing 'sanitized_name' column"
  )
})

0 comments on commit 8d6484e

Please sign in to comment.