diff --git a/.Rbuildignore b/.Rbuildignore index a5e6d7a..9a69a7b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,3 +4,8 @@ ^\.travis\.yml$ ^codecov\.yml$ ^\.github$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ +^doc$ +^Meta$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index fc81824..c21abc8 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,13 +1,10 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - - master - - devel + branches: [main, master, devel] pull_request: - branches: - - master + branches: [main, master] name: R-CMD-check @@ -21,67 +18,32 @@ jobs: fail-fast: false matrix: config: - #- {os: windows-latest, r: 'release'} - #- {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true - - uses: r-lib/actions/setup-pandoc@master - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - sudo apt install libopenbabel-dev - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Test coverage - run: covr::codecov() - shell: Rscript {0} + extra-packages: any::rcmdcheck + needs: check - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main + - uses: r-lib/actions/check-r-package@v2 with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + upload-snapshots: true diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..087f0b0 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,46 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 0000000..2c5bb50 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,50 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr + needs: coverage + + - name: Test coverage + run: | + covr::codecov( + quiet = FALSE, + clean = FALSE, + install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + ) + shell: Rscript {0} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/.gitignore b/.gitignore index 5b6a065..130a84f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,7 @@ .Rhistory .RData .Ruserdata +docs +inst/doc +/doc/ +/Meta/ diff --git a/DESCRIPTION b/DESCRIPTION index c7601f9..4a5c4f3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,72 +1,50 @@ -Package: MFassign -Title: Molecular Formula Assignment for High Resolution metabolomics -Version: 0.7.10 +Package: assignments +Title: Molecular Formula Assignment For High Resolution ESI-MS Based Metabolomics Data +Version: 1.0.0 Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre")) -Description: Molecular formula assignment for high resolution metabolomics. +Description: A molecular formula assignment approach for electrospray ionisation high resolution mass spectrometry based metabolomics data. Depends: R (>= 3.5.0), - ggraph -Imports: CHNOSZ, +Imports: cli, + crayon, dplyr, + furrr, + future, + igraph, + lubridate, magrittr, - stringr, - tibble, + metabolyseR, methods, - tidyr, - crayon, + mzAnnotation, purrr, - cli, - lubridate, + rlang, + stringr, + tibble, tidygraph, - igraph, - ggthemes, + tidyr +License: GPL (>= 3) +Encoding: UTF-8 +LazyData: true +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.2.3 +Collate: parameters.R assignment.R correlations.R + addIsoAssign.R transformationAssign.R relationships.R + graph.R assign.R internals.R components.R + feature_data.R assignments.R plotAdductDist.R plot_components.R + plotSpectrum.R reexports.R zzz.R +Suggests: + testthat, + covr, ggplot2, - patchwork, - mzAnnotation, - metabolyseR, + ggraph, ggrepel, + ggtext, + ggthemes, + glue, graphlayouts, - future, - furrr -Remotes: jasenfinch/mzAnnotation, + patchwork, + knitr, + rmarkdown +Remotes: aberHRML/mzAnnotation, jasenfinch/metabolyseR -License: GPL (>= 3) -Encoding: UTF-8 -LazyData: true -RoxygenNote: 7.1.1 -Collate: - allGenerics.R - allClasses.R - prepCorrelations-method.R - addIsoAssign-method.R - transformationAssign-method.R - relationships-method.R - assignmentParameters.R - assignMFs.R - MFgen.R - MFscore.R - addIsoScore.R - addMFs.R - calcComponents.R - assignMethods.R - peakData.R - LCassignment.R - MFassign.R - FIEassignment.R - doAssignment-method.R - continueAssignment.R - show-method.R - access-methods.R - summariseAssignment-method.R - plotNetwork-method.R - networkComponents.R - addNames.R - eliminate.R - recalcComponents.R - plotAdductDist-method.R - plotFeatureSolutions.R - calcCorrelations-method.R - filterCorrelations-method.R - plotSpectrum-method.R - reexports.R -Suggests: testthat, - covr +URL: https://aberhrml.github.io/assignments/ +VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 7de078c..b647917 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,28 +1,62 @@ # Generated by roxygen2: do not edit by hand +export("%>%") +export("MFrankThreshold<-") +export("adductRules<-") +export("adducts<-") +export("correlationsParameters<-") +export("isotopeRules<-") +export("isotopes<-") +export("limit<-") +export("maxM<-") +export("ppm<-") +export("transformationRules<-") +export("transformations<-") +export(MFrankThreshold) +export(addIsoAssign) +export(adductRules) +export(adducts) export(assignMFs) +export(assignedData) +export(assignment) export(assignmentParameters) -export(continueAssignment) +export(assignments) +export(availableTechniques) +export(calcCorrelations) +export(calcRelationships) +export(component) +export(components) +export(correlations) +export(correlationsParameters) export(edges) +export(featureComponents) +export(featureData) +export(graph) +export(isotopeRules) +export(isotopes) +export(iterations) +export(limit) +export(maxM) export(nodes) export(plan) -exportClasses(Assignment) -exportClasses(AssignmentParameters) -exportMethods(assignedData) -exportMethods(assignmentData) -exportMethods(assignments) -exportMethods(plotAdductDist) -exportMethods(plotFeatureSolutions) -exportMethods(plotNetwork) -exportMethods(plotSpectrum) -exportMethods(show) -exportMethods(summariseAssignment) -importFrom(CHNOSZ,count.elements) +export(plotAdductDist) +export(plotComponent) +export(plotFeatureComponents) +export(plotSpectrum) +export(ppm) +export(relationships) +export(summariseAssignments) +export(technique) +export(transformationAssign) +export(transformationRules) +export(transformations) importFrom(cli,console_width) importFrom(crayon,blue) importFrom(crayon,green) importFrom(crayon,red) importFrom(crayon,yellow) +importFrom(dplyr,all_of) +importFrom(dplyr,anti_join) importFrom(dplyr,arrange) importFrom(dplyr,bind_cols) importFrom(dplyr,bind_rows) @@ -32,94 +66,68 @@ importFrom(dplyr,distinct) importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) +importFrom(dplyr,group_split) importFrom(dplyr,inner_join) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,mutate_at) +importFrom(dplyr,mutate_if) importFrom(dplyr,n) +importFrom(dplyr,relocate) importFrom(dplyr,rename) importFrom(dplyr,rowwise) importFrom(dplyr,select) importFrom(dplyr,semi_join) +importFrom(dplyr,slice) importFrom(dplyr,slice_sample) importFrom(dplyr,summarise) importFrom(dplyr,ungroup) importFrom(dplyr,vars) importFrom(furrr,furrr_options) importFrom(furrr,future_map) +importFrom(furrr,future_map_dfr) importFrom(future,plan) -importFrom(ggplot2,aes) -importFrom(ggplot2,coord_fixed) -importFrom(ggplot2,element_blank) -importFrom(ggplot2,element_text) -importFrom(ggplot2,facet_wrap) -importFrom(ggplot2,geom_bar) -importFrom(ggplot2,geom_segment) -importFrom(ggplot2,ggplot) -importFrom(ggplot2,guides) -importFrom(ggplot2,labs) -importFrom(ggplot2,margin) -importFrom(ggplot2,scale_fill_manual) -importFrom(ggplot2,theme) -importFrom(ggplot2,theme_bw) -importFrom(ggplot2,xlim) -importFrom(ggplot2,ylim) -importFrom(ggraph,create_layout) -importFrom(ggraph,facet_edges) -importFrom(ggraph,geom_edge_link) -importFrom(ggraph,geom_node_label) -importFrom(ggraph,geom_node_point) -importFrom(ggraph,geom_node_text) -importFrom(ggraph,ggraph) -importFrom(ggraph,scale_edge_color_gradient) -importFrom(ggraph,theme_graph) -importFrom(ggrepel,geom_text_repel) -importFrom(ggthemes,ptol_pal) -importFrom(ggthemes,scale_fill_ptol) -importFrom(graphlayouts,layout_igraph_stress) importFrom(igraph,E) importFrom(igraph,V) +importFrom(igraph,degree) importFrom(igraph,edge.attributes) -importFrom(igraph,set_edge_attr) -importFrom(igraph,set_vertex_attr) importFrom(igraph,vertex.attributes) importFrom(lubridate,seconds_to_period) importFrom(magrittr,"%>%") importFrom(magrittr,set_names) -importFrom(magrittr,set_rownames) -importFrom(metabolyseR,analysisData) importFrom(metabolyseR,analysisParameters) importFrom(metabolyseR,analysisResults) importFrom(metabolyseR,dat) -importFrom(metabolyseR,keepFeatures) importFrom(metabolyseR,metabolyse) -importFrom(metabolyseR,sinfo) +importFrom(metabolyseR,preTreated) +importFrom(metabolyseR,raw) +importFrom(methods,as) importFrom(methods,new) importFrom(methods,show) -importFrom(mzAnnotation,adducts) +importFrom(methods,validObject) +importFrom(mzAnnotation,adduct_rules) importFrom(mzAnnotation,calcM) -importFrom(mzAnnotation,calcMZ) -importFrom(mzAnnotation,generateMF) -importFrom(mzAnnotation,isotopes) -importFrom(mzAnnotation,ppmError) +importFrom(mzAnnotation,ipMF) +importFrom(mzAnnotation,isotope_rules) importFrom(mzAnnotation,relationshipCalculator) importFrom(mzAnnotation,transformMF) -importFrom(mzAnnotation,transformations) -importFrom(parallel,detectCores) -importFrom(patchwork,plot_annotation) -importFrom(patchwork,wrap_plots) +importFrom(mzAnnotation,transformationPossible) +importFrom(mzAnnotation,transformation_rules) +importFrom(purrr,compact) +importFrom(purrr,flatten_chr) importFrom(purrr,map) importFrom(purrr,map_dbl) +importFrom(purrr,map_dfr) importFrom(purrr,map_lgl) -importFrom(stats,cutree) -importFrom(stats,dist) -importFrom(stats,hclust) +importFrom(rlang,check_installed) importFrom(stringr,str_c) importFrom(stringr,str_detect) +importFrom(stringr,str_remove) importFrom(stringr,str_replace_all) importFrom(stringr,str_split_fixed) importFrom(stringr,str_sub) importFrom(tibble,as_tibble) +importFrom(tibble,enframe) importFrom(tibble,tibble) importFrom(tidygraph,activate) importFrom(tidygraph,as_tbl_graph) @@ -130,6 +138,8 @@ importFrom(tidygraph,morph) importFrom(tidygraph,tbl_graph) importFrom(tidygraph,to_components) importFrom(tidygraph,unmorph) +importFrom(tidyr,expand_grid) importFrom(tidyr,gather) +importFrom(tidyr,replace_na) importFrom(utils,capture.output) importFrom(utils,packageVersion) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..109e5bb --- /dev/null +++ b/NEWS.md @@ -0,0 +1,25 @@ +# assignments 1.0.0 + +* Added a `NEWS.md` file to track changes to the package. + +* The `Assignment` S4 class now inherits from the `AssignmentParameters` S4 class. + +* The molecular formula generation is now handled by [`mzAnnotation::ipMF()`](https://aberhrml.github.io/mzAnnotation/reference/ipMF.html). + +* Improved molecular formula selection routine based on the Seven Golden Rules from [Kind et al. 2007](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-8-105). + +* The adduct and isotope assignment routine now conducted over multiple iterations. + +* Graphical components are now selected using an improved plausibility score. + +* Graphical components are now only retained if they contain at least one non-isotopic assignment. + +* The individual assignment step methods (`calcCorrelations()`, `calcRelationships()`, `addIsoAssign()`, `transformAssign()`) are now exported. + +* Added the `availableTechniques()` function to return the supported analytical techniques. + +* Numerous documentation improvements. + +* Added a usage introduction vignette. + +* The package documentation is now available at diff --git a/R/FIEassignment.R b/R/FIEassignment.R deleted file mode 100644 index f2e36da..0000000 --- a/R/FIEassignment.R +++ /dev/null @@ -1,48 +0,0 @@ - -FIEassignment <- function(element = NULL) { - methods <- list( - `calculate correlations` = function(assignment){ - assignment %>% - calcCorrelations() - }, - `filter correlations` = function(assignment){ - assignment %>% - filterCorrelations() - }, - `prepare correlations` = function(assignment){ - assignment %>% - prepCorrelations() - }, - relationships = function(assignment){ - assignment %>% - relationships() - }, - `adduct and isotope assignment` = function(assignment){ - assignment %>% - addIsoAssign() - }, - `transformation assignment` = function(assignment){ - count <- 0 - while (T) { - count <- count + 1 - assignment <- suppressWarnings(transformationAssign(assignment)) - if (length(assignment@transAssign[[count]]) == 0) { - assignment@transAssign <- assignment@transAssign[-count] - break() - } - if (nrow(assignment@transAssign[[count]]$assigned) == 0) { - assignment@transAssign <- assignment@transAssign[-count] - break() - } - - } - return(assignment) - } - ) - - if (!is.null(element)) { - return(methods[[element]]) - } else { - return(methods) - } -} \ No newline at end of file diff --git a/R/LCassignment.R b/R/LCassignment.R deleted file mode 100644 index e9a5309..0000000 --- a/R/LCassignment.R +++ /dev/null @@ -1,47 +0,0 @@ - -LCassignment <- function(element = NULL){ - methods <- list( - `calculate correlations` = function(assignment){ - assignment %>% - calcCorrelations() - }, - `filter correlations` = function(assignment){ - assignment %>% - filterCorrelations() - }, - `prepare correlations` = function(assignment){ - assignment %>% - prepCorrelations() - }, - relationships = function(assignment){ - assignment %>% - relationships(transformations = FALSE) - }, - `adduct and isotope assignment` = function(assignment){ - assignment %>% - addIsoAssign() - }#, - # `transformation assignment` = function(assignment){ - # count <- 0 - # while (T) { - # count <- count + 1 - # assignment <- suppressWarnings(transformationAssign(assignment)) - # if (length(assignment@transAssign[[count]]) == 0) { - # break() - # } - # if (nrow(assignment@transAssign[[count]]$assigned) == 0) { - # assignment@transAssign <- assignment@transAssign[-count] - # break() - # } - # - # } - # return(assignment) - # } - ) - - if (!is.null(element)) { - return(methods[[element]]) - } else { - return(methods) - } -} \ No newline at end of file diff --git a/R/MFassign.R b/R/MFassign.R deleted file mode 100644 index 925f6fe..0000000 --- a/R/MFassign.R +++ /dev/null @@ -1,14 +0,0 @@ - -globalVariables(c( - 'Mass','.','Feature1','Feature2','Feature','Isotope','Adduct', - 'Measured m/z','Isotope1','Adduct1','Isotope2','Adduct2','MF1', - 'MF2','Nodes','AddIsoScore','RetentionTime','Score','r','EdgeWeight1', - 'EdgeWeight2','Degree','Measured M','Cluster', 'Average AddIsoScore', - 'Transformation1','Transformation2','log2IntensityRatio','m/z1', - 'm/z2','RetentionTime1','RetentionTime2','mz','Theoretical M', - 'Theoretical m/z','PPM Error','name','Mode','V1','V2','Mode1', - 'Mode2','ID','Adducts','Isotopes','Transformations','Error', - 'Count','TransformedMF1', 'TransformedMF2','Plausibility','absPPM', - 'Name1','Name2','Size','AverageAddIsoScore','nodes','rtDiff', - 'Component','Weight','AIS','Name','Assigned','Intensity','Label','Relative Abundance','m/z','Group','N' -)) \ No newline at end of file diff --git a/R/MFgen.R b/R/MFgen.R deleted file mode 100644 index 142e64c..0000000 --- a/R/MFgen.R +++ /dev/null @@ -1,35 +0,0 @@ -#' @importFrom mzAnnotation generateMF -#' @importFrom tibble as_tibble - -MFgen <- function(M,mz,ppm = 6){ - carb <- round(M/12) - Hs <- round(carb * 2) - NO <- round(carb / 2) - PS <- round(carb / 4) - - maxi <- c(C = carb, - H = Hs, - N = NO, - O = NO, - P = PS, - S = PS) - - if (M < 100) { - ppm <- 10 - } else { - ppm <- ppm - } - - ppm <- (ppm/10^6 * mz)/M * 10^6 - - if (M < 200) { - gr <- F - } else { - gr <- T - } - - res <- generateMF(M,ppm = ppm,charge = 0,validation = gr,element_max = maxi) %>% - rename(`Theoretical M` = Mass) %>% - mutate(`Measured M` = M, `Measured m/z` = mz) - return(res) -} \ No newline at end of file diff --git a/R/MFscore.R b/R/MFscore.R deleted file mode 100644 index 3755b58..0000000 --- a/R/MFscore.R +++ /dev/null @@ -1,93 +0,0 @@ -#' @importFrom CHNOSZ count.elements - -MFscore <- function(mf){ - elements <- c('C','H','N','O','P','S') - eleFreq <- as.vector(count.elements(mf)) - ele <- names(count.elements(mf)) - - if (length(which(!(elements %in% ele))) > 0) { - eleFreq <- c(eleFreq,rep(0,length(which(!(elements %in% ele))))) - ele <- c(ele,elements[!(elements %in% ele)]) - } - - colnames(eleFreq) <- NULL - - eleRatios <- c( - `H/C` = if ('H' %in% ele & 'C' %in% ele) { - eleFreq[ele == 'H']/eleFreq[ele == 'C'] - }, - `N/C` = if ('N' %in% ele & 'C' %in% ele) { - eleFreq[ele == 'N']/eleFreq[ele == 'C'] - }, - `O/C` = if ('O' %in% ele & 'C' %in% ele) { - eleFreq[ele == 'O']/eleFreq[ele == 'C'] - }, - `P/C` = if ('P' %in% ele & 'C' %in% ele) { - eleFreq[ele == 'P']/eleFreq[ele == 'C'] - }, - `S/C` = if ('S' %in% ele & 'C' %in% ele) { - eleFreq[ele == 'S']/eleFreq[ele == 'C'] - }, - `N/O` = if ('N' %in% ele & 'O' %in% ele) { - eleFreq[ele == 'N']/eleFreq[ele == 'O'] - }, - `P/O` = if ('P' %in% ele & 'O' %in% ele) { - eleFreq[ele == 'P']/eleFreq[ele == 'O'] - }, - `S/O` = if ('S' %in% ele & 'O' %in% ele) { - eleFreq[ele == 'S']/eleFreq[ele == 'O'] - }, - `O/P` = if ('O' %in% ele & 'P' %in% ele) { - eleFreq[ele == 'O']/eleFreq[ele == 'P'] - }, - `S/P` = if ('S' %in% ele & 'P' %in% ele) { - eleFreq[ele == 'S']/eleFreq[ele == 'P'] - } - ) - if (!is.null(eleRatios)) { - if ('H/C' %in% names(eleRatios)) { - eleRatios['H/C'] <- abs(eleRatios['H/C'] - 1.6) - } - if ('O/C' %in% names(eleRatios)) { - eleRatios['O/C'] <- abs(eleRatios['O/C'] - 0.3) - } - if (is.nan(eleRatios['N/O'])) { - eleRatios['N/O'] <- 0 - } - if (is.infinite(eleRatios['N/O'])) { - eleRatios['N/O'] <- eleFreq[ele == 'N'] - } - if (is.nan(eleRatios['P/O'])) { - eleRatios['P/O'] <- 0 - } - if (is.infinite(eleRatios['P/O'])) { - eleRatios['P/O'] <- eleFreq[ele == 'P'] - } - if (is.nan(eleRatios['S/O'])) { - eleRatios['S/O'] <- 0 - } - if (is.infinite(eleRatios['S/O'])) { - eleRatios['S/O'] <- eleFreq[ele == 'S'] - } - if (is.nan(eleRatios['O/P'])) { - eleRatios['O/P'] <- 0 - } - if ('O/P' %in% names(eleRatios) & eleRatios['O/P'] >= 3) { - eleRatios['O/P'] <- 0 - } - if (is.nan(eleRatios['P/S'])) { - eleRatios['P/S'] <- 0 - } - if (is.nan(eleRatios['S/P'])) { - eleRatios['S/P'] <- 0 - } - if (is.infinite(eleRatios['S/P'])) { - eleRatios['S/P'] <- eleFreq[ele == 'S'] - } - score <- sum(eleRatios) - } else { - score <- NA - } - - return(score) -} diff --git a/R/access-methods.R b/R/access-methods.R deleted file mode 100644 index c9af841..0000000 --- a/R/access-methods.R +++ /dev/null @@ -1,75 +0,0 @@ -#' assignments-Assignment -#' @rdname assignments -#' @description Get table of assigned features from an Assignment -#' @param assignment S4 object of class Assignment -#' @export - -setMethod('assignments',signature = 'Assignment', - function(assignment){ - assignment@assignments -}) - -#' nodes -#' @description extract node table from tbl_graph object. -#' @param graph object of class tbl_graph -#' @export - -nodes <- function(graph){ - graph %>% - vertex.attributes() %>% - as_tibble() -} - -#' edges -#' @description extract edge table from tbl_graph object. -#' @param graph object of class tbl_graph -#' @importFrom igraph edge.attributes -#' @export - -edges <- function(graph){ - graph %>% - edge.attributes() %>% - as_tibble() -} - -#' assignmentData -#' @rdname assignmentData -#' @description Return data table used for assignments. -#' @param assignment S4 object of class Assignment -#' @export - -setMethod('assignmentData', signature = 'Assignment', - function(assignment){ - assignment@data -}) - -#' assignedData -#' @rdname assignedData -#' @description Return data table used for assignments with feature assignments added to column names. -#' @param assignment S4 object of class Assignment -#' @export - -setMethod('assignedData', signature = 'Assignment', - function(assignment){ - - d <- assignment %>% - assignmentData() - - assignedFeats <- assignment %>% - assignments() %>% - select(Feature,Name) - - assignedFeats <- left_join( - tibble(Feature = d %>% colnames()), - assignedFeats, - by = "Feature") - - assignedFeats$Name[is.na(assignedFeats$Name)] <- assignedFeats$Feature[is.na(assignedFeats$Name)] - - assignedFeats <- assignedFeats %>% - filter(!duplicated(Feature)) - - colnames(d) <- assignedFeats$Name - - return(d) -}) \ No newline at end of file diff --git a/R/addIsoAssign-method.R b/R/addIsoAssign-method.R deleted file mode 100644 index f9bdd05..0000000 --- a/R/addIsoAssign-method.R +++ /dev/null @@ -1,128 +0,0 @@ -#' @importFrom dplyr arrange rowwise slice_sample left_join ungroup -#' @importFrom stringr str_detect -#' @importFrom mzAnnotation calcM calcMZ ppmError -#' @importFrom igraph vertex.attributes V -#' @importFrom furrr furrr_options - -setMethod('addIsoAssign',signature = 'Assignment', - function(assignment){ - - if (assignment@log$verbose == T) { - startTime <- proc.time() - message(blue('Adduct & isotope assignment '),cli::symbol$continue,'\r',appendLF = FALSE) - } - - parameters <- assignment@parameters - - rel <- assignment@relationships %>% - filter(is.na(Transformation1) & is.na(Transformation2) & r > 0) %>% - filter(!(is.na(Isotope1) & !is.na(Isotope2) & Adduct1 == Adduct2 & log2IntensityRatio < 0)) %>% - filter(!(!is.na(Isotope1) & is.na(Isotope2) & Adduct1 == Adduct2)) - - M <- bind_rows(select(rel,mz = `m/z1`,RetentionTime = RetentionTime1,Isotope = Isotope1, Adduct = Adduct1, Feature = Feature1), - select(rel,mz = `m/z2`,RetentionTime = RetentionTime2,Isotope = Isotope2, Adduct = Adduct2, Feature = Feature2)) %>% - filter(!duplicated(.)) %>% - arrange(mz) %>% - rowwise() %>% - mutate(M = calcM(mz,adduct = Adduct,isotope = Isotope)) %>% - arrange(M) %>% - filter(M <= parameters@maxM) - - nM <- nrow(M) - - MF <- M %>% - ungroup() %>% - slice_sample(n = nM) %>% - split(1:nrow(.)) %>% - future_map(~{ - mf <- MFgen(.x$M,.x$mz,ppm = parameters@ppm) - - if (nrow(mf) > 0) { - mf %>% - left_join(M,by = c('Measured M' = 'M','Measured m/z' = 'mz')) %>% - rowwise() %>% - mutate(`Theoretical m/z` = calcMZ(`Theoretical M`,Adduct,Isotope), - `PPM Error` = ppmError(`Measured m/z`,`Theoretical m/z`)) %>% - select(Feature,RetentionTime,MF,Isotope,Adduct,`Theoretical M`, - `Measured M`,`Theoretical m/z`,`Measured m/z`, `PPM Error`) %>% - rowwise() %>% - mutate(Score = MFscore(MF), - `PPM Error` = abs(`PPM Error`), - AddIsoScore = addIsoScore(Adduct, - Isotope, - parameters@adducts, - parameters@isotopes)) %>% - ungroup() %>% - filter(Score == min(Score,na.rm = TRUE)) %>% - filter(Score < parameters@maxMFscore) - } else { - return(NULL) - } - },.options = furrr_options(seed = 1234)) %>% - bind_rows() - - rel <- rel %>% - addMFs(MF) %>% - filter(MF1 == MF2) %>% - mutate(RetentionTime1 = as.numeric(RetentionTime1),RetentionTime2 = as.numeric(RetentionTime2)) %>% - addNames() - - MFs <- bind_rows(select(rel,Name = Name1,Feature = Feature1,mz = `m/z1`,RetentionTime = RetentionTime1,Isotope = Isotope1, Adduct = Adduct1, MF = MF1), - select(rel,Name = Name2,Feature = Feature2,mz = `m/z2`,RetentionTime = RetentionTime2,Isotope = Isotope2, Adduct = Adduct2,MF = MF2)) %>% - mutate(RetentionTime = as.numeric(RetentionTime)) %>% - arrange(mz) %>% - select(-mz) %>% - left_join(MF, by = c("Feature", "RetentionTime", "Isotope", "Adduct",'MF')) %>% - distinct() %>% - mutate(ID = 1:nrow(.)) - - graph <- calcComponents(MFs,rel,parameters) - - filters <- tibble(Measure = c('Plausibility','Size','AIS','Score','PPM Error'), - Direction = c(rep('max',3),rep('min',2))) - - filteredGraph <- graph - - for (i in 1:nrow(filters)) { - f <- filters[i,] - filteredGraph <- filteredGraph %>% - activate(nodes) %>% - filter(name %in% {filteredGraph %>% - vertex.attributes() %>% - as_tibble() %>% - eliminate(f$Measure,f$Direction) %>% - .$name}) - if (V(filteredGraph) %>% length() > 0) { - filteredGraph <- filteredGraph %>% - recalcComponents(parameters) - } else { - break() - } - } - - assignment@addIsoAssign <- list( - graph = graph, - filteredGraph = filteredGraph, - assigned = filteredGraph %>% - vertex.attributes() %>% - as_tibble() %>% - rename(Name = name) %>% - mutate(Mode = str_sub(Feature,1,1)) - ) - - assignment@assignments <- assignment@addIsoAssign$assigned %>% - select(Name:Score,Mode) %>% - mutate(Iteration = 'A&I') - - if (assignment@log$verbose == T) { - endTime <- proc.time() - elapsed <- {endTime - startTime} %>% - .[3] %>% - round(1) %>% - seconds_to_period() %>% - str_c('[',.,']') - message(blue('Adduct & isotope assignment '),'\t',green(cli::symbol$tick),' ',elapsed) - } - - return(assignment) - }) diff --git a/R/addIsoAssign.R b/R/addIsoAssign.R new file mode 100644 index 0000000..1656e11 --- /dev/null +++ b/R/addIsoAssign.R @@ -0,0 +1,176 @@ + +#' @rdname assignment-methods +#' @export + +setGeneric("addIsoAssign", function(assignment) + standardGeneric("addIsoAssign") +) + +#' @rdname assignment-methods +#' @importFrom dplyr arrange rowwise slice_sample left_join ungroup anti_join +#' @importFrom stringr str_detect +#' @importFrom mzAnnotation calcM ipMF +#' @importFrom igraph vertex.attributes V +#' @importFrom furrr furrr_options +#' @importFrom methods as + +setMethod('addIsoAssign',signature = 'Assignment', + function(assignment){ + + invisible(gc()) + + rel <- assignment %>% + relationships() + + if (ncol(rel) == 0){ + stop('No relationships found. Has `calcRelationships()` been called on this object?', + call. = FALSE) + } + + if (isTRUE(assignment@log$verbose)) { + ai_start_time <- proc.time() + message(blue('Adduct & isotopic assignment '), + cli::symbol$continue) + } + + assignment_technique <- technique(assignment) + + if (str_detect(assignment_technique,'LC')){ + rel <- rel %>% + filter(RetentionTimeDiff <= assignment@RT_diff_limit) + } + + rel <- rel %>% + filter(is.na(Transformation1) & + is.na(Transformation2) & + coefficient > 0) %>% + filter(!(is.na(Isotope1) & + !is.na(Isotope2) & + Adduct1 == Adduct2 & + log2IntensityRatio < 0)) %>% + filter(!(!is.na(Isotope1) & + is.na(Isotope2) & + Adduct1 == Adduct2)) + + M <- collateM(rel, + maxM(assignment)) + + if (isTRUE(assignment@log$verbose)) { + start_time <- proc.time() + message('generating molecular formulas', + cli::symbol$continue, + '\r', + appendLF = FALSE) + } + + MFs <- generateMFs(M, + ppm(assignment), + MFrankThreshold(assignment), + adductRules(assignment), + isotopeRules(assignment), + AIS(assignment)) + + if (isTRUE(assignment@log$verbose)) { + end_time <- proc.time() + elapsed <- elapsedTime(start_time,end_time) + message('generating molecular formulas', + '\t', + green(cli::symbol$tick), + ' ', + elapsed) + } + + graph_edges <- rel %>% + addMFs(MFs) %>% + filter(MF1 == MF2) %>% + mutate(RetentionTime1 = as.numeric(RetentionTime1), + RetentionTime2 = as.numeric(RetentionTime2)) %>% + addNames() + + graph_nodes <- collateMFs(graph_edges,MFs) + + graph <- calcComponents(graph_nodes, + graph_edges, + assignment) + + counter <- 0 + + repeat { + + counter <- counter + 1 + + if (isTRUE(assignment@log$verbose)) { + start_time <- proc.time() + message(paste0('iteration ',counter), + cli::symbol$continue, + '\r', + appendLF = FALSE) + } + + if (counter > 1){ + graph <- assignment@addIsoAssign[[counter - 1]]$graph %>% + activate(nodes) %>% + anti_join(assignment %>% + assignments() %>% + select(dplyr::any_of(c('Feature','Isotope','Adduct','MF'))), + by = 'Feature') + + if (nrow(edges(graph)) == 0) break() + + graph <- graph %>% + clean(adductRules(assignment)) %>% + recalcComponents(assignment) + } + + filtered_graph <- graph + + filtered_graph <- filtered_graph %>% + filterComponents(assignment, + filters = componentFilters()) + + assigned_features <- filtered_graph %>% + nodes() %>% + rename(Name = name) %>% + mutate(Mode = str_sub(Feature,1,1)) + + if (nrow(assigned_features) == 0) break() + + assignment@addIsoAssign[[counter]] <- list( + graph = graph, + filtered_graph = filtered_graph, + assigned = assigned_features + ) + + assignment@assignments <- bind_rows( + assignment@assignments, + assigned_features %>% + select(Name:`MF Plausibility (%)`, + Mode, + Component) %>% + mutate(Iteration = paste0('A&I',counter)) + ) + + if (isTRUE(assignment@log$verbose)) { + end_time <- proc.time() + elapsed <- elapsedTime(start_time,end_time) + message(paste0('iteration ',counter), + '\t\t\t', + green(cli::symbol$tick), + ' ', + elapsed) + } + + } + + names(assignment@addIsoAssign) <- paste0('A&I', + seq_along(assignment@addIsoAssign)) + + if (isTRUE(assignment@log$verbose)) { + ai_end_time <- proc.time() + elapsed <- elapsedTime(ai_start_time, + ai_end_time) + message(blue('Adduct & isotopic assignment '),'\t',green(cli::symbol$tick),' ',elapsed) + } + + return(assignment) + }) diff --git a/R/addIsoScore.R b/R/addIsoScore.R deleted file mode 100644 index 2719ab9..0000000 --- a/R/addIsoScore.R +++ /dev/null @@ -1,22 +0,0 @@ -#' @importFrom purrr map_lgl - -addIsoScore <- function(add,iso,addRank,isoRank){ - add <- tibble(Adduct = add) - iso <- tibble(Isotope = iso) - iso$Isotope[is.na(iso$Isotope)] <- 'NA' - addRank <- addRank[map_lgl(addRank,~{add %in% .})] %>% - .[[1]] %>% - {tibble(Adduct = ., - Rank = (length(.) - 1):0)} - isoRank <- tibble(Isotope = c('NA',isoRank), Rank = length(isoRank):0) - - add <- left_join(add, addRank,by = 'Adduct') %>% - .$Rank - iso <- left_join(iso, isoRank,by = 'Isotope') %>% - .$Rank - - maxScore <- max(addRank$Rank) + max(isoRank$Rank) - score <- (add + iso)/maxScore - - return(score) -} diff --git a/R/addMFs.R b/R/addMFs.R deleted file mode 100644 index e810a70..0000000 --- a/R/addMFs.R +++ /dev/null @@ -1,51 +0,0 @@ -#' @importFrom dplyr rename - -addMFs <- function(rel,MF,identMF = T){ - - if (identMF == T) { - relations <- rel %>% - filter(Feature1 %in% MF$Feature, Feature2 %in% MF$Feature) - } else { - relations <- rel %>% - filter(Feature1 %in% MF$Feature | Feature2 %in% MF$Feature) - } - relations <- relations %>% - left_join(MF %>% - select(Feature,MF,Isotope,Adduct,`Measured m/z`),by = c('Feature1' = 'Feature')) %>% - rename(MF1 = MF) - - chr_columns <- relations %>% - map_lgl(is.character) - - relations[,chr_columns] <- relations[,chr_columns] %>% - { - .[is.na(.)] <- '' - . - } - - relations <- relations %>% - filter(Isotope1 == Isotope & Adduct1 == Adduct) %>% - select(-(Isotope:`Measured m/z`)) %>% - left_join(MF %>% - select(Feature,MF,Isotope,Adduct,`Measured m/z`),by = c('Feature2' = 'Feature')) %>% - rename(MF2 = MF) - - chr_columns <- relations %>% - map_lgl(is.character) - - relations[,chr_columns] <- relations[,chr_columns] %>% - { - .[is.na(.)] <- '' - . - } - - relations <- relations %>% - filter(Isotope2 == Isotope & Adduct2 == Adduct) %>% - select(-(Isotope:`Measured m/z`)) - - if (nrow(relations) > 0) { - relations[relations == ''] <- NA - } - - return(relations) -} \ No newline at end of file diff --git a/R/addNames.R b/R/addNames.R deleted file mode 100644 index 63a76dd..0000000 --- a/R/addNames.R +++ /dev/null @@ -1,13 +0,0 @@ - -addNames <- function(rel){ - iso <- rel - iso$Isotope1[is.na(iso$Isotope1)] <- '' - iso$Isotope2[is.na(iso$Isotope2)] <- '' - iso <- iso %>% - mutate(Name1 = str_c(Feature1,MF1,Isotope1,Adduct1,sep = ' '), - Name2 = str_c(Feature2,MF2,Isotope2,Adduct2,sep = ' ')) - rel %>% - bind_cols(iso %>% - select(Name1,Name2)) %>% - select(Name1,Name2,Feature1:MF2) -} \ No newline at end of file diff --git a/R/allClasses.R b/R/allClasses.R deleted file mode 100644 index dd345fb..0000000 --- a/R/allClasses.R +++ /dev/null @@ -1,96 +0,0 @@ -#' AssignmentParameters -#' @description An S4 class to store assignment parameters. -#' @slot technique assignment technique to use -#' @slot correlations list of correlation parameters to be passed to metabolyseR correlation analysis -#' @slot filter list of r and n thresholds for filtering correlations -#' @slot maxM maximum M for which to assign molecular formulas -#' @slot maxMFscore threshold for molecular formula score -#' @slot ppm ppm threshold -#' #' @slot adducts named list of character vectors containing the adducuts to use for each mode -#' @slot limit amu deviation limit for relationship prediction -#' @slot RTwindow retention time window for chromatographic associations -#' @slot adducts list of character vectors containing the adducts to use. List element names should denote ionisation mode. -#' @slot isotopes character vector of isotopes to use -#' @slot transformations character vector of transformations to use -#' @slot adductRules tibble containing adduct formation rules as returned by mzAnnotation::adducts() -#' @slot isotopeRules tibble containing isotope rules as returned by mzAnnotation::isotopes() -#' @slot transformationRules tibble containing transformation rules as returned by mzAnnotation::transformations() -#' @importFrom mzAnnotation transformations -#' @export - -setClass('AssignmentParameters', - slots = list( - technique = 'character', - correlations = 'list', - filter = 'list', - maxM = 'numeric', - maxMFscore = 'numeric', - ppm = 'numeric', - limit = 'numeric', - RTwindow = 'numeric', - adducts = 'list', - isotopes = 'character', - transformations = 'character', - adductRules = 'tbl_df', - isotopeRules = 'tbl_df', - transformationRules = 'tbl_df' - ), - prototype = list( - technique = 'FIE', - correlations = list(method = 'pearson',pAdjustMethod = 'bonferroni',corPvalue = 0.05), - filter = list(rthresh = 0.7,n = 200000,rIncrement = 0.01,nIncrement = 20000), - maxM = 1000, - maxMFscore = 5, - ppm = 5, - limit = 0.001, - RTwindow = numeric(), - isotopes = c('13C','18O','13C2'), - adducts = list(n = c("[M-H]1-", "[M+Cl]1-", "[M+K-2H]1-", - "[M-2H]2-", "[M+Cl37]1-","[2M-H]1-"), - p = c('[M+H]1+','[M+K]1+','[M+Na]1+','[M+K41]1+', - '[M+NH4]1+','[M+2H]2+','[2M+H]1+')), - transformations = transformations()$`MF Change`, - adductRules = adducts(), - isotopeRules = isotopes(), - transformationRules = transformations() - )) - -#' Assignment -#' @description An S4 class to store assignment results -#' @slot log list containing assignment logs -#' @slot flags charactor vector containing completed assignment elements -#' @slot parameters An S4 object of class AssignmentParameters containing the assignment parameters -#' @slot data A tibble containing the peak intensity matrix -#' @slot correlations A tibble containing the correlations -#' @slot preparedCorrelations A tibble containing the prepared correlations ready for analysis -#' @slot relationships A tibble containing the predicted relationships -#' @slot addIsoAssign A list containing the results of the adduct and isotope assignment -#' @slot transAssign A list containing the results of the transformation assignment -#' @slot assignments A tibble containing the assigned molecular formulas -#' @export - -setClass('Assignment', - slots = list( - log = 'list', - flags = 'character', - parameters = 'AssignmentParameters', - data = 'tbl_df', - correlations = 'tbl_df', - preparedCorrelations = 'tbl_df', - relationships = 'tbl_df', - addIsoAssign = 'list', - transAssign = 'list', - assignments = 'tbl_df' - ), - prototype = list( - log = list(date = date(),verbose = TRUE), - flags = character(), - parameters = new('AssignmentParameters'), - data = tibble(), - correlations = tibble(), - preparedCorrelations = tibble(), - relationships = tibble(), - addIsoAssign = list(), - transAssign = list(), - assignments = tibble() - )) \ No newline at end of file diff --git a/R/allGenerics.R b/R/allGenerics.R deleted file mode 100644 index e077c6c..0000000 --- a/R/allGenerics.R +++ /dev/null @@ -1,67 +0,0 @@ -setGeneric('calcCorrelations', function(assignment){ - standardGeneric('calcCorrelations') -}) - -setGeneric('filterCorrelations', function(assignment){ - standardGeneric('filterCorrelations') -}) - -setGeneric('prepCorrelations', function(assignment){ - standardGeneric('prepCorrelations') -}) - -setGeneric("relationships", function(assignment,transformations = TRUE) { - standardGeneric("relationships") -}) - -setGeneric("addIsoAssign", function(assignment) { - standardGeneric("addIsoAssign") -}) - -setGeneric("transformationAssign", function(assignment) { - standardGeneric("transformationAssign") -}) - -setGeneric('doAssignment',function(assignment){ - standardGeneric('doAssignment') -}) - -#' @rdname assignments -setGeneric('assignments',function(assignment){ - standardGeneric('assignments') -}) - -#' @rdname summariseAssignment -setGeneric('summariseAssignment',function(assignment){ - standardGeneric('summariseAssignment') -}) - -#' @rdname assignmentData -setGeneric('assignmentData',function(assignment){ - standardGeneric('assignmentData') -}) - -#' @rdname assignedData -setGeneric('assignedData',function(assignment){ - standardGeneric('assignedData') -}) - -#' @rdname plotNetwork -setGeneric('plotNetwork',function(assignment, layout = 'stress', rThreshold = 0.7){ -standardGeneric('plotNetwork') -}) - -#' @rdname plotAdductDist -setGeneric('plotAdductDist',function(assignment){ - standardGeneric('plotAdductDist') -}) - -#' @rdname plotFeatureSolutions -setGeneric('plotFeatureSolutions',function(assignment,feature,maxComponents = 10){ - standardGeneric('plotFeatureSolutions') -}) - -#' @rdname plotSpectrum -setGeneric('plotSpectrum',function(assignment,MF){ - standardGeneric('plotSpectrum') -}) \ No newline at end of file diff --git a/R/assign.R b/R/assign.R new file mode 100644 index 0000000..9f88ea6 --- /dev/null +++ b/R/assign.R @@ -0,0 +1,116 @@ +#' Perform molecular formula assignment +#' @rdname assign +#' @description Perform automated molecular formula assignment. +#' @param feature_data a tibble or an object of S4 class `AnalysisData` or `Analysis` containing the feature intensity matrix of m/z for which to assign molecular formulas. See details. +#' @param parameters an S4 object of class `AssignmentParamters` containing the parameters for molecular formula assignment +#' @param verbose should progress output be printed to the console +#' @param type `pre-treated` or `raw` data on which to perform assignment when argument `feature_data` is of S4 class `Analysis` +#' @param ... arguments to pass to the relevant method +#' @details +#' If argument `feature_data` is specified as a tibble, this should be a feature intensity matrix where +#' the columns are the `m/z` features to assign and the rows are the individual observations, with the +#' cells as abundance values. he m/z features provided as column names should be in the form of +#' @. Ionisation mode should be given as a prefix n or p for negative +#' or positive ionisation modes respectively. Feature m/z should be provided to an accuracy of least 5 decimal +#' places. The retention time portion (@) is only required for LC-MS data and should be provided +#' in minutes. +#' @importFrom tibble tibble +#' @importFrom stringr str_split_fixed +#' @importFrom cli console_width +#' @importFrom lubridate seconds_to_period +#' @importFrom utils capture.output +#' @examples +#' plan(future::sequential) +#' p <- assignmentParameters('FIE-HRMS') +#' +#' assignments <- assignMFs(feature_data,p) +#' +#' @export + +setGeneric('assignMFs',function(feature_data, + parameters = assignmentParameters('FIE-HRMS'), + verbose = TRUE, + ...) + standardGeneric('assignMFs') +) + +#' @rdname assign + +setMethod('assignMFs',signature = 'tbl_df', + function(feature_data, + parameters = assignmentParameters('FIE-HRMS'), + verbose = TRUE) { + + if (verbose == TRUE) { + startTime <- proc.time() + message(blue('\nassignments '),red(str_c('v',packageVersion('assignments') %>% as.character())),' ',date()) + message(rep('_',console_width())) + params <- parameters %>% + {capture.output(print(.))} %>% + {.[-1]} %>% + { + .[1] <- yellow(.[1]) + . + } %>% + str_c(collapse = '\n') + message(params) + message(rep('_',console_width()),'\n') + message('No. m/z:\t',ncol(feature_data),'\n') + } + + assignment <- new('Assignment', + parameters, + data = feature_data, + log = list(verbose = verbose)) + + assignment <- calcCorrelations(assignment) + assignment <- calcRelationships(assignment) + assignment <- addIsoAssign(assignment) + assignment <- transformationAssign(assignment) + + if (verbose == TRUE) { + endTime <- proc.time() + elapsed <- {endTime - startTime} %>% + .[3] %>% + round(1) %>% + seconds_to_period() %>% + str_c('[',.,']') + message(rep('_',console_width())) + message('\n',green('Complete! '),elapsed,'\n') + } + + return(assignment) +}) + +#' @rdname assign +#' @importFrom metabolyseR dat + +setMethod('assignMFs',signature = 'AnalysisData', + function(feature_data, + parameters = assignmentParameters('FIE'), + verbose = TRUE){ + feature_data %>% + dat() %>% + assignMFs(parameters = parameters, + verbose = verbose) + }) + +#' @rdname assign +#' @importFrom metabolyseR raw preTreated + +setMethod('assignMFs',signature = 'Analysis', + function(feature_data, + parameters = assignmentParameters('FIE'), + verbose = TRUE, + type = c('pre-treated','raw')){ + + type <- match.arg(type, + choices = c('pre-treated','raw')) + + if (type == 'raw') feature_data <- raw(feature_data) + if (type == 'pre-treated') feature_data <- preTreated(feature_data) + + feature_data %>% + assignMFs(parameters = parameters, + verbose = verbose) + }) diff --git a/R/assignMFs.R b/R/assignMFs.R deleted file mode 100644 index a77713f..0000000 --- a/R/assignMFs.R +++ /dev/null @@ -1,59 +0,0 @@ -#' assignMFs -#' @description assign molecular formulas to a set of given m/z. -#' @param dat tibble containing the peak intensities of m/z for which to assign molecular formulas -#' @param parameters an S4 object of class AssignmentParamters containing the parameters for molecular formula assignment -#' @param verbose should output be printed to the console -#' @importFrom tibble tibble -#' @importFrom stringr str_split_fixed -#' @importFrom cli console_width -#' @importFrom lubridate seconds_to_period -#' @importFrom utils capture.output -#' @examples -#' plan(future::sequential) -#' p <- assignmentParameters('FIE') -#' -#' assignment <- assignMFs(peakData,p) -#' -#' @export - -assignMFs <- function(dat,parameters,verbose = TRUE) { - options(digits = 10) - - if (verbose == TRUE) { - startTime <- proc.time() - message(blue('\nMFassign '),red(str_c('v',packageVersion('MFassign') %>% as.character())),' ',date()) - message(rep('_',console_width())) - params <- parameters %>% - {capture.output(print(.))} %>% - {.[-1]} %>% - { - .[1] <- yellow(.[1]) - . - } %>% - str_c(collapse = '\n') - message(params) - message(rep('_',console_width()),'\n') - message('No. m/z:\t',ncol(dat),'\n') - } - - assignment <- new('Assignment', - data = dat, - parameters = parameters) - assignment@log$verbose <- verbose - - assignment <- assignment %>% - doAssignment() - - if (verbose == T) { - endTime <- proc.time() - elapsed <- {endTime - startTime} %>% - .[3] %>% - round(1) %>% - seconds_to_period() %>% - str_c('[',.,']') - message(rep('_',console_width())) - message('\n',green('Complete! '),elapsed,'\n') - } - - return(assignment) -} \ No newline at end of file diff --git a/R/assignMethods.R b/R/assignMethods.R deleted file mode 100644 index d192ab4..0000000 --- a/R/assignMethods.R +++ /dev/null @@ -1,15 +0,0 @@ - -assignMethods <- function(method = NULL) { - methods <- list( - FIE = FIEassignment, - `RP-LC` = LCassignment, - `NP-LC` = LCassignment - ) - - if (is.null(method)) { - method <- methods - } else { - method <- methods[[method]] - } - return(method) -} \ No newline at end of file diff --git a/R/assignment.R b/R/assignment.R new file mode 100644 index 0000000..b986106 --- /dev/null +++ b/R/assignment.R @@ -0,0 +1,447 @@ +#' Assignment +#' @rdname Assignment-class +#' @description An S4 class to store molecular formula assignment results. +#' @slot log a list containing assignment logs +#' @slot data a tibble containing the *m/z* peak intensity matrix +#' @slot correlations a tibble containing the correlations analysis results +#' @slot relationships a tibble containing the calculated mathematical relationships +#' @slot addIsoAssign a list containing the results of the adduct and isotope assignment iterations +#' @slot transAssign a list containing the results of the transformation assignment iterationst +#' @slot assignments a tibble containing the assigned molecular formulas + +setClass('Assignment', + contains = 'AssignmentParameters', + slots = list( + log = 'list', + data = 'tbl_df', + correlations = 'tbl_df', + relationships = 'tbl_df', + addIsoAssign = 'list', + transAssign = 'list', + assignments = 'tbl_df' + ), + prototype = list( + log = list(date = date(),verbose = TRUE), + data = tibble(), + correlations = tibble(), + relationships = tibble(), + addIsoAssign = list(), + transAssign = list(), + assignments = tibble() + )) + +#' @importFrom crayon blue red green +#' @importFrom purrr map_dbl +#' @importFrom utils packageVersion +#' @importFrom igraph E + +setMethod('show',signature = 'Assignment', + function(object){ + cat(blue('\nassignments'),red(str_c('v',packageVersion('assignments') %>% as.character())),'\n') + cat(yellow('Assignment:'),'\n') + cat('\t','Features:\t\t',ncol(object@data),'\n') + cat('\t','Correlations:\t\t',nrow(object@correlations),'\n') + cat('\t','Relationships:\t\t',nrow(relationships(object)),'\n') + cat('\n') + if (length(object@addIsoAssign) > 0) { + addIsoAssigned <- object %>% + assignments() %>% + filter(str_detect(Iteration,'A&I')) + cat('\t',green('Adduct & isotope assignment:'),'\n') + cat('\t\t','Iterations:\t',length(object@addIsoAssign),'\n') + cat('\t\t', + 'MFs:\t\t', + addIsoAssigned %>% + select(MF) %>% + distinct() %>% + nrow(), + '\n') + cat('\t\t','Assigned:\t',nrow(addIsoAssigned),'\n') + cat('\n') + } + if (length(object@transAssign) > 0) { + transAssign <- object %>% + assignments() %>% + filter(str_detect(Iteration,'T')) + cat('\t',green('Transformation assignment:'),'\n') + cat('\t\t','Iterations:\t',length(object@transAssign),'\n') + cat('\t\t', + 'MFs:\t\t', + transAssign %>% + select(MF) %>% + distinct() %>% + nrow(), + '\n') + cat('\t\t','Assigned:\t',nrow(transAssign),'\n') + cat('\n') + } + if (nrow(object@assignments) > 0) { + cat('\t','Total assignments:\t',blue(nrow(object@assignments)), + blue(str_c('(',round(nrow(object@assignments)/ncol(object@data) * 100),'%)')), + '\n') + cat('\t','Unique MFs:\t\t',blue(length(unique(object@assignments$MF))),'\n') + cat('\n') + } + } +) + +#' Assignment accessors +#' @rdname accessors +#' @description Access methods for `Assignment` S4 class +#' @param assignment S4 object of class Assignment +#' @param iteration the assignment iteration +#' @param type the graph type to return. `filtered` returns the assignment graph after component selection. `all` returns all assignment components. +#' @param component component number to extract +#' @param feature feature information to extract +#' @details +#' * `featureData` - Return the initially specifed *m/z* feature data. +#' * `correlations` - Return the correlation analysis results. +#' * `relationships` - Return the calculated relationships. +#' * `iterations` - Return the assignment iteration performed. +#' * `graph` - Return a selected graph. +#' * `components` - Return the component information for an assignment iteration. +#' * `featureComponents` - Return the component information for a selected feature. +#' * `component` - Extract a component graph. +#' * `assignments` - Return the molecular formulas assigned to the *m/z* features. +#' * `assignedData` - Return the *m/z* peak intensity matrix with the molecular formula assignments included in the column names. +#' * `summariseAssignments` - Return a tibble of the assignments summarised by molecular formula. +#' @return A tibble or `tbl_graph` containing assignment results depending on the method used. +#' @examples +#' plan(future::sequential) +#' p <- assignmentParameters('FIE-HRMS') +#' +#' mf_assignments <- assignMFs(feature_data,p) +#' +#' ## Return feature data +#' featureData(mf_assignments) +#' +#' ## Return correlations +#' correlations(mf_assignments) +#' +#' ## Return relationships +#' relationships(mf_assignments) +#' +#' ## Return the available iterations +#' iterations(mf_assignments) +#' +#' ## Return a selected graph +#' graph(mf_assignments,'A&I1') +#' +#' ## Return a component information for a selected graph +#' components(mf_assignments,'A&I1') +#' +#' ## Return a component information for a selected feature +#' featureComponents(mf_assignments,'n191.01962') +#' +#' ## Extract a component graph +#' component(mf_assignments,1,'A&I1') +#' +#' ## Return assignments +#' assignments(mf_assignments) +#' +#' ## Return an m/z intensity matrix with the assignments included +#' ## in the column names +#' assignedData(mf_assignments) +#' +#' ## Return the assignments summarised by molecular formula +#' summariseAssignments(mf_assignments) +#' @export + +setGeneric('featureData',function(assignment) + standardGeneric('featureData')) + +#' @rdname accessors + +setMethod('featureData', signature = 'Assignment', + function(assignment){ + assignment@data + }) + +#' @rdname accessors +#' @export + +setGeneric('correlations',function(assignment) + standardGeneric('correlations')) + +#' @rdname accessors + +setMethod('correlations',signature = 'Assignment', + function(assignment){ + assignment@correlations + }) + +#' @rdname accessors +#' @export + +setGeneric('relationships',function(assignment) + standardGeneric('relationships')) + +#' @rdname accessors + +setMethod('relationships',signature = 'Assignment', + function(assignment){ + assignment@relationships + }) + +setGeneric('relationships<-',function(assignment,value) + standardGeneric('relationships<-')) + +setMethod('relationships<-',signature = 'Assignment', + function(assignment,value){ + assignment@relationships <- value + return(assignment) + }) + +#' @rdname accessors +#' @export + +setGeneric('iterations',function(assignment) + standardGeneric('iterations')) + +#' @rdname accessors + +setMethod('iterations',signature = 'Assignment', + function(assignment){ + c(names(assignment@addIsoAssign), + names(assignment@transAssign)) + }) + +#' @rdname accessors +#' @export + +setGeneric('graph',function(assignment, + iteration, + type = c('selected','all')) + standardGeneric('graph')) + +#' @rdname accessors + +setMethod('graph',signature = 'Assignment', + function(assignment, + iteration, + type = c('selected','all')){ + + if (!iteration %in% iterations(assignment)) { + iters <- assignment %>% + iterations() %>% + paste0('"',.,'"') %>% + paste(collapse = ', ') + stop(paste0('Iteration not recognised. Argument `iteration` should be one of ',iters), + call. = FALSE) + } + + type <- match.arg(type, + choices = c('selected','all')) + + assignment_iteration <- switch( + str_remove(iteration,'[1-9]'), + `A&I` = assignment@addIsoAssign, + `T` = assignment@transAssign) + + graph <- switch(type, + selected = assignment_iteration[[iteration]]$filtered_graph, + all = assignment_iteration[[iteration]]$graph) + + return(graph) + }) + +#' @rdname accessors +#' @export + +setGeneric('components',function(assignment, + iteration, + type = c('selected','all')) + standardGeneric('components')) + +#' @rdname accessors + +setMethod('components',signature = 'Assignment', + function(assignment, + iteration, + type = c('selected','all')){ + + selected_graph <- graph(assignment,iteration,type) %>% + nodes() %>% + select(Component:`Component Plausibility`) %>% + distinct() + + return(selected_graph) + }) + +#' @rdname accessors +#' @export + +setGeneric('featureComponents',function(assignment, + feature, + type = c('selected','all')) + standardGeneric('featureComponents')) + +#' @rdname accessors + +setMethod('featureComponents',signature = 'Assignment', + function(assignment, + feature, + type = c('selected','all')){ + + available_iterations <- iterations(assignment) + + available_iterations %>% + map(graph,assignment = assignment,type = type) %>% + set_names(available_iterations) %>% + map_dfr(nodes,.id = 'Iteration') %>% + filter(Feature == feature) + }) + +#' @rdname accessors +#' @export + +setGeneric('component',function(assignment, + component, + iteration, + type = c('selected','all')) + standardGeneric('component')) + +#' @rdname accessors + +setMethod('component',signature = 'Assignment', + function(assignment, + component, + iteration, + type = c('selected','all')){ + + iteration_components <- components(assignment, + iteration, + type) + + if (!component %in% iteration_components$Component){ + stop(paste0('Component ',component, ' not found in iteration ',iteration,'.')) + } + + graph(assignment,iteration,type) %>% + filter(Component == component) + + }) + +#' @rdname accessors +#' @export + +setGeneric('assignments',function(assignment) + standardGeneric('assignments')) + +#' @rdname accessors + +setMethod('assignments',signature = 'Assignment', + function(assignment){ + assignment@assignments + }) + +#' @rdname accessors +#' @export + +setGeneric('assignedData',function(assignment) + standardGeneric('assignedData')) + +#' @rdname accessors + +setMethod('assignedData', signature = 'Assignment', + function(assignment){ + + d <- assignment %>% + featureData() + + assignedFeats <- assignment %>% + assignments() %>% + select(Feature,Name) + + assignedFeats <- left_join( + tibble(Feature = d %>% colnames()), + assignedFeats, + by = "Feature") + + assignedFeats$Name[is.na(assignedFeats$Name)] <- assignedFeats$Feature[is.na(assignedFeats$Name)] + + assignedFeats <- assignedFeats %>% + filter(!duplicated(Feature)) + + colnames(d) <- assignedFeats$Name + + return(d) + }) + +#' @rdname accessors +#' @importFrom dplyr desc +#' @export + +setGeneric('summariseAssignments',function(assignment) + standardGeneric('summariseAssignments')) + +#' @rdname accessors + +setMethod('summariseAssignments',signature = 'Assignment', + function(assignment){ + assigned <- assignment %>% + assignments() %>% + split(.$MF) %>% + map(~{ + d <- . + d$Isotope[is.na(d$Isotope)] <- '' + d <- d %>% + mutate(IIP = str_c(Isotope,Adduct,sep = ' ')) %>% + arrange(`Measured m/z`) + tibble(MF = d$MF[1],Features = str_c(d$Feature,collapse = '; '),`Isotopes & Ionisation Products` = str_c(d$IIP,collapse = '; '),Count = nrow(d)) + }) %>% + bind_rows() %>% + arrange(desc(Count)) + return(assigned) + }) + +#' Create an Assignment S4 class object +#' @rdname assignment +#' @description Constructor methods for creating an object of S4 class `Assignment`. +#' @param feature_data a tibble or an object of S4 class `AnalysisData` or `Analysis` containing the feature intensity matrix of m/z for which to assign molecular formulas. See details. +#' @param parameters an S4 object of class `AssignmentParamters` containing the parameters for molecular formula assignment +#' @param type type `pre-treated` or `raw` data on which to perform assignment when argument `feature_data` is of class `Analysis` +#' @param ... arguments to pass to the relevant method +#' @return An object of S4 class `Assignment`. +#' @examples +#' mf_assignments <- assignment(feature_data,assignmentParameters('FIE-HRMS')) +#' mf_assignments +#' @export + +setGeneric('assignment',function(feature_data,parameters,...) + standardGeneric('assignment')) + +#' @rdname assignment + +setMethod('assignment',signature = c('tbl_df','AssignmentParameters'), + function(feature_data,parameters){ + new('Assignment', + parameters, + data = feature_data) + }) + + +#' @rdname assignment + +setMethod('assignment',signature = c('AnalysisData','AssignmentParameters'), + function(feature_data,parameters){ + new('Assignment', + parameters, + data = feature_data%>% + dat()) + }) + +#' @rdname assignment + +setMethod('assignment',signature = c('Analysis','AssignmentParameters'), + function(feature_data,parameters,type = c('pre-treated','raw')){ + + type <- match.arg(type, + choices = c('pre-treated','raw')) + + if (type == 'raw') feature_data <- raw(feature_data) + if (type == 'pre-treated') feature_data <- preTreated(feature_data) + + assignment(feature_data, + parameters) + }) \ No newline at end of file diff --git a/R/assignmentParameters.R b/R/assignmentParameters.R deleted file mode 100644 index 2ea315e..0000000 --- a/R/assignmentParameters.R +++ /dev/null @@ -1,32 +0,0 @@ -#' annotationParameters -#' @description Return assignment parameters for a specified technique. -#' @param technique technique to use for assignment. \code{NULL} prints available techniques -#' @importFrom parallel detectCores -#' @importFrom methods new -#' @export - -assignmentParameters <- function(technique = NULL){ - availTechniques <- c('FIE','RP-LC','NP-LC') - if (is.null(technique)) { - cat('\nAvailable Techniques:',str_c('\n\t\t\t',str_c(availTechniques,collapse = '\n\t\t\t'),'\n')) - p <- NULL - } else { - - if (technique == 'FIE') { - p <- new('AssignmentParameters') - } - if (technique == 'RP-LC') { - p <- new('AssignmentParameters', - technique = 'RP-LC', - RTwindow = 1/60 - ) - } - if (technique == 'NP-LC') { - p <- new('AssignmentParameters', - technique = 'NP-LC', - RTwindow = 1/60 - ) - } - } - return(p) -} \ No newline at end of file diff --git a/R/assignments.R b/R/assignments.R new file mode 100644 index 0000000..6e0c8ff --- /dev/null +++ b/R/assignments.R @@ -0,0 +1,17 @@ + +globalVariables(c( + 'Mass','.','Feature1','Feature2','Feature','Isotope','Adduct', + 'Measured m/z','Isotope1','Adduct1','Isotope2','Adduct2','MF1', + 'MF2','Nodes','AddIsoScore','RetentionTime','MF Plausibility (%)', + 'coefficient','EdgeWeight1','EdgeWeight2','Degree','Measured M', + 'Cluster', 'Average AddIsoScore','Transformation1','Transformation2', + 'log2IntensityRatio','m/z1','m/z2','RetentionTime1','RetentionTime2', + 'mz','Theoretical M','Theoretical m/z','PPM error','name','Mode', + 'V1','V2','Mode1','Mode2','ID','Adducts','Isotopes','Transformations', + 'Error','Count','TransformedMF1', 'TransformedMF2','Component Plausibility', + 'absPPM','Name1','Name2','Size','AverageAddIsoScore','nodes','rtDiff', + 'Component','Weight','AIS','Name','Assigned','Intensity','Label', + 'Relative Abundance','m/z','Group','N','adduct_rank','isotope_rank', + 'M','total','MF','RetentionTimeDiff','Adduct_Score','Isotope_Score', + 'Iteration','border','selected' +)) diff --git a/R/calcComponents.R b/R/calcComponents.R deleted file mode 100644 index d3c8197..0000000 --- a/R/calcComponents.R +++ /dev/null @@ -1,44 +0,0 @@ -#' @importFrom tidygraph as_tbl_graph activate morph unmorph graph_size group_components tbl_graph -#' @importFrom magrittr set_names - -calcComponents <- function(MFs,rel,parameters) { - no <- MFs - - ed <- rel - - graph <- as_tbl_graph(ed,directed = F) %>% - activate(nodes) %>% - left_join(no,by = c('name' = 'Name')) %>% - mutate(Component = group_components()) - - comp <- graph %>% - nodes() %>% - .$Component %>% - unique() - - weights <- comp %>% - future_map(~{ - graph %>% - filter(Component == .x) %>% - edges() %>% - .$r %>% - mean() %>% - tibble(Weight = .) - },graph = graph) %>% - set_names(comp) %>% - bind_rows(.id = 'Component') %>% - mutate(Component = as.numeric(Component)) - - graph <- graph %>% - left_join(weights,by = 'Component') %>% - morph(to_components) %>% - mutate(Size = graph_size(), - Nodes = n(), - Density = (2 * Size) / (Nodes * (Nodes - 1)), - Weight = sum(Weight) / Nodes, - AIS = sum(AddIsoScore) / Nodes, - Plausibility = AIS * Size * Weight) %>% - unmorph() - - return(graph) -} \ No newline at end of file diff --git a/R/calcCorrelations-method.R b/R/calcCorrelations-method.R deleted file mode 100644 index 16bf7d9..0000000 --- a/R/calcCorrelations-method.R +++ /dev/null @@ -1,69 +0,0 @@ -#' @importFrom metabolyseR analysisParameters metabolyse analysisResults keepFeatures analysisData dat sinfo -#' @importFrom magrittr set_rownames -#' @importFrom stats cutree dist hclust - -setMethod('calcCorrelations',signature = 'Assignment',function(assignment){ - if (assignment@log$verbose == T) { - startTime <- proc.time() - message(blue('Calculating correlations '),cli::symbol$continue,'\r',appendLF = FALSE) - } - - p <- analysisParameters('correlations') - p@correlations <- assignment@parameters@correlations - - if (str_detect(assignment@parameters@technique,'LC')) { - feat <- tibble(Feature = colnames(assignment@data)) %>% - mutate(RT = str_split_fixed(Feature,'@',2)[,2] %>% - as.numeric()) - RTgroups <- feat %>% - data.frame() %>% - set_rownames(.$Feature) %>% - select(-Feature) %>% - dist() %>% - hclust() %>% - cutree(h = assignment@parameters@RTwindow) %>% - {tibble(Feature = names(.),Group = .)} - - RTsum <- RTgroups %>% - group_by(Group) %>% - summarise(N = n()) - - RTgroups <- RTgroups %>% - filter(Group %in% {RTsum %>% - filter(N > 1) %>% - .$Group}) - - cors <- RTgroups %>% - split(.$Group) %>% - future_map(~{ - analysisData(assignment@data,tibble(ID = 1:nrow(assignment@data))) %>% - keepFeatures(features = .x$Feature) %>% - {metabolyse(dat(.),sinfo(.),p,verbose = FALSE)} %>% - analysisResults(element = 'correlations') - }) %>% - bind_rows(.id = 'RT Group') - - } else { - cors <- metabolyse(assignment@data, - tibble(ID = 1:nrow(assignment@data)), - p, - verbose = FALSE) %>% - analysisResults(element = 'correlations') - } - - assignment@correlations <- cors - - if (assignment@log$verbose == T) { - endTime <- proc.time() - elapsed <- {endTime - startTime} %>% - .[3] %>% - round(1) %>% - seconds_to_period() %>% - str_c('[',.,']') - ncors <- nrow(assignment@correlations) %>% - str_c('[',.,' correlations',']') - message(blue('Calculating correlations '),'\t',green(cli::symbol$tick),' ',ncors,' ',elapsed) - } - - return(assignment) -}) \ No newline at end of file diff --git a/R/components.R b/R/components.R new file mode 100644 index 0000000..6290209 --- /dev/null +++ b/R/components.R @@ -0,0 +1,189 @@ + +avg_degree <- function(n_nodes,n_edges){ + 2 * (n_edges / n_nodes) +} + +plausibility <- function(degree,AIS,weight){ + degree * AIS * weight +} + +#' @importFrom purrr compact +#' @importFrom tidygraph bind_graphs + +clean <- function(graph,adduct_rules_table){ + cleaned_graph <- graph %>% + morph(to_components) %>% + furrr::future_map(~{ + component_adducts <- .x %>% + nodes() %>% + .$Adduct %>% + unique() + + adduct_info <- adduct_rules_table %>% + select(Adduct = Name, + adduct_isotopic = Isotopic) + + component_nodes <- .x %>% + nodes() %>% + left_join(adduct_info, + by = "Adduct") %>% + mutate(isotopic = !is.na(Isotope), + adduct_isotopic = as.logical(adduct_isotopic), + either_isotopic = adduct_isotopic | isotopic) + + if (all(component_nodes$isotopic) == TRUE | + all(component_nodes$adduct_isotopic) == TRUE | + all(component_nodes$either_isotopic) == TRUE | + nrow(component_nodes) < 2) NULL + else return(.x) + }) %>% + compact() + + if (length(cleaned_graph) == 0){ + cleaned_graph <- graph %>% + slice(0) + } + + if (length(cleaned_graph) > 0){ + cleaned_graph <- cleaned_graph %>% + bind_graphs() + } + + return(cleaned_graph) +} + +nComponents <- function(graph){ + graph %>% + morph(to_components) %>% + length() +} + +componentMetrics <- function(component,max_add_iso_total){ + component %>% + mutate(Size = graph_size(), + Nodes = n(), + Degree = avg_degree(Nodes,Size), + Density = (2 * Size) / (Nodes * (Nodes - 1)), + Weight = sum(Weight) / Nodes, + AIS = sum(AIS) / max_add_iso_total, + `Component Plausibility` = plausibility(Degree,AIS,Weight) + ) +} + +componentFilters <- function(){ + tibble(Measure = c('Component Plausibility', + 'MF Plausibility (%)', + 'PPM error'), + Direction = c(rep('max',2),'min')) +} + +#' @importFrom tidygraph as_tbl_graph activate morph unmorph graph_size group_components tbl_graph +#' @importFrom magrittr set_names + +calcComponents <- function(graph_nodes, + graph_edges, + assignment) { + + graph <- as_tbl_graph(graph_edges,directed = FALSE) %>% + activate(nodes) %>% + left_join(graph_nodes,by = c('name' = 'Name')) %>% + mutate(Component = group_components()) %>% + clean(adductRules(assignment)) + + if (nComponents(graph) > 0){ + comp <- graph %>% + nodes() %>% + .$Component %>% + unique() + + weights <- comp %>% + future_map(~{ + graph %>% + filter(Component == .x) %>% + edges() %>% + .$coefficient %>% + mean() %>% + tibble(Weight = .) + },graph = graph) %>% + set_names(comp) %>% + bind_rows(.id = 'Component') %>% + mutate(Component = as.numeric(Component)) + + graph <- graph %>% + left_join(weights,by = 'Component') %>% + morph(to_components) %>% + componentMetrics(max_add_iso_total = maxAIS(assignment)) %>% + unmorph() + } + + return(graph) +} + +#' @importFrom tidygraph to_components +#' @importFrom dplyr n + +recalcComponents <- function(graph, + assignment){ + + graph <- graph %>% + clean(adductRules(assignment)) + + if (nComponents(graph) > 0){ + g <- graph %>% + activate(nodes) + + comp <- g %>% + nodes() %>% + .$Component %>% + unique() + + weights <- comp %>% + future_map(~{ + graph %>% + filter(Component == .x) %>% + edges() %>% + .$coefficient %>% + mean() %>% + tibble(Weight = .) + },graph = g) %>% + set_names(comp) %>% + bind_rows(.id = 'Component') %>% + mutate(Component = as.numeric(Component)) + + graph <- g %>% + select(-Weight) %>% + left_join(weights,by = 'Component') %>% + morph(to_components) %>% + componentMetrics(max_add_iso_total = maxAIS(assignment)) %>% + unmorph() + } + + return(graph) +} + +#' @importFrom igraph degree + +filterComponents <- function(graph, + assignment, + filters = componentFilters()){ + filtered_graph <- graph + + for (i in 1:nrow(filters)) { + f <- filters[i,] + filtered_graph <- filtered_graph %>% + activate(nodes) %>% + filter(name %in% {filtered_graph %>% + nodes() %>% + eliminate(f$Measure,f$Direction) %>% + .$name}) %>% + filter(degree(.) != 0) + if (E(filtered_graph) %>% length() > 0) { + filtered_graph <- filtered_graph %>% + recalcComponents(assignment) + } else { + break() + } + } + + return(filtered_graph) +} diff --git a/R/continueAssignment.R b/R/continueAssignment.R deleted file mode 100644 index afeb391..0000000 --- a/R/continueAssignment.R +++ /dev/null @@ -1,8 +0,0 @@ -#' continueAssignment -#' @description continue a failed assignment -#' @param assignment an S4 object of class Assignment -#' @export - -continueAssignment <- function(assignment){ - doAssignment(assignment) -} \ No newline at end of file diff --git a/R/correlations.R b/R/correlations.R new file mode 100644 index 0000000..f5ec97e --- /dev/null +++ b/R/correlations.R @@ -0,0 +1,116 @@ +#' Molecular formula assignment methods +#' @rdname assignment-methods +#' @description These methods provide access to performing the individual steps of the molecular +#' formula assignment approach. See Details for more information of when it is best to use these +#' instead of `assignMFs()`. +#' @param assignment an object of S4 class `Assignment` +#' @details +#' In circumstances where the molecular formula assignment approach has high memory requirements, +#' such as where there are many correlations (> 2 million) or many high *m/z* (>700), it may be +#' preferable to perform the assignment steps separately as opposed to using `assignMFs()`. This +#' can reduce the memory overheads required to successfully assign molecular formulas to the data +#' and also enable the possibility of objects to be saved and/or unloaded between the assignment +#' steps where needed. +#' @return An object of S4 class `Assignment` containing molecular formula assignments. +#' @examples +#' plan(future::sequential) +#' p <- assignmentParameters('FIE-HRMS') +#' +#' mf_assignments <- assignment(feature_data,p) +#' +#' mf_assignments <- mf_assignments %>% +#' calcCorrelations() %>% +#' calcRelationships() %>% +#' addIsoAssign() %>% +#' transformationAssign() +#' +#' mf_assignments +#' @export + +setGeneric('calcCorrelations', function(assignment) + standardGeneric('calcCorrelations')) + +#' @rdname assignment-methods +#' @importFrom metabolyseR analysisParameters metabolyse analysisResults + +setMethod('calcCorrelations',signature = 'Assignment',function(assignment){ + if (assignment@log$verbose == TRUE) { + startTime <- proc.time() + message(blue('Calculating correlations '),cli::symbol$continue,'\r',appendLF = FALSE) + } + + invisible(gc()) + + p <- analysisParameters('correlations') + parameters <- as(assignment,'AssignmentParameters') + + p@correlations <- parameters@correlations_parameters[c('method', + 'pAdjustMethod', + 'corPvalue', + 'minCoef', + 'maxCor')] + + assignment@correlations <- metabolyse(assignment@data, + tibble(ID = 1:nrow(assignment@data)), + p, + verbose = FALSE) %>% + analysisResults(element = 'correlations') + + assignment <- assignment %>% + prepCorrelations() + + if (assignment@log$verbose == TRUE) { + endTime <- proc.time() + elapsed <- {endTime - startTime} %>% + .[3] %>% + round(1) %>% + seconds_to_period() %>% + str_c('[',.,']') + ncors <- nrow(assignment@correlations) %>% + str_c('[',.,' correlations',']') + message(blue('Calculating correlations '),'\t',green(cli::symbol$tick),' ',ncors,' ',elapsed) + } + + return(assignment) +}) + +setGeneric('prepCorrelations', function(assignment) + standardGeneric('prepCorrelations')) + +setMethod('prepCorrelations',signature = 'Assignment', + function(assignment){ + + correlations <- assignment@correlations + + correlations <- correlations %>% + mutate(Mode1 = str_split_fixed(Feature1,'@',2) %>% + .[,1] %>% + str_sub(1,1), + Mode2 = str_split_fixed(Feature2,'@',2) %>% + .[,1] %>% + str_sub(1,1), + `m/z1` = str_split_fixed(Feature1,'@',2) %>% + .[,1] %>% + str_replace_all('[:alpha:]','') %>% + as.numeric(), + `m/z2` = str_split_fixed(Feature2,'@',2) %>% + .[,1] %>% + str_replace_all('[:alpha:]','') %>% + as.numeric(), + RetentionTime1 = str_split_fixed(Feature1,'@',2) %>% + .[,2] %>% + as.numeric(), + RetentionTime2 = str_split_fixed(Feature2,'@',2) %>% + .[,2] %>% + as.numeric(), + RetentionTimeDiff = abs(RetentionTime1 - RetentionTime2), + ID = 1:nrow(.) + ) %>% + select(Feature1,Feature2, + Mode1:RetentionTimeDiff, + log2IntensityRatio,coefficient,ID) + + assignment@correlations <- correlations + + return(assignment) + }) diff --git a/R/doAssignment-method.R b/R/doAssignment-method.R deleted file mode 100644 index e03fc9e..0000000 --- a/R/doAssignment-method.R +++ /dev/null @@ -1,16 +0,0 @@ - -setMethod('doAssignment',signature = 'Assignment', - function(assignment){ - assignmentMethod <- assignMethods(assignment@parameters@technique) - - elements <- names(assignmentMethod()) - elements <- elements[!(elements %in% assignment@flags)] - - for(i in elements){ - method <- assignmentMethod(i) - assignment <- method(assignment) - assignment@flags <- c(assignment@flags,i) - } - - return(assignment) - }) \ No newline at end of file diff --git a/R/eliminate.R b/R/eliminate.R deleted file mode 100644 index 69520e8..0000000 --- a/R/eliminate.R +++ /dev/null @@ -1,15 +0,0 @@ -#' @importFrom dplyr bind_cols - -eliminate <- function(MFs,by,direction){ - MFs %>% - bind_cols(MFs %>% select(by = by)) %>% - split(.$Feature) %>% - map(~{ - d <- . - direct <- get(direction) - d %>% - filter(by == direct(by)) - }) %>% - bind_rows() %>% - select(-by) -} \ No newline at end of file diff --git a/R/feature_data.R b/R/feature_data.R new file mode 100644 index 0000000..f17d3b9 --- /dev/null +++ b/R/feature_data.R @@ -0,0 +1,5 @@ +#' Example feature data +#' @description An example `m/z` peak intensity matrix containing total ion count normalised positive and negative mode flow infusion electrospray ionisation mass spectrometry *m/z* features. +#' @format A tibble containing 60 rows and 10 variables. + +'feature_data' \ No newline at end of file diff --git a/R/filterCorrelations-method.R b/R/filterCorrelations-method.R deleted file mode 100644 index 6fda1cb..0000000 --- a/R/filterCorrelations-method.R +++ /dev/null @@ -1,55 +0,0 @@ - -filterCors <- function(correlations, rthresh = 0.7, n = 100000, rIncrement = 0.01, nIncrement = 20000){ - filCors <- function(cors,rthresh,n){ - while (nrow(cors) > n) { - cors <- correlations %>% - filter(r > rthresh | r < -rthresh) - rthresh <- rthresh + rIncrement - } - return(cors) - } - - while (TRUE) { - cors <- filCors(correlations,rthresh,n) - if (nrow(cors) > 0) { - break() - } else { - n <- n + nIncrement - } - } - return(cors) -} - -setMethod('filterCorrelations',signature = 'Assignment',function(assignment){ - if (assignment@log$verbose == T) { - startTime <- proc.time() - message(blue('Filtering correlations '),cli::symbol$continue,'\r',appendLF = FALSE) - } - - if (str_detect(assignment@parameters@technique,'LC')) { - cors <- assignment@correlations %>% - filter(r < -(assignment@parameters@filter$rthresh) | r > assignment@parameters@filter$rthresh) - } else { - cors <- assignment@correlations %>% - filterCors(rthresh = assignment@parameters@filter$rthresh, - n = assignment@parameters@filter$n, - rIncrement = assignment@parameters@filter$rIncrement, - nIncrement = assignment@parameters@filter$nIncrement - ) - } - - assignment@preparedCorrelations <- cors - - if (assignment@log$verbose == T) { - endTime <- proc.time() - elapsed <- {endTime - startTime} %>% - .[3] %>% - round(1) %>% - seconds_to_period() %>% - str_c('[',.,']') - ncors <- nrow(assignment@preparedCorrelations) %>% - str_c('[',.,' correlations',']') - message(blue('Filtering correlations '),'\t\t',green(cli::symbol$tick),' ',ncors,' ',elapsed) - } - return(assignment) -}) \ No newline at end of file diff --git a/R/graph.R b/R/graph.R new file mode 100644 index 0000000..7a24709 --- /dev/null +++ b/R/graph.R @@ -0,0 +1,38 @@ +#' Extract graph attributes +#' @rdname graph +#' @description Extract node or edge attributes from a *tidygraph* `tbl_graph` object. +#' @param graph object of class tbl_graph +#' @examples +#' a_graph <- tidygraph::tbl_graph( +#' nodes = data.frame( +#' name = c('a','b','c') +#' ), +#' edges = data.frame( +#' from = c(1,2), +#' to = c(2,3), +#' type = c(1,2) +#' )) +#' +#' ## Extract graph nodes +#' nodes(a_graph) +#' +#' ## Extract graph edges +#' edges(a_graph) +#' @importFrom tibble as_tibble +#' @export + +nodes <- function(graph){ + graph %>% + vertex.attributes() %>% + as_tibble() +} + +#' @rdname graph +#' @importFrom igraph edge.attributes +#' @export + +edges <- function(graph){ + graph %>% + edge.attributes() %>% + as_tibble() +} diff --git a/R/internals.R b/R/internals.R new file mode 100644 index 0000000..98fe3c2 --- /dev/null +++ b/R/internals.R @@ -0,0 +1,270 @@ + +elapsedTime <- function(start_time,end_time){ + {end_time - start_time} %>% + .[3] %>% + round(1) %>% + seconds_to_period() %>% + str_c('[',.,']') +} + +#' @importFrom dplyr bind_cols all_of + +eliminate <- function(MFs,by,direction){ + direct <- get(direction) + + MFs %>% + bind_cols(MFs %>% select(by = all_of(by))) %>% + group_by(Feature) %>% + filter(by == direct(by)) %>% + select(-all_of(by)) %>% + ungroup() +} + +#' @importFrom dplyr rename +#' @importFrom purrr map_lgl + +addMFs <- function(rel,MF,identMF = T){ + + if (identMF == T) { + relations <- rel %>% + filter(Feature1 %in% MF$Feature, Feature2 %in% MF$Feature) + } else { + relations <- rel %>% + filter(Feature1 %in% MF$Feature | Feature2 %in% MF$Feature) + } + relations <- relations %>% + left_join(MF %>% + select(Feature,MF,Isotope,Adduct,`Measured m/z`),by = c('Feature1' = 'Feature')) %>% + rename(MF1 = MF) + + chr_columns <- relations %>% + map_lgl(is.character) + + relations[,chr_columns] <- relations[,chr_columns] %>% + { + .[is.na(.)] <- '' + . + } + + relations <- relations %>% + filter(Isotope1 == Isotope & Adduct1 == Adduct) %>% + select(-(Isotope:`Measured m/z`)) %>% + left_join(MF %>% + select(Feature,MF,Isotope,Adduct,`Measured m/z`),by = c('Feature2' = 'Feature')) %>% + rename(MF2 = MF) + + chr_columns <- relations %>% + map_lgl(is.character) + + relations[,chr_columns] <- relations[,chr_columns] %>% + { + .[is.na(.)] <- '' + . + } + + relations <- relations %>% + filter(Isotope2 == Isotope & Adduct2 == Adduct) %>% + select(-(Isotope:`Measured m/z`)) + + if (nrow(relations) > 0) { + relations[relations == ''] <- NA + } + + return(relations) +} + +addNames <- function(rel){ + iso <- rel + iso$Isotope1[is.na(iso$Isotope1)] <- '' + iso$Isotope2[is.na(iso$Isotope2)] <- '' + iso <- iso %>% + mutate(Name1 = str_c(Feature1,MF1,Isotope1,Adduct1,sep = ' '), + Name2 = str_c(Feature2,MF2,Isotope2,Adduct2,sep = ' ')) + rel %>% + bind_cols(iso %>% + select(Name1,Name2)) %>% + select(Name1,Name2,Feature1:MF2) +} + +collateM <- function(rel,max_M){ + bind_rows(select(rel, + mz = `m/z1`, + RetentionTime = RetentionTime1, + Isotope = Isotope1, + Adduct = Adduct1, + Feature = Feature1), + select(rel, + mz = `m/z2`, + RetentionTime = RetentionTime2, + Isotope = Isotope2, + Adduct = Adduct2, + Feature = Feature2)) %>% + distinct() %>% + arrange(mz) %>% + rowwise() %>% + group_split() %>% + furrr::future_map_dfr(~.x %>% + mutate(M = calcM(mz, + adduct = Adduct, + isotope = Isotope)) + ) %>% + arrange(M) %>% + filter(M <= max_M) +} + +collateMFs <- function(rel,MF){ + bind_rows(select(rel, + Name = Name1, + Feature = Feature1, + mz = `m/z1`, + RetentionTime = RetentionTime1, + Isotope = Isotope1, + Adduct = Adduct1, + MF = MF1), + select(rel, + Name = Name2, + Feature = Feature2, + mz = `m/z2`, + RetentionTime = RetentionTime2, + Isotope = Isotope2, + Adduct = Adduct2, + MF = MF2)) %>% + mutate(RetentionTime = as.numeric(RetentionTime)) %>% + arrange(mz) %>% + select(-mz) %>% + left_join(MF, by = c("Feature", + "RetentionTime", + "Isotope", + "Adduct", + 'MF')) %>% + distinct() %>% + mutate(ID = 1:nrow(.)) +} + +AIS <- function(assignment){ + possible_products <- expand_grid( + Adduct = adducts(assignment) %>% + flatten_chr(), + Isotope = c(NA,isotopes(assignment)) + ) + + adducts_scores <- assignment %>% + adducts() %>% + map(~tibble(Adduct = .x, + Adduct_Score = (length(.x) -1):0)) %>% + bind_rows() + + isotopes_scores <- assignment %>% + isotopes() %>% + {c(NA,.)} %>% + {tibble(Isotope = ., + Isotope_Score = (length(.) -1):0)} + + possible_products %>% + left_join(adducts_scores,by = 'Adduct') %>% + left_join(isotopes_scores,by = 'Isotope') %>% + mutate(AIS = Adduct_Score + Isotope_Score, + AIS = AIS / max(AIS)) %>% + select(-contains('Score')) +} + +#' @importFrom tidyr expand_grid +#' @importFrom purrr flatten_chr map_dfr + +maxAIS <- function(assignment){ + assignment_adducts <- adducts(assignment) + assignment_isotopes <- isotopes(assignment) + + n_adducts <- assignment %>% + adducts() %>% + flatten_chr() %>% + length() + + n_isotopes <- assignment %>% + isotopes() %>% + {c(NA,.)} %>% + length() + + max_score <- (n_adducts * n_isotopes) / 2 + + return(max_score) +} + + +#' @importFrom furrr future_map_dfr + +generateMFs <- function(M, + ppm, + rank_threshold, + adduct_rules, + isotope_rules, + AIS){ + nM <- nrow(M) + + M %>% + ungroup() %>% + slice_sample(n = nM) %>% + split(1:nrow(.)) %>% + future_map_dfr(~{ + mf <- ipMF(mz = .x$mz, + adduct = .x$Adduct, + isotope = .x$Isotope, + ppm = ppm, + adduct_rules_table = adduct_rules, + isotope_rules_table = isotope_rules) %>% + mutate(Rank = rank(100 - `Plausibility (%)`, + ties.method = 'min')) %>% + filter(Rank <= rank_threshold) + + if (nrow(mf) > 0) { + mf %>% + left_join(select(M, + Feature, + RetentionTime, + M, + mz), + by = c('Measured M' = 'M','Measured m/z' = 'mz')) %>% + rowwise() %>% + select(Feature,RetentionTime,MF,Isotope,Adduct,`Theoretical M`, + `Measured M`,`Theoretical m/z`,`Measured m/z`, `PPM error`, + `MF Plausibility (%)` = `Plausibility (%)`) %>% + left_join(AIS, + by = c('Adduct','Isotope')) + } else { + return(NULL) + } + }, + .options = furrr_options(seed = 1234)) +} + +#' @importFrom mzAnnotation transformationPossible + +sanitiseTransformations <- function(graph_edges,transformation_rules_table){ + transforms <- dplyr::bind_rows( + graph_edges %>% + filter(is.na(Transformation1)) %>% + select( + from = MF1, + to = MF2, + transformation = Transformation2), + graph_edges %>% + filter(is.na(Transformation2)) %>% + select( + from = MF2, + to = MF1, + transformation = Transformation1) + + ) + + transformation_possible <- transforms %>% + rowwise() %>% + group_split() %>% + map_lgl(~mzAnnotation::transformationPossible( + .x$from, + .x$to, + .x$transformation, + transformation_rules_table)) + + graph_edges %>% + filter(transformation_possible) +} diff --git a/R/networkComponents.R b/R/networkComponents.R deleted file mode 100644 index 5babed4..0000000 --- a/R/networkComponents.R +++ /dev/null @@ -1,4 +0,0 @@ - -networkComponents <- function(nodes,edges){ - graph <- as_tbl_graph(edges,directed = F) -} \ No newline at end of file diff --git a/R/parameters.R b/R/parameters.R new file mode 100644 index 0000000..fb86832 --- /dev/null +++ b/R/parameters.R @@ -0,0 +1,561 @@ +#' S4 class for assignment parameters +#' @rdname AssignmentParameters-class +#' @description An S4 class to store assignment parameters. +#' @slot technique the analytical technique +#' @slot correlations_parameters a list of correlation parameters to be passed to `metabolyseR::correlations()` +#' @slot max_M the maximum molecular mass for which to assign molecular formulas +#' @slot MF_rank_threshold rank threshold for molecular formula selection +#' @slot ppm the parts per million error threshold +#' @slot limit the atomic mass unit deviation limit for relationship calculation +#' @slot RT_diff_limit the limit for retention time differences for correlated features in adduct and isotopic assignment +#' @slot adducts a list of character vectors containing the adducts names. List element names should denote ionisation mode. The order that these adducts are provided denotes their expected relative importance to assignments with the first expected to be the most common and the last the least common within each ionisation mode. +#' @slot isotopes a character vector of isotopes to use. Similarly to the adducts, their order denotes the expected commonality in the data. +#' @slot transformations a character vector of transformations molecular formula changes +#' @slot adduct_rules a tibble containing the adduct formation rules as returned by `mzAnnotation::adduct_rules()` +#' @slot isotope_rules a tibble containing the isotope rules as returned by `mzAnnotation::isotope_rules()` +#' @slot transformation_rules tibble containing the transformation rules as returned by `mzAnnotation::transformation_rules()` +#' @importFrom mzAnnotation adduct_rules isotope_rules transformation_rules + +setClass('AssignmentParameters', + slots = list( + technique = 'character', + correlations_parameters = 'list', + max_M = 'numeric', + MF_rank_threshold = 'numeric', + ppm = 'numeric', + limit = 'numeric', + RT_diff_limit = 'numeric', + adducts = 'list', + isotopes = 'character', + transformations = 'character', + adduct_rules = 'tbl_df', + isotope_rules = 'tbl_df', + transformation_rules = 'tbl_df' + ), + prototype = list( + technique = 'FIE-HRMS', + correlations_parameters = list(method = 'spearman', + pAdjustMethod = 'bonferroni', + corPvalue = 0.05, + minCoef = 0.7, + maxCor = Inf), + max_M = 800, + MF_rank_threshold = 3, + ppm = 6, + limit = 0.001, + RT_diff_limit = numeric(), + isotopes = c('13C','18O','13C2'), + adducts = list(n = c("[M-H]1-", "[M+Cl]1-", "[M+K-2H]1-", + "[M-2H]2-", "[M+Cl37]1-","[2M-H]1-"), + p = c('[M+H]1+','[M+K]1+','[M+Na]1+','[M+K41]1+', + '[M+2H]2+','[2M+H]1+')), + transformations = transformation_rules()$`MF Change`, + adduct_rules = adduct_rules(), + isotope_rules = isotope_rules(), + transformation_rules = transformation_rules() + )) + +setValidity('AssignmentParameters',function(object){ + technique_correct <- technique(object) %in% availableTechniques() + + if (isFALSE(technique_correct)) { + availableTechniques() %>% + paste(collapse = ', ') %>% + paste0('Technique should be one of ',.) + } + else TRUE +}) + +setValidity('AssignmentParameters',function(object){ + + correlations_parameters <- metabolyseR::correlationsParameters() %>% + names() + + if (!any(names(object@correlations_parameters) %in% + correlations_parameters)) { + correlations_parameters %>% + paste0('`',.,'`') %>% + paste(collapse = ', ') %>% + paste0('Correlations parameters should only include ',.) + } + else TRUE +}) + +#' @importFrom methods show +#' @importFrom crayon yellow +#' @importFrom purrr map + +setMethod('show',signature = 'AssignmentParameters', + function(object){ + cat(yellow('\nAssignment Parameters:'),'\n') + cat('\n') + cat('\t','Technique:\t\t',object@technique,'\n') + cat('\t','Max M:\t\t\t',object@max_M,'\n') + cat('\t','MF rank threshold:\t',object@MF_rank_threshold,'\n') + cat('\t','PPM threshold:\t\t',object@ppm,'\n') + cat('\t','Relationship limit:\t',object@limit,'\n') + + if (object@technique != 'FIE') { + cat('\t','RT limit:\t\t',object@RT_diff_limit,'\n') + } + + + cat('\t','Correlations:\n') + correlationsParameters(object) %>% + paste0('\t\t',names(.),': ',.,'\n') %>% + cat() + + cat('\n\t','Adducts:','\n') + adducts <- map(names(object@adducts),~{ + a <- str_c(object@adducts[[.]],collapse = ', ') + str_c(.,': ',a) + }) %>% + str_c(collapse = '\n\t ') + cat('\t',adducts,'\n') + + cat('\t','Isotopes:',str_c(object@isotopes,collapse = ', '),'\n') + + cat('\t','Transformations:',str_c(object@transformations,collapse = ', ')) + + cat('\n') + } +) + +#' Parameter get and set methods +#' @rdname parameters +#' @description Get and set methods for the `AssignmentParameters` S4 class. +#' @param x S4 object of class `AssignmentParameters` +#' @param value the value to set +#' @details +#' * `technique` - Get the analytical technique. +#' * `correlationsParameters` - Get or set the correlation analysis parameters to be passed to `metabolyseR::correlations()`. +#' * `limit` - Get or set the atomic mass unit limit for relationship calculation. +#' * `maxM` - Get or set the maximum molecular mass limit for which to assign molecular formulas. +#' * `MFrankThreshold` - Get or set the molecular formula rank threshold for molecular formula selection. +#' * `ppm` - Get or set the parts per million error threshold. +#' * `isotopes` - Get or set the isotope names. The order in which these are specified denotes the expected relative commonality within the data set. +#' * `adducts` - Get or set the adduct names for the ionisation modes. The order in which these are specified denotes the expected relative commonality within the data set for each ionisation mode. +#' * `transformations` - Get or set the transformation molecular formula changes. +#' * `isotopeRules` - Get or set the isotope rules table. The format of this tibble should match that of `mzAnnotation::isotope_rules()`. +#' * `adductRules` - Get or set the adduct rules table. The format of this tibble should match that of `mzAnnotation::adduct_rules()`. +#' * `techniqueRules` - Get or set the transformation rules table. The format of this tibble should match that of `mzAnnotation::transformation_rules()`. +#' @examples +#' assignment_parameters <- assignmentParameters('FIE') +#' +#' ## Return the analytical technique +#' technique(assignment_parameters) +#' +#' ## Return correlations parameters +#' correlationsParameters(assignment_parameters) +#' +#' ## Set correlations parameters +#' correlationsParameters(assignment_parameters)$minCoef <- 0.75 +#' +#' ## Return limit +#' limit(assignment_parameters) +#' +#' ## Set limit +#' limit(assignment_parameters) <- 0.002 +#' +#' ## Return max M +#' maxM(assignment_parameters) +#' +#' ## Set max M +#' maxM(assignment_parameters) <- 500 +#' +#' ## Return MF rank threshold +#' MFrankThreshold(assignment_parameters) +#' +#' ## Set MF rank threshold +#' MFrankThreshold(assignment_parameters) <- 3 +#' +#' ## Return ppm +#' ppm(assignment_parameters) +#' +#' ## Set ppm +#' ppm(assignment_parameters) <- 3 +#' +#' ## Return isotopes +#' isotopes(assignment_parameters) +#' +#' ## Set isotopes +#' isotopes(assignment_parameters) <- '13C' +#' +#' ## Return adducts +#' adducts(assignment_parameters) +#' +#' ## Set adducts +#' adducts(assignment_parameters) <- list(n = c('[M-H]1-','[M+Cl]1-'), +#' p = c('[M+H]1+','[M+K]1+')) +#' +#' ## Return transformations +#' transformations(assignment_parameters) +#' +#' ## Set transformations +#' transformations(assignment_parameters) <- "M - [O] + [NH2]" +#' +#' ## Return adduct rules +#' adductRules(assignment_parameters) +#' +#' ## Set adduct rules +#' adductRules(assignment_parameters) <- mzAnnotation::adduct_rules() +#' +#' ## Return isotope rules +#' isotopeRules(assignment_parameters) +#' +#' ## Set isotope rules +#' isotopeRules(assignment_parameters) <- mzAnnotation::isotope_rules() +#' +#' ## Return transformation rules +#' transformationRules(assignment_parameters) +#' +#' ## Set transformation rules +#' transformationRules(assignment_parameters) <- mzAnnotation::transformation_rules() +#' @export + +setGeneric('technique',function(x) + standardGeneric('technique')) + +#' @rdname parameters + +setMethod('technique',signature = 'AssignmentParameters', + function(x){ + x@technique + }) + +#' @rdname parameters +#' @export + +setGeneric('correlationsParameters', + function(x) standardGeneric('correlationsParameters')) + +#' @rdname parameters + +setMethod('correlationsParameters',signature = 'AssignmentParameters', + function(x) x@correlations_parameters) + +#' @rdname parameters +#' @export + +setGeneric('correlationsParameters<-', + function(x,value) standardGeneric('correlationsParameters<-')) + +#' @rdname parameters +#' @importFrom methods validObject + +setMethod('correlationsParameters<-',signature = c('AssignmentParameters','list'), + function(x,value){ + x@correlations_parameters <- value + validObject(x) + return(x) + }) + +#' @rdname parameters +#' @export + +setGeneric('limit',function(x) + standardGeneric('limit')) + +#' @rdname parameters + +setMethod('limit',signature = 'AssignmentParameters', + function(x){ + x@limit + }) + +#' @rdname parameters +#' @export + +setGeneric('limit<-',function(x,value) + standardGeneric('limit<-')) + +#' @rdname parameters + +setMethod('limit<-',signature = 'AssignmentParameters', + function(x,value){ + x@limit <- value + return(x) + }) + +#' @rdname parameters +#' @export + +setGeneric('maxM',function(x) + standardGeneric('maxM')) + +#' @rdname parameters + +setMethod('maxM',signature = 'AssignmentParameters', + function(x){ + x@max_M + }) + +#' @rdname parameters +#' @export + +setGeneric('maxM<-',function(x,value) + standardGeneric('maxM<-')) + +#' @rdname parameters + +setMethod('maxM<-',signature = 'AssignmentParameters', + function(x,value){ + x@max_M <- value + return(x) + }) + +#' @rdname parameters +#' @export + +setGeneric('MFrankThreshold',function(x) + standardGeneric('MFrankThreshold')) + +#' @rdname parameters + +setMethod('MFrankThreshold',signature = 'AssignmentParameters', + function(x){ + x@MF_rank_threshold + }) + +#' @rdname parameters +#' @export + +setGeneric('MFrankThreshold<-',function(x,value) + standardGeneric('MFrankThreshold<-')) + +#' @rdname parameters + +setMethod('MFrankThreshold<-',signature = 'AssignmentParameters', + function(x,value){ + x@MF_rank_threshold <- value + return(x) + }) + +#' @rdname parameters +#' @export + +setGeneric('ppm',function(x) + standardGeneric('ppm')) + +#' @rdname parameters + +setMethod('ppm',signature = 'AssignmentParameters', + function(x){ + x@ppm + }) + +#' @rdname parameters +#' @export + +setGeneric('ppm<-',function(x,value) + standardGeneric('ppm<-')) + +#' @rdname parameters + +setMethod('ppm<-',signature = 'AssignmentParameters', + function(x,value){ + x@ppm <- value + return(x) + }) + +#' @rdname parameters +#' @export + +setGeneric('isotopes',function(x) + standardGeneric('isotopes')) + +#' @rdname parameters + +setMethod('isotopes',signature = 'AssignmentParameters', + function(x){ + x@isotopes + }) + +#' @rdname parameters +#' @export + +setGeneric('isotopes<-',function(x,value) + standardGeneric('isotopes<-')) + +#' @rdname parameters + +setMethod('isotopes<-',signature = 'AssignmentParameters', + function(x,value){ + x@isotopes <- value + return(x) + }) + +#' @rdname parameters +#' @export + +setGeneric('adducts',function(x) + standardGeneric('adducts')) + +#' @rdname parameters + +setMethod('adducts',signature = 'AssignmentParameters', + function(x){ + x@adducts + }) + +#' @rdname parameters +#' @export + +setGeneric('adducts<-',function(x,value) + standardGeneric('adducts<-')) + +#' @rdname parameters + +setMethod('adducts<-',signature = 'AssignmentParameters', + function(x,value){ + x@adducts <- value + return(x) + }) + +#' @rdname parameters +#' @export + +setGeneric('transformations',function(x) + standardGeneric('transformations')) + +#' @rdname parameters + +setMethod('transformations',signature = 'AssignmentParameters', + function(x){ + x@transformations + }) + +#' @rdname parameters +#' @export + +setGeneric('transformations<-',function(x,value) + standardGeneric('transformations<-')) + +#' @rdname parameters + +setMethod('transformations<-',signature = 'AssignmentParameters', + function(x,value){ + x@transformations <- value + return(x) + }) + +#' @rdname parameters +#' @export + +setGeneric('adductRules',function(x) + standardGeneric('adductRules')) + +#' @rdname parameters + +setMethod('adductRules',signature = 'AssignmentParameters', + function(x){ + x@adduct_rules + }) + +#' @rdname parameters +#' @export + +setGeneric('adductRules<-',function(x,value) + standardGeneric('adductRules<-')) + +#' @rdname parameters + +setMethod('adductRules<-',signature = 'AssignmentParameters', + function(x,value){ + x@adduct_rules <- value + return(x) + }) + +#' @rdname parameters +#' @export + +setGeneric('isotopeRules',function(x) + standardGeneric('isotopeRules')) + +#' @rdname parameters + +setMethod('isotopeRules',signature = 'AssignmentParameters', + function(x){ + x@isotope_rules + }) + +#' @rdname parameters +#' @export + +setGeneric('isotopeRules<-',function(x,value) + standardGeneric('isotopeRules<-')) + +#' @rdname parameters + +setMethod('isotopeRules<-',signature = 'AssignmentParameters', + function(x,value){ + x@isotope_rules <- value + return(x) + }) + +#' @rdname parameters +#' @export + +setGeneric('transformationRules',function(x) + standardGeneric('transformationRules')) + +#' @rdname parameters + +setMethod('transformationRules',signature = 'AssignmentParameters', + function(x){ + x@transformation_rules + }) + +#' @rdname parameters +#' @export + +setGeneric('transformationRules<-',function(x,value) + standardGeneric('transformationRules<-')) + +#' @rdname parameters + +setMethod('transformationRules<-',signature = 'AssignmentParameters', + function(x,value){ + x@transformation_rules <- value + return(x) + }) + +#' Available analytical techniques +#' @description The available analytical techniques for molecular formula assignment parameters. +#' @return A `character` vector of technique names. +#' @examples +#' availableTechniques() +#' @export + +availableTechniques <- function(){ + c('FIE-HRMS','RP-LC-HRMS','NP-LC-HRMS') +} + +#' Assignment parameters +#' @description Return the default molecular formula assignment parameters for a given analytical technique. +#' @param technique technique to use for assignment +#' @return An object of S4 class `AssignmentParameters` +#' @examples assignmentParameters('FIE-HRMS') +#' @importFrom methods new +#' @export + +assignmentParameters <- function(technique = availableTechniques()){ + + technique <- match.arg(technique, + choices = availableTechniques()) + + parameters <- switch(technique, + `FIE-HRMS` = new('AssignmentParameters'), + `RP-LC-HRMS` = new('AssignmentParameters', + technique = 'RP-LC-HRMS', + RT_diff_limit = 1/60), + `NP-LC-HRMS` = new('AssignmentParameters', + technique = 'NP-LC-HRMS', + RT_diff_limit = 1/60, + adducts = list(n = c("[M-H]1-", "[M+Cl]1-", "[M+K-2H]1-", + "[M-2H]2-", "[M+Cl37]1-","[2M-H]1-"), + p = c('[M+H]1+','[M+K]1+','[M+Na]1+','[M+K41]1+', + '[M+NH4]1+','[M+2H]2+','[2M+H]1+')))) + + return(parameters) +} \ No newline at end of file diff --git a/R/peakData.R b/R/peakData.R deleted file mode 100644 index 9af722b..0000000 --- a/R/peakData.R +++ /dev/null @@ -1,5 +0,0 @@ -#' peakData -#' @description example peak intensity table of sample data from the example FIE-HRMS B. distachyon ecotype data in the metaboData package. -#' @format A tibble containining 60 rows and 1003 variables - -'peakData' \ No newline at end of file diff --git a/R/plotAdductDist-method.R b/R/plotAdductDist-method.R deleted file mode 100644 index 2b7c675..0000000 --- a/R/plotAdductDist-method.R +++ /dev/null @@ -1,34 +0,0 @@ -#' plotAdductDist-Assignment -#' @rdname plotAdductDist -#' @description Plot adduct distributions. -#' @param assignment S4 object of class Assignment -#' @importFrom patchwork wrap_plots -#' @importFrom ggthemes ptol_pal -#' @importFrom ggplot2 ggplot geom_bar theme_bw facet_wrap theme element_text -#' @export - -setMethod('plotAdductDist',signature = 'Assignment', - function(assignment){ - assign <- assignment %>% - assignments() - - assign$Mode[assign$Mode == 'n'] <- 'Negative Mode' - assign$Mode[assign$Mode == 'p'] <- 'Positive Mode' - - assign %>% split(.$Mode) %>% - map(~{ - d <- . - ggplot(d,aes(x = Adduct)) + - geom_bar(colour = 'black',fill = ptol_pal()(1)) + - theme_bw() + - facet_wrap(~Isotope) + - labs(title = d$Mode[1], - y = 'Count', - caption = str_c('N = ',nrow(d))) + - theme(plot.title = element_text(face = 'bold'), - axis.title = element_text(face = 'bold'), - axis.text.x = element_text(angle = 45, hjust = 1)) - }) %>% - wrap_plots() - } -) \ No newline at end of file diff --git a/R/plotAdductDist.R b/R/plotAdductDist.R new file mode 100644 index 0000000..9ef01fa --- /dev/null +++ b/R/plotAdductDist.R @@ -0,0 +1,58 @@ + +plotDist <- function(x){ + check_installed(c('ggplot2', + 'ggthemes')) + + ggplot2::ggplot(x,ggplot2::aes(x = Adduct)) + + ggplot2::geom_bar(colour = 'black', + fill = ggthemes::ptol_pal()(1)) + + ggplot2::scale_y_continuous(expand = c(0,0)) + + ggplot2::scale_x_discrete(expand = c(0,0)) + + ggplot2::theme_bw() + + ggplot2::facet_wrap(~Isotope, + scales = 'free') + + ggplot2::labs(title = x$Mode[1], + y = 'Count', + caption = str_c('N = ',nrow(x))) + + ggplot2::theme(plot.title = ggplot2::element_text(face = 'bold',hjust = 0.5), + axis.title = ggplot2::element_text(face = 'bold'), + axis.text.x = ggplot2::element_text(angle = 45, hjust = 1), + panel.border = ggplot2::element_blank(), + axis.line = ggplot2::element_line(), + panel.grid = ggplot2::element_blank(), + strip.background = ggplot2::element_blank(), + strip.text = ggplot2::element_text(face = 'bold')) +} + +#' @rdname plotting +#' @importFrom tidyr replace_na +#' @export + +setGeneric('plotAdductDist',function(assignment){ + standardGeneric('plotAdductDist') +}) + +#' @rdname plotting +#' @importFrom rlang check_installed + +setMethod('plotAdductDist',signature = 'Assignment', + function(assignment){ + + check_installed('patchwork') + + assign <- assignment %>% + assignments() %>% + replace_na(list(Isotope = '')) %>% + mutate(Isotope = factor(Isotope, + levels = c('',isotopes(assignment))) + ) + + assign$Mode[assign$Mode == 'n'] <- 'Negative Mode' + assign$Mode[assign$Mode == 'p'] <- 'Positive Mode' + + assign %>% + split(.$Mode) %>% + map(plotDist) %>% + patchwork::wrap_plots() + } +) \ No newline at end of file diff --git a/R/plotFeatureSolutions.R b/R/plotFeatureSolutions.R deleted file mode 100644 index 8d2521a..0000000 --- a/R/plotFeatureSolutions.R +++ /dev/null @@ -1,92 +0,0 @@ -#' plotFeatureSolutions -#' @rdname plotFeatureSolutions -#' @description Plot possible MF solutions for a given feature. -#' @param assignment S4 object of class Assignent -#' @param feature name of feature to plot -#' @param maxComponents maximum number of components to plot -#' @importFrom patchwork plot_annotation -#' @importFrom ggraph create_layout scale_edge_color_gradient geom_node_label -#' @importFrom ggplot2 scale_fill_manual margin xlim ylim guides -#' @export - -setMethod('plotFeatureSolutions',signature = 'Assignment', - function(assignment,feature,maxComponents = 10){ - - n <- nodes(assignment@addIsoAssign$graph) - - comp <- n %>% - filter(Feature == feature) %>% - select(Component,Plausibility) %>% - distinct() %>% - arrange(Component) - - graph <- assignment@addIsoAssign$graph %>% - filter(Component %in% comp$Component) %>% - mutate(name = str_replace_all(name,' ','\n')) %>% - mutate(name = str_replace_all(name,' ','\n')) %>% - morph(to_components) - - graphComponents <- graph %>% - map_dbl(~{nodes(.) %>% - .$Component %>% - .[1] - }) - - comp <- comp %>% - arrange(desc(Plausibility)) %>% - .$Component - - graph <- graph %>% - set_names(graphComponents) %>% - .[comp %>% as.character()] - - if (length(comp) > maxComponents) { - graph <- graph[1:maxComponents] - } - - selectedComp <- assignment@addIsoAssign$filteredGraph %>% - nodes() %>% - select(Feature,Component) %>% - filter(Feature == feature) %>% - .$Component - - graph %>% - map(~{ - stats <- nodes(.) %>% - select(Component:Plausibility) %>% - .[1,] - - if (stats$Component[1] == selectedComp){ - border <- 'red' - } else { - border <- 'black' - } - - g <- . - g <- g %>% - mutate(Feat = Feature == feature) %>% - create_layout('nicely') - ggraph(g) + - geom_edge_link(aes(colour = r)) + - scale_edge_color_gradient(low = 'white',high = 'black',limits = c(0.5,1)) + - geom_node_label(aes(label = name,fill = Feat),size = 2,) + - scale_fill_manual(values = c('white','steelblue')) + - theme_graph(title_size = 12, - title_face = 'plain', - foreground = border, - plot_margin = margin(5, 5, 5, 5)) + - labs(title = str_c('Component ',stats$Component), - caption = str_c('Size = ',stats$Size,'; ', - 'Nodes = ',stats$Nodes,'; ', - 'Weight = ',stats$Weight %>% round(2),'; ', - 'Density = ',stats$Density %>% round(2),'; ', - 'AIS = ',stats$AIS %>% round(2),'; ', - 'Plausibility = ',stats$Plausibility %>% round(2))) + - xlim(min(g$x) - (max(g$x) - min(g$x)) * 0.05, - max(g$x) + (max(g$x) - min(g$x)) * 0.05) + - ylim(min(g$y) - (max(g$y) - min(g$y)) * 0.05, - max(g$y) + (max(g$y) - min(g$y)) * 0.05) + - guides(fill = FALSE) - }) %>% - wrap_plots() + plot_annotation(title = str_c('Solutions for feature ',feature)) - }) diff --git a/R/plotNetwork-method.R b/R/plotNetwork-method.R deleted file mode 100644 index 6fa9799..0000000 --- a/R/plotNetwork-method.R +++ /dev/null @@ -1,92 +0,0 @@ -#' plotNetwork-Assignment -#' @rdname plotNetwork -#' @description plot assignment network -#' @param assignment of class Assignment -#' @param layout graph layout to use. See \code{\link[ggraph]{ggraph}} for layout options -#' @param rThreshold r threhold to use for filtering edge correlation weights -#' @importFrom tidygraph as_tbl_graph bind_graphs -#' @importFrom igraph set_vertex_attr set_edge_attr -#' @importFrom ggraph ggraph geom_edge_link geom_node_point theme_graph geom_node_text facet_edges -#' @importFrom ggthemes scale_fill_ptol -#' @importFrom ggplot2 labs aes element_blank coord_fixed -#' @importFrom graphlayouts layout_igraph_stress -#' @export - -setMethod('plotNetwork',signature = 'Assignment', - function(assignment, layout = 'stress', rThreshold = 0.7){ - - AI <- assignment@addIsoAssign$filteredGraph - TA <- assignment@transAssign %>% - map(~{.$filteredGraph}) - - graph <- AI %>% - bind_graphs({a <- TA[[1]] - for (i in 2:length(TA)) { - a <- bind_graphs(a,TA[[i]]) - } - a - }) - - e <- edges(graph) %>% - mutate(Explained = 'Explained') - n <- nodes(graph) %>% - select(name:Score) %>% - distinct() %>% - mutate(Assigned = 'Assigned') - - network <- assignment %>% - .@preparedCorrelations %>% - filter(r > rThreshold) %>% - as_tbl_graph(directed = F) %>% - activate(nodes) %>% - rename(Feature = name) %>% - mutate(Mode = str_sub(Feature,1,1)) %>% - left_join(n, by = "Feature") %>% - activate(edges) %>% - left_join(e, by = c("Mode1", "Mode2", "m/z1", "m/z2", "RetentionTime1", "RetentionTime2", "log2IntensityRatio", "r", "ID")) - - assigned <- nodes(network)$Assigned - assigned[is.na(assigned)] <- 'Unassigned' - - network <- set_vertex_attr(network,'Assigned',value = assigned) - - explained <- edges(network)$Explained - explained[is.na(explained)] <- 'Unexplained' - - network <- set_edge_attr(network,'Explained',value = explained) - - explainedEdges <- network %>% - edges() %>% - .$Explained %>% - table() - - assignedNodes <- network %>% - nodes() %>% - .$Assigned %>% - table() - - rt <- str_c('Visualised using threshold of r > ',rThreshold) - nn <- str_c('Total nodes = ',sum(assignedNodes)) - an <- str_c('Assigned nodes = ', - assignedNodes[1], - ' (', - {assignedNodes[1]/sum(assignedNodes) * 100} %>% - round(),'%)') - ne <- str_c('Total edges = ',sum(explainedEdges)) - ee <- str_c('Explained edges = ', - explainedEdges[1], - ' (', - {explainedEdges[1]/sum(explainedEdges) * 100} %>% - round(),'%)') - - ggraph(network,layout = layout) + - geom_edge_link(alpha = 0.2) + - geom_node_point(aes(fill = Assigned),shape = 21) + - scale_fill_ptol() + - theme_graph() + - theme(legend.title = element_blank()) + - coord_fixed() + - facet_edges(~Explained) + - labs(title = str_c('Assignment correlation network'), - caption = str_c(rt,nn,an,ne,ee,sep = '\n')) - }) \ No newline at end of file diff --git a/R/plotSpectrum-method.R b/R/plotSpectrum-method.R deleted file mode 100644 index 72aedb6..0000000 --- a/R/plotSpectrum-method.R +++ /dev/null @@ -1,40 +0,0 @@ -#' plotSpectrum -#' @rdname plotSpectrum -#' @description Plot a spectrum for a given molecular formula -#' @param assignment S4 object of class Assignment -#' @param MF molecular formula -#' @importFrom tidyr gather -#' @importFrom dplyr group_by summarise -#' @importFrom ggplot2 geom_segment -#' @importFrom ggrepel geom_text_repel -#' @export - -setMethod('plotSpectrum',signature = 'Assignment',function(assignment,MF){ - mf <- MF - - feat <- assignment %>% - assignments() %>% - filter(MF == mf) - - dat <- assignment@data %>% - select(feat$Feature) %>% - gather('Feature','Intensity') %>% - group_by(Feature) %>% - summarise(Intensity = mean(Intensity)) %>% - mutate(`Relative Abundance` = Intensity / max(Intensity)) %>% - left_join(feat %>% - select(Feature,Adduct,Isotope,Mode,`m/z` = `Measured m/z`), by = "Feature") - - dat[is.na(dat)] <- '' - - dat <- dat %>% - mutate(Label = str_c(Isotope,Adduct,sep = ' ')) - - ggplot(dat) + - geom_segment(aes(x = `m/z`,xend = `m/z`, y = 0, yend = `Relative Abundance`),colour = ptol_pal()(1)) + - geom_text_repel(aes(x = `m/z`,y = `Relative Abundance`,label = Label)) + - theme_bw() + - labs(title = MF, - y = 'Relative Abundance') + - facet_wrap(~Mode,scales = 'free') -}) \ No newline at end of file diff --git a/R/plotSpectrum.R b/R/plotSpectrum.R new file mode 100644 index 0000000..e993fc4 --- /dev/null +++ b/R/plotSpectrum.R @@ -0,0 +1,68 @@ + +spectrumPlot <- function(dat,MF){ + check_installed(c('ggplot2', + 'ggrepel', + 'ggthemes')) + + dat$Mode[dat$Mode == 'n'] <- 'Negative mode' + dat$Mode[dat$Mode == 'n'] <- 'Positive mode' + + ggplot2::ggplot(dat) + + ggplot2::geom_segment( + ggplot2::aes(x = `m/z`, + xend = `m/z`, + y = 0, + yend = `Relative Abundance`), + colour = ggthemes::ptol_pal()(1)) + + ggrepel::geom_text_repel( + ggplot2::aes(x = `m/z`, + y = `Relative Abundance`, + label = Label)) + + ggplot2::theme_bw() + + ggplot2::scale_y_continuous(expand = c(0,0)) + + ggplot2::theme(panel.border = ggplot2::element_blank(), + panel.grid = ggplot2::element_blank(), + axis.line = ggplot2::element_line(), + axis.title = ggplot2::element_text(face = 'bold'), + strip.background = ggplot2::element_blank(), + strip.text = ggplot2::element_text(face = 'bold'), + plot.title = ggplot2::element_text(face = 'bold', + hjust = 0.5)) + + ggplot2::labs(title = MF, + y = 'Relative Abundance') + + ggplot2::facet_wrap(~Mode,scales = 'free') +} + +#' @rdname plotting +#' @importFrom tidyr gather +#' @importFrom dplyr group_by summarise +#' @export + +setGeneric('plotSpectrum',function(assignment,MF) + standardGeneric('plotSpectrum')) + +#' @rdname plotting + +setMethod('plotSpectrum',signature = 'Assignment',function(assignment,MF){ + mf <- MF + + feat <- assignment %>% + assignments() %>% + filter(MF == mf) + + dat <- assignment@data %>% + select(all_of(feat$Feature)) %>% + gather('Feature','Intensity') %>% + group_by(Feature) %>% + summarise(Intensity = mean(Intensity)) %>% + mutate(`Relative Abundance` = Intensity / max(Intensity)) %>% + left_join(feat %>% + select(Feature,Adduct,Isotope,Mode,`m/z` = `Measured m/z`), by = "Feature") + + dat[is.na(dat)] <- '' + + dat <- dat %>% + mutate(Label = str_c(Isotope,Adduct,sep = ' ')) + + spectrumPlot(dat,MF) +}) \ No newline at end of file diff --git a/R/plot_components.R b/R/plot_components.R new file mode 100644 index 0000000..47e069a --- /dev/null +++ b/R/plot_components.R @@ -0,0 +1,265 @@ + +graphTheme <- function(){ + ggplot2::theme( + legend.title = ggplot2::element_text(face = 'bold'), + plot.margin = ggplot2::margin(5, 5, 5, 5), + plot.title = ggplot2::element_text(face = 'bold',hjust = 0.5), + plot.caption = ggtext::element_markdown(hjust = 0.5) + ) +} + +plotGraph <- function(graph, + min_coef, + label_size = 3, + axis_offset = 0.1, + border = 'black', + highlight = NA){ + + if (!is.na(highlight)){ + graph <- graph %>% + activate(nodes) %>% + mutate(selected = Feature == highlight) + } + + g <- graph %>% + activate(nodes) %>% + mutate(name = str_replace_all(name,' ','\n') %>% + str_replace_all(' ','\n')) %>% + ggraph::create_layout('nicely') + + pl <- g %>% + ggraph::ggraph() + + ggraph::geom_edge_link(ggplot2::aes(colour = coefficient)) + + ggraph::scale_edge_color_gradient(low = 'lightgrey', + high = 'black', + limits = c(min_coef,1)) + + if (!is.na(highlight)) { + pl <- pl + + ggraph::geom_node_label( + ggplot2::aes(label = name,fill = selected), + size = label_size) + + ggplot2::scale_fill_manual(values = c('white','lightblue')) + + ggplot2::guides(fill = 'none') + } else { + pl <- pl + + ggraph::geom_node_label( + ggplot2::aes(label = name), + size = label_size) + } + + pl + + ggraph::theme_graph(base_family = '', + base_size = 10, + title_size = 11, + foreground = border) + + graphTheme() + + ggplot2::lims( + x = c( + min(g$x) - (max(g$x) - min(g$x)) * axis_offset, + max(g$x) + (max(g$x) - min(g$x)) * axis_offset + ), + y = c( + min(g$y) - (max(g$y) - min(g$y)) * axis_offset, + max(g$y) + (max(g$y) - min(g$y)) * axis_offset + ) + ) +} + +#' Plot assignment results +#' @rdname plotting +#' @description Plot molecular formula assignment results. +#' @param assignment an object of S4 class Assignment +#' @param feature the *m/z* feature to plot +#' @param MF the assigned molecular formula to plot +#' @param component component number to plot +#' @param iteration the assignment iteration of the component or components +#' @param type the graph type to return. `selected` returns the assignment graph after component selection. `all` returns all assignment components. +#' @param max_components themaximum number of components to plot +#' @param label_size node label size +#' @param axis_offset axis proportion by which to increase axis limits. Prevents cut off of node labels. +#' @param border specify a plot border colour +#' @param highlight specify a feature node to highlight +#' @details +#' * `plotComponent` - Plot a molecular formula component graph. +#' * `plotFeatureComponents` - Plot the possible component graphs for a given feature. +#' * `plotAdductDist` - Plot frequency distributions of the assigned adducts. +#' * `plotSpectrum` - Plot the spectrum of an assigned molecular formula. +#' @examples +#' library(ggraph) +#' plan(future::sequential) +#' p <- assignmentParameters('FIE-HRMS') +#' +#' mf_assignments <- assignMFs(feature_data,p) +#' +#' ## Plot a component +#' plotComponent(mf_assignments,1,'A&I1') +#' +#' ## Plot the components for a feature +#' plotFeatureComponents(mf_assignments,'n191.01962','A&I1') +#' +#' ## Plot the adduct distributions +#' plotAdductDist(mf_assignments) +#' +#' ## Plot the spectrum of an assigned molecular formula +#' plotSpectrum(mf_assignments,'C6H8O7') +#' @export + +setGeneric('plotComponent', + function(assignment, + component, + iteration, + type = c('selected','all'), + label_size = 3, + axis_offset = 0.1, + border = NA, + highlight = NA) + standardGeneric('plotComponent')) + +#' @importFrom dplyr mutate_if +#' @rdname plotting + +setMethod('plotComponent',signature = 'Assignment', + function(assignment, + component, + iteration, + type = c('selected','all'), + label_size = 3, + axis_offset = 0.1, + border = NA, + highlight = NA + ){ + + check_installed(c('ggraph', + 'ggplot2', + 'ggtext', + 'glue')) + + component_graph <- component(assignment, + component, + iteration, + type) + + if (!is.na(highlight) & + !highlight %in% nodes(component_graph)$Feature) { + stop(paste0('Highlight feature ',highlight,' not found in component.')) + } + + component_stats <- component_graph %>% + nodes() %>% + select(AIS, + Component:`Component Plausibility`) %>% + distinct() %>% + mutate_if(is.numeric,signif,digits = 3) + + min_coef <- correlationsParameters(assignment)$minCoef + + plotGraph(component_graph, + min_coef, + label_size, + axis_offset, + border, + highlight + ) + + ggplot2::labs( + title = paste0('Component ',component), + caption = glue::glue(' + Pc = {component_stats$`Component Plausibility`}; + Degree = {component_stats$Degree}; + AISc = {component_stats$AIS}' + )) + }) + +#' @rdname plotting +#' @export + +setGeneric('plotFeatureComponents', + function(assignment, + feature, + iteration, + type = c('all','selected'), + max_components = 6, + label_size = 3, + axis_offset = 0.1) + standardGeneric('plotFeatureComponents') +) + +#' @rdname plotting +#' @importFrom dplyr slice + +setMethod('plotFeatureComponents',signature = 'Assignment', + function(assignment, + feature, + iteration, + type = c('all','selected'), + max_components = 6, + label_size = 2, + axis_offset = 0.05){ + + check_installed(c('ggraph', + 'ggplot2', + 'ggtext', + 'patchwork')) + + if (!feature %in% colnames(featureData(assignment))) { + stop('Feature not found in assignment data.', + call. = FALSE) + } + + type <- match.arg(type, + choices = c('all','selected')) + + selected_component <- assignments(assignment) %>% + filter(Feature == feature, + Iteration == iteration) %>% + .$Component + + feature_components <- featureComponents(assignment,feature,type) %>% + filter(Iteration == iteration) %>% + select(Component) %>% + arrange(Component) %>% + mutate(border = 'black', + border = border %>% + replace(Component == selected_component, + 'red')) + + if (nrow(feature_components) == 0){ + stop(paste0('No components for feature ', + feature, + ' found in iteration ', + iteration,'.'), + call. = FALSE) + } + + if (nrow(feature_components) > max_components){ + feature_components <- slice( + feature_components, + seq_len(max_components)) + } + + pl <- feature_components %>% + rowwise() %>% + group_split() %>% + map(~plotComponent( + assignment, + .x$Component, + iteration, + type, + label_size, + axis_offset, + highlight = feature, + border = .x$border + )) %>% + patchwork::wrap_plots() + + patchwork::plot_layout(guides = 'collect') + + if (length(selected_component) > 0){ + pl <- pl + + patchwork::plot_annotation( + caption = 'Red highlighted graph denotes the component selected for assignment.', + theme = ggplot2::theme(plot.caption = ggplot2::element_text(hjust = 0)) + ) + } + + return(pl) + }) diff --git a/R/prepCorrelations-method.R b/R/prepCorrelations-method.R deleted file mode 100644 index e256345..0000000 --- a/R/prepCorrelations-method.R +++ /dev/null @@ -1,50 +0,0 @@ - -setMethod('prepCorrelations',signature = 'Assignment', - function(assignment){ - - if (assignment@log$verbose == T) { - startTime <- proc.time() - message(blue('Preparing correlations '),cli::symbol$continue,'\r',appendLF = FALSE) - } - - correlations <- assignment@preparedCorrelations - - correlations <- correlations %>% - mutate(Mode1 = str_split_fixed(Feature1,'@',2) %>% - .[,1] %>% - str_sub(1,1), - Mode2 = str_split_fixed(Feature2,'@',2) %>% - .[,1] %>% - str_sub(1,1), - `m/z1` = str_split_fixed(Feature1,'@',2) %>% - .[,1] %>% - str_replace_all('[:alpha:]','') %>% - as.numeric(), - `m/z2` = str_split_fixed(Feature2,'@',2) %>% - .[,1] %>% - str_replace_all('[:alpha:]','') %>% - as.numeric(), - RetentionTime1 = str_split_fixed(Feature1,'@',2) %>% - .[,2] %>% - as.numeric(), - RetentionTime2 = str_split_fixed(Feature2,'@',2) %>% - .[,2] %>% - as.numeric(), - ID = 1:nrow(.) - ) %>% - select(Feature1,Feature2,Mode1:RetentionTime2,log2IntensityRatio,r,ID) - - assignment@preparedCorrelations <- correlations - - if (assignment@log$verbose == T) { - endTime <- proc.time() - elapsed <- {endTime - startTime} %>% - .[3] %>% - round(1) %>% - seconds_to_period() %>% - str_c('[',.,']') - message(blue('Preparing correlations '),'\t\t',green(cli::symbol$tick),' ',elapsed) - } - - return(assignment) - }) \ No newline at end of file diff --git a/R/recalcComponents.R b/R/recalcComponents.R deleted file mode 100644 index 13ffbae..0000000 --- a/R/recalcComponents.R +++ /dev/null @@ -1,37 +0,0 @@ -#' @importFrom tidygraph to_components -#' @importFrom dplyr n - -recalcComponents <- function(graph,parameters){ - g <- graph %>% - activate(nodes) - - comp <- g %>% - nodes() %>% - .$Component %>% - unique() - - weights <- comp %>% - future_map(~{ - graph %>% - filter(Component == .x) %>% - edges() %>% - .$r %>% - mean() %>% - tibble(Weight = .) - },graph = g) %>% - set_names(comp) %>% - bind_rows(.id = 'Component') %>% - mutate(Component = as.numeric(Component)) - - g %>% - select(-Weight) %>% - left_join(weights,by = 'Component') %>% - morph(to_components) %>% - mutate(Size = graph_size(), - Nodes = n(), - Density = (2 * Size) / (Nodes * (Nodes - 1)), - Weight = sum(Weight) / Nodes, - AIS = sum(AddIsoScore) / Nodes, - Plausibility = AIS * Size * Weight) %>% - unmorph() -} \ No newline at end of file diff --git a/R/reexports.R b/R/reexports.R index 6df8a4b..5d70681 100644 --- a/R/reexports.R +++ b/R/reexports.R @@ -2,3 +2,8 @@ #' @importFrom future plan #' @export future::plan + +#' @rdname reexports +#' @importFrom magrittr %>% +#' @export +magrittr::`%>%` \ No newline at end of file diff --git a/R/relationships-method.R b/R/relationships.R similarity index 65% rename from R/relationships-method.R rename to R/relationships.R index 06431f5..3610589 100644 --- a/R/relationships-method.R +++ b/R/relationships.R @@ -1,29 +1,41 @@ + +#' @rdname assignment-methods +#' @export + +setGeneric("calcRelationships", function(assignment) + standardGeneric("calcRelationships")) + +#' @rdname assignment-methods #' @importFrom furrr future_map #' @importFrom dplyr mutate bind_rows filter vars contains -#' @importFrom dplyr inner_join semi_join select mutate_at -#' @importFrom stringr str_sub str_replace_all +#' @importFrom dplyr inner_join semi_join select mutate_at relocate +#' @importFrom stringr str_sub str_replace_all str_remove #' @importFrom mzAnnotation relationshipCalculator #' @importFrom magrittr %>% -#' @importFrom tibble tibble -#' @importFrom mzAnnotation adducts isotopes +#' @importFrom tibble tibble enframe -setMethod('relationships',signature = 'Assignment', - function(assignment,transformations = T){ +setMethod('calcRelationships',signature = 'Assignment', + function(assignment){ - if (assignment@log$verbose == T) { + invisible(gc()) + + cors <- assignment@correlations + + if (ncol(cors) == 0){ + stop('No correlations found. Has `calcCorrelations()` been called on this object?', + call. = FALSE) + } + + if (assignment@log$verbose == TRUE) { startTime <- proc.time() message(blue('Calculating relationships '),cli::symbol$continue,'\r',appendLF = 'FALSE') } - parameters <- assignment@parameters + parameters <- as(assignment,'AssignmentParameters') - cors <- assignment@preparedCorrelations - if (isTRUE(transformations)) { - trans <- c(NA,parameters@transformations) - } else { - trans <- NA - } + trans <- c(NA,transformations(assignment)) + rel <- cors %>% select(`m/z1`,`m/z2`,Mode1,Mode2) %>% @@ -33,39 +45,39 @@ setMethod('relationships',signature = 'Assignment', mzs <- bind_rows( .x %>% select(contains('1')) %>% - setNames(stringr::str_remove(names(.),'1')), + setNames(str_remove(names(.),'1')), .x %>% select(contains('2')) %>% - setNames(stringr::str_remove(names(.),'2')) + setNames(str_remove(names(.),'2')) ) modes <- mzs$Mode %>% unique() if (length(modes) > 1){ - adducts <- parameters@adducts %>% + specified_adducts <- adducts(assignment) %>% unlist() } else { - adducts <- parameters@adducts[[modes]] + specified_adducts <- adducts(assignment)[[modes]] } relationships <- relationshipCalculator(mzs$`m/z`, - limit = parameters@limit, - adducts = adducts, - isotopes = c(NA,parameters@isotopes), + limit = limit(assignment), + adducts = specified_adducts, + isotopes = c(NA,isotopes(assignment)), transformations = trans, - adductTable = parameters@adductRules, - isotopeTable = parameters@isotopeRules, - transformationTable = parameters@transformationRules) %>% + adduct_rules_table = adductRules(assignment), + isotope_rules_table = isotopeRules(assignment), + transformation_rules_table = transformationRules(assignment)) %>% left_join(mzs,by = c('m/z1' = 'm/z')) %>% rename(Mode1 = Mode) %>% left_join(mzs,by = c('m/z2' = 'm/z')) %>% rename(Mode2 = Mode) %>% - dplyr::relocate(contains('Mode'),.after = `m/z2`) + relocate(contains('Mode'),.after = `m/z2`) if (length(modes) > 1){ - adduct_modes <- parameters@adducts %>% - map(tibble::enframe,value = 'Adduct') %>% + adduct_modes <- adducts(assignment) %>% + map(enframe,value = 'Adduct') %>% bind_rows(.id = 'Mode') %>% select(-name) @@ -90,12 +102,12 @@ setMethod('relationships',signature = 'Assignment', contains('Isotope'), contains('Transformation'), log2IntensityRatio, - r, + coefficient, Error, ID) %>% mutate_at(vars(RetentionTime1,RetentionTime2),as.numeric) - assignment@relationships <- rel + relationships(assignment) <- rel if (assignment@log$verbose == T) { endTime <- proc.time() diff --git a/R/show-method.R b/R/show-method.R deleted file mode 100644 index 4a3e4a9..0000000 --- a/R/show-method.R +++ /dev/null @@ -1,83 +0,0 @@ -#' show-AssignmentParameters -#' @description show method for AssignmentParameters class. -#' @param object S4 object of class AssignmentParameters -#' @importFrom methods show -#' @importFrom crayon yellow -#' @importFrom purrr map -#' @export - -setMethod('show',signature = 'AssignmentParameters', - function(object){ - cat(yellow('\nAssignment Parameters:'),'\n') - cat('\n') - cat('\t','Technique:\t\t',object@technique,'\n') - cat('\t','Max M:\t\t\t',object@maxM,'\n') - cat('\t','Max MF score:\t\t',object@maxMFscore,'\n') - cat('\t','PPM threshold:\t\t',object@ppm,'\n') - cat('\t','Relationship limit:\t',object@limit,'\n') - - if (object@technique != 'FIE') { - cat('\t','RT window:\t\t',object@RTwindow,'\n') - } - - cat('\n\t','Adducts:','\n') - adducts <- map(names(object@adducts),~{ - a <- str_c(object@adducts[[.]],collapse = ', ') - str_c(.,': ',a) - }) %>% - str_c(collapse = '\n\t ') - cat('\t',adducts,'\n') - - cat('\t','Isotopes:',str_c(object@isotopes,collapse = ', '),'\n') - - cat('\t','Transformations:',str_c(object@transformations,collapse = ', ')) - - cat('\n') - } -) - -#' show-Assignment -#' @description show mehtod for Assignment class. -#' @param object S4 object of class Assignment -#' @importFrom crayon blue red green -#' @importFrom purrr map_dbl -#' @importFrom utils packageVersion -#' @importFrom igraph E -#' @export - -setMethod('show',signature = 'Assignment', - function(object){ - cat(blue('\nMFassign'),red(str_c('v',packageVersion('MFassign') %>% as.character())),'\n') - cat(yellow('Assignment:'),'\n') - cat('\t','Features:\t\t',ncol(object@data),'\n') - cat('\t','Correlations:\t\t',nrow(object@correlations),'\n') - cat('\t','Relationships:\t\t',nrow(object@relationships),'\n') - cat('\n') - if (length(object@addIsoAssign) > 0) { - cat('\t',green('Adduct & isotope assignment:'),'\n') - cat('\t\t','MFs:\t\t',length(unique(object@addIsoAssign$assigned$MF)),'\n') - cat('\t\t','Relationships:\t',object@addIsoAssign$filteredGraph %>% E() %>% length(),'\n') - cat('\t\t','Assigned:\t',nrow(object@addIsoAssign$assigned),'\n') - cat('\n') - } - if (length(object@transAssign) > 0) { - cat('\t',green('Transformation assignment:'),'\n') - cat('\t\t','Iterations:\t',length(object@transAssign),'\n') - transAssigned <- object@transAssign %>% - {.[map_dbl(.,length) > 0]} %>% - map_dbl(~{ - return(nrow(.$assigned)) - }) %>% - sum() - cat('\t\t','Assigned:\t',transAssigned,'\n') - cat('\n') - } - if (nrow(object@assignments) > 0) { - cat('\t','Total assignments:\t',blue(nrow(object@assignments)), - blue(str_c('(',round(nrow(object@assignments)/ncol(object@data) * 100),'%)')), - '\n') - cat('\t','Unique MFs:\t\t',blue(length(unique(object@assignments$MF))),'\n') - cat('\n') - } - } -) \ No newline at end of file diff --git a/R/summariseAssignment-method.R b/R/summariseAssignment-method.R deleted file mode 100644 index 2363339..0000000 --- a/R/summariseAssignment-method.R +++ /dev/null @@ -1,24 +0,0 @@ -#' summariseAssignment-Assignment -#' @rdname summariseAssignment -#' @description Summarise features assigned to moleuclar formulas. -#' @param assignment S4 object of class Assignment -#' @importFrom dplyr desc -#' @export - -setMethod('summariseAssignment',signature = 'Assignment', - function(assignment){ - assigned <- assignment %>% - assignments() %>% - split(.$MF) %>% - map(~{ - d <- . - d$Isotope[is.na(d$Isotope)] <- '' - d <- d %>% - mutate(IIP = str_c(Isotope,Adduct,sep = ' ')) %>% - arrange(`Measured m/z`) - tibble(MF = d$MF[1],Features = str_c(d$Feature,collapse = '; '),`Isotopes & Ionisation Products` = str_c(d$IIP,collapse = '; '),Count = nrow(d)) - }) %>% - bind_rows() %>% - arrange(desc(Count)) - return(assigned) -}) diff --git a/R/sysdata.rda b/R/sysdata.rda deleted file mode 100644 index 8f377ef..0000000 Binary files a/R/sysdata.rda and /dev/null differ diff --git a/R/transformationAssign-method.R b/R/transformationAssign-method.R deleted file mode 100644 index 73531d4..0000000 --- a/R/transformationAssign-method.R +++ /dev/null @@ -1,163 +0,0 @@ -#' @importFrom stringr str_c -#' @importFrom dplyr full_join select distinct -#' @importFrom mzAnnotation transformMF - -setMethod('transformationAssign',signature = 'Assignment', - function(assignment){ - - parameters <- assignment@parameters - count <- length(assignment@transAssign) - assigned <- assignment@assignments - - if (assignment@log$verbose == T) { - startTime <- proc.time() - message(blue(str_c('Transformation assignment iteration ', count + 1,' ')),cli::symbol$continue,'\r',appendLF = FALSE) - } - - rel <- assignment@relationships %>% - filter((`m/z1` %in% assigned$`Measured m/z` | (`m/z2` %in% assigned$`Measured m/z`)) & !(`m/z1` %in% assigned$`Measured m/z` & (`m/z2` %in% assigned$`Measured m/z`))) - - mz1 <- rel %>% - semi_join(assigned,by = c('m/z1' = 'Measured m/z', 'Adduct1' = 'Adduct', 'Isotope1' = 'Isotope')) %>% - filter(is.na(Transformation1)) - mz2 <- rel %>% - semi_join(assigned,by = c('m/z2' = 'Measured m/z', 'Adduct2' = 'Adduct', 'Isotope2' = 'Isotope')) %>% - filter(is.na(Transformation2)) - - rel <- bind_rows(mz1,mz2) - - if (nrow(rel) > 0) { - M <- bind_rows(select(rel,mz = `m/z1`,RetentionTime = RetentionTime1,Isotope = Isotope1, Adduct = Adduct1, Feature = Feature1), - select(rel,mz = `m/z2`,RetentionTime = RetentionTime2,Isotope = Isotope2, Adduct = Adduct2, Feature = Feature2)) %>% - distinct() %>% - arrange(mz) %>% - rowwise() %>% - mutate(M = calcM(mz,Adduct,Isotope)) %>% - arrange(M) %>% - filter(M <= parameters@maxM) %>% - filter(!(mz %in% assigned$`Measured m/z`)) - - nM <- nrow(M) - - MF <- M %>% - ungroup() %>% - slice_sample(n = nM) %>% - split(1:nrow(.)) %>% - future_map(~{ - mf <- MFgen(.x$M,.x$mz,ppm = parameters@ppm) - - if (nrow(mf) > 0) { - mf %>% - left_join(M,by = c('Measured M' = 'M','Measured m/z' = 'mz')) %>% - rowwise() %>% - mutate(`Theoretical m/z` = calcMZ(`Theoretical M`,Adduct,Isotope), - `PPM Error` = ppmError(`Measured m/z`,`Theoretical m/z`)) %>% - select(Feature,RetentionTime,MF,Isotope,Adduct,`Theoretical M`, - `Measured M`,`Theoretical m/z`,`Measured m/z`, `PPM Error`) %>% - rowwise() %>% - mutate(Score = MFscore(MF), - `PPM Error` = abs(`PPM Error`), - AddIsoScore = addIsoScore(Adduct, - Isotope, - parameters@adducts, - parameters@isotopes)) %>% - ungroup() %>% - filter(Score == min(Score,na.rm = TRUE)) %>% - filter(Score < parameters@maxMFscore) - } else { - return(NULL) - } - },.options = furrr_options(seed = 1234)) %>% - bind_rows() - - if (nrow(MF) > 0) { - - MF <- MF %>% - bind_rows(assigned %>% - select(names(MF)[!(names(MF) == 'AddIsoScore')]) %>% - rowwise() %>% - mutate(AddIsoScore = addIsoScore(Adduct,Isotope,parameters@adducts,parameters@isotopes))) - rel <- rel %>% - addMFs(MF,identMF = F) %>% - mutate(RetentionTime1 = as.numeric(RetentionTime1),RetentionTime2 = as.numeric(RetentionTime2)) %>% - addNames() - - if (nrow(rel) > 0) { - MFs <- bind_rows(select(rel,Name = Name1,Feature = Feature1,mz = `m/z1`,RetentionTime = RetentionTime1,Isotope = Isotope1, Adduct = Adduct1, MF = MF1), - select(rel,Name = Name2,Feature = Feature2,mz = `m/z2`,RetentionTime = RetentionTime2,Isotope = Isotope2, Adduct = Adduct2,MF = MF2)) %>% - mutate(RetentionTime = as.numeric(RetentionTime)) %>% - arrange(mz) %>% - select(-mz) %>% - left_join(MF, by = c("Feature", "RetentionTime", "Isotope", "Adduct",'MF')) %>% - distinct() %>% - mutate(ID = 1:nrow(.)) - - graph <- calcComponents(MFs,rel,parameters) - - filters <- tibble(Measure = c('Plausibility','Size','AIS','Score','PPM Error'), - Direction = c(rep('max',3),rep('min',2))) - - filteredGraph <- graph - - for (i in 1:nrow(filters)) { - f <- filters[i,] - filteredGraph <- filteredGraph %>% - activate(nodes) %>% - filter(name %in% {filteredGraph %>% - vertex.attributes() %>% - as_tibble() %>% - eliminate(f$Measure,f$Direction) %>% - .$name}) - if (V(filteredGraph) %>% length() > 0) { - filteredGraph <- filteredGraph %>% - recalcComponents(parameters) - } else { - break() - } - } - - newlyAssigned <- filteredGraph %>% - vertex.attributes() %>% - as_tibble() %>% - rename(Name = name) %>% - mutate(Mode = str_sub(Feature,1,1)) %>% - filter(!(Name %in% assigned$Name)) %>% - select(Name:Score,Mode) %>% - mutate(Iteration = str_c('T',count + 1)) - - outputs <- list( - graph = graph, - filteredGraph = filteredGraph, - assigned = newlyAssigned) - - assignment@assignments <- bind_rows(assignment@assignments,newlyAssigned) - - if (count == 0) { - assignment@transAssign <- list(`1` = outputs) - } else { - assignment@transAssign <- c(assignment@transAssign,list(outputs)) - } - } else { - assignment@transAssign <- c(assignment@transAssign,list(list())) - } - } else { - assignment@transAssign <- c(assignment@transAssign,list(list())) - } - } else { - assignment@transAssign <- c(assignment@transAssign,list(list())) - } - names(assignment@transAssign)[count + 1] <- count + 1 - - if (assignment@log$verbose == T) { - endTime <- proc.time() - elapsed <- {endTime - startTime} %>% - .[3] %>% - round(1) %>% - seconds_to_period() %>% - str_c('[',.,']') - message(blue(str_c('Transformation assignment iteration ', count + 1,' ')),'\t',green(cli::symbol$tick),' ',elapsed) - } - - return(assignment) - } -) diff --git a/R/transformationAssign.R b/R/transformationAssign.R new file mode 100644 index 0000000..917a57d --- /dev/null +++ b/R/transformationAssign.R @@ -0,0 +1,160 @@ + +#' @rdname assignment-methods +#' @export + +setGeneric("transformationAssign", function(assignment) + standardGeneric("transformationAssign")) + +#' @rdname assignment-methods +#' @importFrom stringr str_c +#' @importFrom dplyr full_join select distinct group_split +#' @importFrom mzAnnotation transformMF + +setMethod('transformationAssign',signature = 'Assignment', + function(assignment){ + + invisible(gc()) + + assigned <- assignments(assignment) + + if (ncol(assigned) == 0){ + stop('No assignments found. Has `addIsoAssign()` been called on this object?', + call. = FALSE) + } + + if (assignment@log$verbose == TRUE) { + t_start_time <- proc.time() + message(blue('Transformation assignment'), + cli::symbol$continue) + } + count <- 0 + repeat { + count <- count + 1 + + if (assignment@log$verbose == TRUE) { + start_time <- proc.time() + message(str_c('iteration ', + count,' '), + cli::symbol$continue, + '\r', + appendLF = FALSE) + } + + assigned <- assignments(assignment) + + rel <- assignment %>% + relationships() %>% + filter( + (`m/z1` %in% assigned$`Measured m/z` | + (`m/z2` %in% assigned$`Measured m/z`)) & + !(`m/z1` %in% assigned$`Measured m/z` & + (`m/z2` %in% assigned$`Measured m/z`)), + !(is.na(Transformation1) & is.na(Transformation2)) + ) + + mz1 <- rel %>% + semi_join(assigned, + by = c('m/z1' = 'Measured m/z', + 'Adduct1' = 'Adduct', + 'Isotope1' = 'Isotope')) %>% + filter(is.na(Transformation1)) + mz2 <- rel %>% + semi_join(assigned, + by = c('m/z2' = 'Measured m/z', + 'Adduct2' = 'Adduct', + 'Isotope2' = 'Isotope')) %>% + filter(is.na(Transformation2)) + + rel <- bind_rows(mz1,mz2) + + if (nrow(rel) == 0) break + + M <- collateM(rel, + maxM(assignment))%>% + filter(!(mz %in% assigned$`Measured m/z`)) + + MFs <- generateMFs(M, + ppm(assignment), + MFrankThreshold(assignment), + adductRules(assignment), + isotopeRules(assignment), + AIS(assignment)) + + if (nrow(MFs) == 0) break + + MFs <- MFs %>% + bind_rows(assigned %>% + select(dplyr::any_of(names(MFs))) %>% + left_join(AIS(assignment), + by = c('Adduct','Isotope'))) + graph_edges <- rel %>% + addMFs(MFs, + identMF = FALSE) %>% + sanitiseTransformations(assignment@transformation_rules) %>% + mutate(RetentionTime1 = as.numeric(RetentionTime1), + RetentionTime2 = as.numeric(RetentionTime2)) %>% + addNames() + + if (nrow(graph_edges) == 0) break + + graph_nodes <- collateMFs(graph_edges,MFs) + + graph <- calcComponents(graph_nodes, + graph_edges, + assignment) + + filtered_graph <- filterComponents(graph, + assignment) + + newly_assigned <- filtered_graph %>% + nodes() %>% + rename(Name = name) %>% + mutate(Mode = str_sub(Feature,1,1)) %>% + filter(!(Name %in% assigned$Name)) %>% + select(Name:`MF Plausibility (%)`, + Mode, + Component) %>% + mutate(Iteration = str_c('T',count)) %>% + group_split(MF) %>% + map_dfr(~{ + if (NA %in% .x$Isotope) return(.x) + else NULL + }) + + if (nrow(newly_assigned) == 0) break + + assignment@transAssign[[count]] <- list( + graph = graph, + filtered_graph = filtered_graph, + assigned = newly_assigned) + + assignment@assignments <- bind_rows(assignment@assignments, + newly_assigned) + + if (isTRUE(assignment@log$verbose)) { + end_time <- proc.time() + elapsed <- elapsedTime(start_time,end_time) + message(str_c('iteration ', + count,' '), + '\t\t\t', + green(cli::symbol$tick), + ' ', + elapsed) + } + } + + if (length(assignment@transAssign) > 0){ + names(assignment@transAssign) <- paste0('T', + seq_along(assignment@transAssign)) + } + + if (isTRUE(assignment@log$verbose)) { + t_end_time <- proc.time() + elapsed <- elapsedTime(t_start_time, + t_end_time) + message(blue('Transformation assignment '),'\t',green(cli::symbol$tick),' ',elapsed) + } + + return(assignment) + } +) diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..606e9fc --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,5 @@ +.onLoad <- function(libname, pkgname) { + if (getOption('digits') < 10) options(digits = 10) + + invisible() +} \ No newline at end of file diff --git a/README.md b/README.md index c8e6d1a..6008486 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,35 @@ -# MFassign +# assignments -[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) - [![R build status](https://github.com/jasenfinch/MFassign/workflows/R-CMD-check/badge.svg)](https://github.com/jasenfinch/MFassign/actions) -[![Coverage Status](https://img.shields.io/codecov/c/github/jasenfinch/MFassign/master.svg)](https://codecov.io/github/jasenfinch/MFassign?branch=master) -[![license](https://img.shields.io/badge/license-GNU%20GPL%20v3.0-blue.svg)](https://github.com/jasenfinch/MFassign/blob/master/DESCRIPTION) +[![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) +[![R-CMD-check](https://github.com/aberHRML/assignments/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/aberHRML/assignments/actions/workflows/R-CMD-check.yaml) +[![Coverage Status](https://img.shields.io/codecov/c/github/aberHRML/assignments/master.svg)](https://codecov.io/github/aberHRML/assignments?branch=master) +[![license](https://img.shields.io/badge/license-GNU%20GPL%20v3.0-blue.svg)](https://github.com/aberHRML/assignments/blob/master/DESCRIPTION) +[![GitHub release](https://img.shields.io/github/release/aberHRML/assignments.svg)](https://GitHub.com/aberHRML/assignments/releases/) -An R package for molecular formula assignment in high resolution metabolomics +> An R package for automated molecular formula assignment of ultra-high resolution ESI-MS based metabolomics data + +### Overview + +This R package provides an automated molecular formula assignment approach for electrospray ionisation ultra-high resolution mass spectrometry (ESI-HRMS) metabolomics data. This includes data from direct and flow injection/infustion (FIE-HRMS) fingerprinting as well as liquid chromatography mass spectrometry (LC-HRMS) profiling. The approach includes correlation analysis, relationship calculation, molecular formula generation and selection and graphical component selection based on adducts, isotopes and transformations. ### Installation +The `assignments` package can be installed from GitHub using the +following: + ``` r -devtools::install_github('jasenfinch/MFassign') +remotes::install_github('aberHRML/assignments') ``` + +### Learn more + +The package documentation can be browsed online at +. + +If this is your first time using `assignments` see the +[vignette](https://aberhrml.github.io/assignments/articles/assignments.html) for information on how to get started. + +If you believe you’ve found a bug in `assignments`, please file a bug (and, if possible, a [reproducible example](https://reprex.tidyverse.org)) at +. diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..0433b43 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,42 @@ +url: https://aberhrml.github.io/assignments/ +template: + bootstrap: 5 + +reference: + - title: S4 classes + contents: + - Assignment-class + - AssignmentParameters-class + - assignment + + - title: Example data + contents: + - feature_data + + - title: Parameters + contents: + - availableTechniques + - assignmentParameters + - technique + + - title: Assignment + contents: + - assignMFs + - calcCorrelations + + - title: Results + contents: + - featureData + - assignedData + - summariseAssignments + + - title: Plotting + contents: + - plotAdductDist + - plotComponent + - plotFeatureComponents + - plotSpectrum + + - title: Other + contents: + - nodes diff --git a/MFassign.Rproj b/assignments.Rproj similarity index 100% rename from MFassign.Rproj rename to assignments.Rproj diff --git a/data/feature_data.RData b/data/feature_data.RData new file mode 100644 index 0000000..138d1de Binary files /dev/null and b/data/feature_data.RData differ diff --git a/data/peakData.RData b/data/peakData.RData deleted file mode 100644 index 868b376..0000000 Binary files a/data/peakData.RData and /dev/null differ diff --git a/man/Assignment-class.Rd b/man/Assignment-class.Rd index 229e623..c7b748b 100644 --- a/man/Assignment-class.Rd +++ b/man/Assignment-class.Rd @@ -1,33 +1,27 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allClasses.R +% Please edit documentation in R/assignment.R \docType{class} \name{Assignment-class} \alias{Assignment-class} \title{Assignment} \description{ -An S4 class to store assignment results +An S4 class to store molecular formula assignment results. } \section{Slots}{ \describe{ -\item{\code{log}}{list containing assignment logs} +\item{\code{log}}{a list containing assignment logs} -\item{\code{flags}}{charactor vector containing completed assignment elements} +\item{\code{data}}{a tibble containing the \emph{m/z} peak intensity matrix} -\item{\code{parameters}}{An S4 object of class AssignmentParameters containing the assignment parameters} +\item{\code{correlations}}{a tibble containing the correlations analysis results} -\item{\code{data}}{A tibble containing the peak intensity matrix} +\item{\code{relationships}}{a tibble containing the calculated mathematical relationships} -\item{\code{correlations}}{A tibble containing the correlations} +\item{\code{addIsoAssign}}{a list containing the results of the adduct and isotope assignment iterations} -\item{\code{preparedCorrelations}}{A tibble containing the prepared correlations ready for analysis} +\item{\code{transAssign}}{a list containing the results of the transformation assignment iterationst} -\item{\code{relationships}}{A tibble containing the predicted relationships} - -\item{\code{addIsoAssign}}{A list containing the results of the adduct and isotope assignment} - -\item{\code{transAssign}}{A list containing the results of the transformation assignment} - -\item{\code{assignments}}{A tibble containing the assigned molecular formulas} +\item{\code{assignments}}{a tibble containing the assigned molecular formulas} }} diff --git a/man/AssignmentParameters-class.Rd b/man/AssignmentParameters-class.Rd index 1d3faf5..c26b043 100644 --- a/man/AssignmentParameters-class.Rd +++ b/man/AssignmentParameters-class.Rd @@ -1,42 +1,39 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allClasses.R +% Please edit documentation in R/parameters.R \docType{class} \name{AssignmentParameters-class} \alias{AssignmentParameters-class} -\title{AssignmentParameters} +\title{S4 class for assignment parameters} \description{ An S4 class to store assignment parameters. } \section{Slots}{ \describe{ -\item{\code{technique}}{assignment technique to use} +\item{\code{technique}}{the analytical technique} -\item{\code{correlations}}{list of correlation parameters to be passed to metabolyseR correlation analysis} +\item{\code{correlations_parameters}}{a list of correlation parameters to be passed to \code{metabolyseR::correlations()}} -\item{\code{filter}}{list of r and n thresholds for filtering correlations} +\item{\code{max_M}}{the maximum molecular mass for which to assign molecular formulas} -\item{\code{maxM}}{maximum M for which to assign molecular formulas} +\item{\code{MF_rank_threshold}}{rank threshold for molecular formula selection} -\item{\code{maxMFscore}}{threshold for molecular formula score} +\item{\code{ppm}}{the parts per million error threshold} -\item{\code{ppm}}{ppm threshold -#' @slot adducts named list of character vectors containing the adducuts to use for each mode} +\item{\code{limit}}{the atomic mass unit deviation limit for relationship calculation} -\item{\code{limit}}{amu deviation limit for relationship prediction} +\item{\code{RT_diff_limit}}{the limit for retention time differences for correlated features in adduct and isotopic assignment} -\item{\code{RTwindow}}{retention time window for chromatographic associations} +\item{\code{adducts}}{a list of character vectors containing the adducts names. List element names should denote ionisation mode. The order that these adducts are provided denotes their expected relative importance to assignments with the first expected to be the most common and the last the least common within each ionisation mode.} -\item{\code{adducts}}{list of character vectors containing the adducts to use. List element names should denote ionisation mode.} +\item{\code{isotopes}}{a character vector of isotopes to use. Similarly to the adducts, their order denotes the expected commonality in the data.} -\item{\code{isotopes}}{character vector of isotopes to use} +\item{\code{transformations}}{a character vector of transformations molecular formula changes} -\item{\code{transformations}}{character vector of transformations to use} +\item{\code{adduct_rules}}{a tibble containing the adduct formation rules as returned by \code{mzAnnotation::adduct_rules()}} -\item{\code{adductRules}}{tibble containing adduct formation rules as returned by mzAnnotation::adducts()} +\item{\code{isotope_rules}}{a tibble containing the isotope rules as returned by \code{mzAnnotation::isotope_rules()}} -\item{\code{isotopeRules}}{tibble containing isotope rules as returned by mzAnnotation::isotopes()} - -\item{\code{transformationRules}}{tibble containing transformation rules as returned by mzAnnotation::transformations()} +\item{\code{transformation_rules}}{tibble containing the transformation rules as returned by \code{mzAnnotation::transformation_rules()}} }} diff --git a/man/accessors.Rd b/man/accessors.Rd new file mode 100644 index 0000000..8fc5975 --- /dev/null +++ b/man/accessors.Rd @@ -0,0 +1,143 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/assignment.R +\name{featureData} +\alias{featureData} +\alias{featureData,Assignment-method} +\alias{correlations} +\alias{correlations,Assignment-method} +\alias{relationships} +\alias{relationships,Assignment-method} +\alias{iterations} +\alias{iterations,Assignment-method} +\alias{graph} +\alias{graph,Assignment-method} +\alias{components} +\alias{components,Assignment-method} +\alias{featureComponents} +\alias{featureComponents,Assignment-method} +\alias{component} +\alias{component,Assignment-method} +\alias{assignments} +\alias{assignments,Assignment-method} +\alias{assignedData} +\alias{assignedData,Assignment-method} +\alias{summariseAssignments} +\alias{summariseAssignments,Assignment-method} +\title{Assignment accessors} +\usage{ +featureData(assignment) + +\S4method{featureData}{Assignment}(assignment) + +correlations(assignment) + +\S4method{correlations}{Assignment}(assignment) + +relationships(assignment) + +\S4method{relationships}{Assignment}(assignment) + +iterations(assignment) + +\S4method{iterations}{Assignment}(assignment) + +graph(assignment, iteration, type = c("selected", "all")) + +\S4method{graph}{Assignment}(assignment, iteration, type = c("selected", "all")) + +components(assignment, iteration, type = c("selected", "all")) + +\S4method{components}{Assignment}(assignment, iteration, type = c("selected", "all")) + +featureComponents(assignment, feature, type = c("selected", "all")) + +\S4method{featureComponents}{Assignment}(assignment, feature, type = c("selected", "all")) + +component(assignment, component, iteration, type = c("selected", "all")) + +\S4method{component}{Assignment}(assignment, component, iteration, type = c("selected", "all")) + +assignments(assignment) + +\S4method{assignments}{Assignment}(assignment) + +assignedData(assignment) + +\S4method{assignedData}{Assignment}(assignment) + +summariseAssignments(assignment) + +\S4method{summariseAssignments}{Assignment}(assignment) +} +\arguments{ +\item{assignment}{S4 object of class Assignment} + +\item{iteration}{the assignment iteration} + +\item{type}{the graph type to return. \code{filtered} returns the assignment graph after component selection. \code{all} returns all assignment components.} + +\item{feature}{feature information to extract} + +\item{component}{component number to extract} +} +\value{ +A tibble or \code{tbl_graph} containing assignment results depending on the method used. +} +\description{ +Access methods for \code{Assignment} S4 class +} +\details{ +\itemize{ +\item \code{featureData} - Return the initially specifed \emph{m/z} feature data. +\item \code{correlations} - Return the correlation analysis results. +\item \code{relationships} - Return the calculated relationships. +\item \code{iterations} - Return the assignment iteration performed. +\item \code{graph} - Return a selected graph. +\item \code{components} - Return the component information for an assignment iteration. +\item \code{featureComponents} - Return the component information for a selected feature. +\item \code{component} - Extract a component graph. +\item \code{assignments} - Return the molecular formulas assigned to the \emph{m/z} features. +\item \code{assignedData} - Return the \emph{m/z} peak intensity matrix with the molecular formula assignments included in the column names. +\item \code{summariseAssignments} - Return a tibble of the assignments summarised by molecular formula. +} +} +\examples{ +plan(future::sequential) +p <- assignmentParameters('FIE-HRMS') + +mf_assignments <- assignMFs(feature_data,p) + +## Return feature data +featureData(mf_assignments) + +## Return correlations +correlations(mf_assignments) + +## Return relationships +relationships(mf_assignments) + +## Return the available iterations +iterations(mf_assignments) + +## Return a selected graph +graph(mf_assignments,'A&I1') + +## Return a component information for a selected graph +components(mf_assignments,'A&I1') + +## Return a component information for a selected feature +featureComponents(mf_assignments,'n191.01962') + + ## Extract a component graph +component(mf_assignments,1,'A&I1') + +## Return assignments +assignments(mf_assignments) + +## Return an m/z intensity matrix with the assignments included +## in the column names +assignedData(mf_assignments) + +## Return the assignments summarised by molecular formula +summariseAssignments(mf_assignments) +} diff --git a/man/assign.Rd b/man/assign.Rd new file mode 100644 index 0000000..332699a --- /dev/null +++ b/man/assign.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/assign.R +\name{assignMFs} +\alias{assignMFs} +\alias{assignMFs,tbl_df-method} +\alias{assignMFs,AnalysisData-method} +\alias{assignMFs,Analysis-method} +\title{Perform molecular formula assignment} +\usage{ +assignMFs( + feature_data, + parameters = assignmentParameters("FIE-HRMS"), + verbose = TRUE, + ... +) + +\S4method{assignMFs}{tbl_df}( + feature_data, + parameters = assignmentParameters("FIE-HRMS"), + verbose = TRUE +) + +\S4method{assignMFs}{AnalysisData}( + feature_data, + parameters = assignmentParameters("FIE"), + verbose = TRUE +) + +\S4method{assignMFs}{Analysis}( + feature_data, + parameters = assignmentParameters("FIE"), + verbose = TRUE, + type = c("pre-treated", "raw") +) +} +\arguments{ +\item{feature_data}{a tibble or an object of S4 class \code{AnalysisData} or \code{Analysis} containing the feature intensity matrix of m/z for which to assign molecular formulas. See details.} + +\item{parameters}{an S4 object of class \code{AssignmentParamters} containing the parameters for molecular formula assignment} + +\item{verbose}{should progress output be printed to the console} + +\item{...}{arguments to pass to the relevant method} + +\item{type}{\code{pre-treated} or \code{raw} data on which to perform assignment when argument \code{feature_data} is of S4 class \code{Analysis}} +} +\description{ +Perform automated molecular formula assignment. +} +\details{ +If argument \code{feature_data} is specified as a tibble, this should be a feature intensity matrix where +the columns are the \code{m/z} features to assign and the rows are the individual observations, with the +cells as abundance values. he m/z features provided as column names should be in the form of +@. Ionisation mode should be given as a prefix n or p for negative +or positive ionisation modes respectively. Feature m/z should be provided to an accuracy of least 5 decimal +places. The retention time portion (@) is only required for LC-MS data and should be provided +in minutes. +} +\examples{ +plan(future::sequential) +p <- assignmentParameters('FIE-HRMS') + +assignments <- assignMFs(feature_data,p) + +} diff --git a/man/assignMFs.Rd b/man/assignMFs.Rd deleted file mode 100644 index 61edc99..0000000 --- a/man/assignMFs.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/assignMFs.R -\name{assignMFs} -\alias{assignMFs} -\title{assignMFs} -\usage{ -assignMFs(dat, parameters, verbose = TRUE) -} -\arguments{ -\item{dat}{tibble containing the peak intensities of m/z for which to assign molecular formulas} - -\item{parameters}{an S4 object of class AssignmentParamters containing the parameters for molecular formula assignment} - -\item{verbose}{should output be printed to the console} -} -\description{ -assign molecular formulas to a set of given m/z. -} -\examples{ -plan(future::sequential) -p <- assignmentParameters('FIE') - -assignment <- assignMFs(peakData,p) - -} diff --git a/man/assignedData.Rd b/man/assignedData.Rd deleted file mode 100644 index d539ce4..0000000 --- a/man/assignedData.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/access-methods.R -\name{assignedData} -\alias{assignedData} -\alias{assignedData,Assignment-method} -\title{assignedData} -\usage{ -assignedData(assignment) - -\S4method{assignedData}{Assignment}(assignment) -} -\arguments{ -\item{assignment}{S4 object of class Assignment} -} -\description{ -Return data table used for assignments with feature assignments added to column names. -} diff --git a/man/assignment-methods.Rd b/man/assignment-methods.Rd new file mode 100644 index 0000000..d1af259 --- /dev/null +++ b/man/assignment-methods.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/correlations.R, R/addIsoAssign.R, +% R/transformationAssign.R, R/relationships.R +\name{calcCorrelations} +\alias{calcCorrelations} +\alias{calcCorrelations,Assignment-method} +\alias{addIsoAssign} +\alias{addIsoAssign,Assignment-method} +\alias{transformationAssign} +\alias{transformationAssign,Assignment-method} +\alias{calcRelationships} +\alias{calcRelationships,Assignment-method} +\title{Molecular formula assignment methods} +\usage{ +calcCorrelations(assignment) + +\S4method{calcCorrelations}{Assignment}(assignment) + +addIsoAssign(assignment) + +\S4method{addIsoAssign}{Assignment}(assignment) + +transformationAssign(assignment) + +\S4method{transformationAssign}{Assignment}(assignment) + +calcRelationships(assignment) + +\S4method{calcRelationships}{Assignment}(assignment) +} +\arguments{ +\item{assignment}{an object of S4 class \code{Assignment}} +} +\value{ +An object of S4 class \code{Assignment} containing molecular formula assignments. +} +\description{ +These methods provide access to performing the individual steps of the molecular +formula assignment approach. See Details for more information of when it is best to use these +instead of \code{assignMFs()}. +} +\details{ +In circumstances where the molecular formula assignment approach has high memory requirements, +such as where there are many correlations (> 2 million) or many high \emph{m/z} (>700), it may be +preferable to perform the assignment steps separately as opposed to using \code{assignMFs()}. This +can reduce the memory overheads required to successfully assign molecular formulas to the data +and also enable the possibility of objects to be saved and/or unloaded between the assignment +steps where needed. +} +\examples{ +plan(future::sequential) +p <- assignmentParameters('FIE-HRMS') + +mf_assignments <- assignment(feature_data,p) + +mf_assignments <- mf_assignments \%>\% + calcCorrelations() \%>\% + calcRelationships() \%>\% + addIsoAssign() \%>\% + transformationAssign() + +mf_assignments +} diff --git a/man/assignment.Rd b/man/assignment.Rd new file mode 100644 index 0000000..020dbe5 --- /dev/null +++ b/man/assignment.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/assignment.R +\name{assignment} +\alias{assignment} +\alias{assignment,tbl_df,AssignmentParameters-method} +\alias{assignment,AnalysisData,AssignmentParameters-method} +\alias{assignment,Analysis,AssignmentParameters-method} +\title{Create an Assignment S4 class object} +\usage{ +assignment(feature_data, parameters, ...) + +\S4method{assignment}{tbl_df,AssignmentParameters}(feature_data, parameters) + +\S4method{assignment}{AnalysisData,AssignmentParameters}(feature_data, parameters) + +\S4method{assignment}{Analysis,AssignmentParameters}(feature_data, parameters, type = c("pre-treated", "raw")) +} +\arguments{ +\item{feature_data}{a tibble or an object of S4 class \code{AnalysisData} or \code{Analysis} containing the feature intensity matrix of m/z for which to assign molecular formulas. See details.} + +\item{parameters}{an S4 object of class \code{AssignmentParamters} containing the parameters for molecular formula assignment} + +\item{...}{arguments to pass to the relevant method} + +\item{type}{type \code{pre-treated} or \code{raw} data on which to perform assignment when argument \code{feature_data} is of class \code{Analysis}} +} +\value{ +An object of S4 class \code{Assignment}. +} +\description{ +Constructor methods for creating an object of S4 class \code{Assignment}. +} +\examples{ +mf_assignments <- assignment(feature_data,assignmentParameters('FIE-HRMS')) +mf_assignments +} diff --git a/man/assignmentData.Rd b/man/assignmentData.Rd deleted file mode 100644 index e686ea5..0000000 --- a/man/assignmentData.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/access-methods.R -\name{assignmentData} -\alias{assignmentData} -\alias{assignmentData,Assignment-method} -\title{assignmentData} -\usage{ -assignmentData(assignment) - -\S4method{assignmentData}{Assignment}(assignment) -} -\arguments{ -\item{assignment}{S4 object of class Assignment} -} -\description{ -Return data table used for assignments. -} diff --git a/man/assignmentParameters.Rd b/man/assignmentParameters.Rd index f84a9f9..203bb03 100644 --- a/man/assignmentParameters.Rd +++ b/man/assignmentParameters.Rd @@ -1,14 +1,20 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/assignmentParameters.R +% Please edit documentation in R/parameters.R \name{assignmentParameters} \alias{assignmentParameters} -\title{annotationParameters} +\title{Assignment parameters} \usage{ -assignmentParameters(technique = NULL) +assignmentParameters(technique = availableTechniques()) } \arguments{ -\item{technique}{technique to use for assignment. \code{NULL} prints available techniques} +\item{technique}{technique to use for assignment} +} +\value{ +An object of S4 class \code{AssignmentParameters} } \description{ -Return assignment parameters for a specified technique. +Return the default molecular formula assignment parameters for a given analytical technique. +} +\examples{ +assignmentParameters('FIE-HRMS') } diff --git a/man/assignments.Rd b/man/assignments.Rd deleted file mode 100644 index 247ba00..0000000 --- a/man/assignments.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/access-methods.R -\name{assignments} -\alias{assignments} -\alias{assignments,Assignment-method} -\title{assignments-Assignment} -\usage{ -assignments(assignment) - -\S4method{assignments}{Assignment}(assignment) -} -\arguments{ -\item{assignment}{S4 object of class Assignment} -} -\description{ -Get table of assigned features from an Assignment -} diff --git a/man/availableTechniques.Rd b/man/availableTechniques.Rd new file mode 100644 index 0000000..abde02b --- /dev/null +++ b/man/availableTechniques.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parameters.R +\name{availableTechniques} +\alias{availableTechniques} +\title{Available analytical techniques} +\usage{ +availableTechniques() +} +\value{ +A \code{character} vector of technique names. +} +\description{ +The available analytical techniques for molecular formula assignment parameters. +} +\examples{ +availableTechniques() +} diff --git a/man/continueAssignment.Rd b/man/continueAssignment.Rd deleted file mode 100644 index d46928c..0000000 --- a/man/continueAssignment.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/continueAssignment.R -\name{continueAssignment} -\alias{continueAssignment} -\title{continueAssignment} -\usage{ -continueAssignment(assignment) -} -\arguments{ -\item{assignment}{an S4 object of class Assignment} -} -\description{ -continue a failed assignment -} diff --git a/man/edges.Rd b/man/edges.Rd deleted file mode 100644 index 25c4b37..0000000 --- a/man/edges.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/access-methods.R -\name{edges} -\alias{edges} -\title{edges} -\usage{ -edges(graph) -} -\arguments{ -\item{graph}{object of class tbl_graph} -} -\description{ -extract edge table from tbl_graph object. -} diff --git a/man/feature_data.Rd b/man/feature_data.Rd new file mode 100644 index 0000000..6d099d7 --- /dev/null +++ b/man/feature_data.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/feature_data.R +\docType{data} +\name{feature_data} +\alias{feature_data} +\title{Example feature data} +\format{ +A tibble containing 60 rows and 10 variables. +} +\usage{ +feature_data +} +\description{ +An example \code{m/z} peak intensity matrix containing total ion count normalised positive and negative mode flow infusion electrospray ionisation mass spectrometry \emph{m/z} features. +} +\keyword{datasets} diff --git a/man/graph.Rd b/man/graph.Rd new file mode 100644 index 0000000..59b6f0f --- /dev/null +++ b/man/graph.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/graph.R +\name{nodes} +\alias{nodes} +\alias{edges} +\title{Extract graph attributes} +\usage{ +nodes(graph) + +edges(graph) +} +\arguments{ +\item{graph}{object of class tbl_graph} +} +\description{ +Extract node or edge attributes from a \emph{tidygraph} \code{tbl_graph} object. +} +\examples{ +a_graph <- tidygraph::tbl_graph( + nodes = data.frame( + name = c('a','b','c') + ), + edges = data.frame( + from = c(1,2), + to = c(2,3), + type = c(1,2) + )) + +## Extract graph nodes +nodes(a_graph) + +## Extract graph edges +edges(a_graph) +} diff --git a/man/nodes.Rd b/man/nodes.Rd deleted file mode 100644 index b970e2a..0000000 --- a/man/nodes.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/access-methods.R -\name{nodes} -\alias{nodes} -\title{nodes} -\usage{ -nodes(graph) -} -\arguments{ -\item{graph}{object of class tbl_graph} -} -\description{ -extract node table from tbl_graph object. -} diff --git a/man/parameters.Rd b/man/parameters.Rd new file mode 100644 index 0000000..ce0cf55 --- /dev/null +++ b/man/parameters.Rd @@ -0,0 +1,240 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parameters.R +\name{technique} +\alias{technique} +\alias{technique,AssignmentParameters-method} +\alias{correlationsParameters} +\alias{correlationsParameters,AssignmentParameters-method} +\alias{correlationsParameters<-} +\alias{correlationsParameters<-,AssignmentParameters,list-method} +\alias{limit} +\alias{limit,AssignmentParameters-method} +\alias{limit<-} +\alias{limit<-,AssignmentParameters-method} +\alias{maxM} +\alias{maxM,AssignmentParameters-method} +\alias{maxM<-} +\alias{maxM<-,AssignmentParameters-method} +\alias{MFrankThreshold} +\alias{MFrankThreshold,AssignmentParameters-method} +\alias{MFrankThreshold<-} +\alias{MFrankThreshold<-,AssignmentParameters-method} +\alias{ppm} +\alias{ppm,AssignmentParameters-method} +\alias{ppm<-} +\alias{ppm<-,AssignmentParameters-method} +\alias{isotopes} +\alias{isotopes,AssignmentParameters-method} +\alias{isotopes<-} +\alias{isotopes<-,AssignmentParameters-method} +\alias{adducts} +\alias{adducts,AssignmentParameters-method} +\alias{adducts<-} +\alias{adducts<-,AssignmentParameters-method} +\alias{transformations} +\alias{transformations,AssignmentParameters-method} +\alias{transformations<-} +\alias{transformations<-,AssignmentParameters-method} +\alias{adductRules} +\alias{adductRules,AssignmentParameters-method} +\alias{adductRules<-} +\alias{adductRules<-,AssignmentParameters-method} +\alias{isotopeRules} +\alias{isotopeRules,AssignmentParameters-method} +\alias{isotopeRules<-} +\alias{isotopeRules<-,AssignmentParameters-method} +\alias{transformationRules} +\alias{transformationRules,AssignmentParameters-method} +\alias{transformationRules<-} +\alias{transformationRules<-,AssignmentParameters-method} +\title{Parameter get and set methods} +\usage{ +technique(x) + +\S4method{technique}{AssignmentParameters}(x) + +correlationsParameters(x) + +\S4method{correlationsParameters}{AssignmentParameters}(x) + +correlationsParameters(x) <- value + +\S4method{correlationsParameters}{AssignmentParameters,list}(x) <- value + +limit(x) + +\S4method{limit}{AssignmentParameters}(x) + +limit(x) <- value + +\S4method{limit}{AssignmentParameters}(x) <- value + +maxM(x) + +\S4method{maxM}{AssignmentParameters}(x) + +maxM(x) <- value + +\S4method{maxM}{AssignmentParameters}(x) <- value + +MFrankThreshold(x) + +\S4method{MFrankThreshold}{AssignmentParameters}(x) + +MFrankThreshold(x) <- value + +\S4method{MFrankThreshold}{AssignmentParameters}(x) <- value + +ppm(x) + +\S4method{ppm}{AssignmentParameters}(x) + +ppm(x) <- value + +\S4method{ppm}{AssignmentParameters}(x) <- value + +isotopes(x) + +\S4method{isotopes}{AssignmentParameters}(x) + +isotopes(x) <- value + +\S4method{isotopes}{AssignmentParameters}(x) <- value + +adducts(x) + +\S4method{adducts}{AssignmentParameters}(x) + +adducts(x) <- value + +\S4method{adducts}{AssignmentParameters}(x) <- value + +transformations(x) + +\S4method{transformations}{AssignmentParameters}(x) + +transformations(x) <- value + +\S4method{transformations}{AssignmentParameters}(x) <- value + +adductRules(x) + +\S4method{adductRules}{AssignmentParameters}(x) + +adductRules(x) <- value + +\S4method{adductRules}{AssignmentParameters}(x) <- value + +isotopeRules(x) + +\S4method{isotopeRules}{AssignmentParameters}(x) + +isotopeRules(x) <- value + +\S4method{isotopeRules}{AssignmentParameters}(x) <- value + +transformationRules(x) + +\S4method{transformationRules}{AssignmentParameters}(x) + +transformationRules(x) <- value + +\S4method{transformationRules}{AssignmentParameters}(x) <- value +} +\arguments{ +\item{x}{S4 object of class \code{AssignmentParameters}} + +\item{value}{the value to set} +} +\description{ +Get and set methods for the \code{AssignmentParameters} S4 class. +} +\details{ +\itemize{ +\item \code{technique} - Get the analytical technique. +\item \code{correlationsParameters} - Get or set the correlation analysis parameters to be passed to \code{metabolyseR::correlations()}. +\item \code{limit} - Get or set the atomic mass unit limit for relationship calculation. +\item \code{maxM} - Get or set the maximum molecular mass limit for which to assign molecular formulas. +\item \code{MFrankThreshold} - Get or set the molecular formula rank threshold for molecular formula selection. +\item \code{ppm} - Get or set the parts per million error threshold. +\item \code{isotopes} - Get or set the isotope names. The order in which these are specified denotes the expected relative commonality within the data set. +\item \code{adducts} - Get or set the adduct names for the ionisation modes. The order in which these are specified denotes the expected relative commonality within the data set for each ionisation mode. +\item \code{transformations} - Get or set the transformation molecular formula changes. +\item \code{isotopeRules} - Get or set the isotope rules table. The format of this tibble should match that of \code{mzAnnotation::isotope_rules()}. +\item \code{adductRules} - Get or set the adduct rules table. The format of this tibble should match that of \code{mzAnnotation::adduct_rules()}. +\item \code{techniqueRules} - Get or set the transformation rules table. The format of this tibble should match that of \code{mzAnnotation::transformation_rules()}. +} +} +\examples{ +assignment_parameters <- assignmentParameters('FIE') + +## Return the analytical technique +technique(assignment_parameters) + +## Return correlations parameters +correlationsParameters(assignment_parameters) + +## Set correlations parameters +correlationsParameters(assignment_parameters)$minCoef <- 0.75 + +## Return limit +limit(assignment_parameters) + +## Set limit +limit(assignment_parameters) <- 0.002 + +## Return max M +maxM(assignment_parameters) + +## Set max M +maxM(assignment_parameters) <- 500 + +## Return MF rank threshold +MFrankThreshold(assignment_parameters) + +## Set MF rank threshold +MFrankThreshold(assignment_parameters) <- 3 + +## Return ppm +ppm(assignment_parameters) + +## Set ppm +ppm(assignment_parameters) <- 3 + +## Return isotopes +isotopes(assignment_parameters) + +## Set isotopes +isotopes(assignment_parameters) <- '13C' + +## Return adducts +adducts(assignment_parameters) + +## Set adducts +adducts(assignment_parameters) <- list(n = c('[M-H]1-','[M+Cl]1-'), + p = c('[M+H]1+','[M+K]1+')) + +## Return transformations +transformations(assignment_parameters) + +## Set transformations +transformations(assignment_parameters) <- "M - [O] + [NH2]" + +## Return adduct rules +adductRules(assignment_parameters) + +## Set adduct rules +adductRules(assignment_parameters) <- mzAnnotation::adduct_rules() + +## Return isotope rules +isotopeRules(assignment_parameters) + +## Set isotope rules +isotopeRules(assignment_parameters) <- mzAnnotation::isotope_rules() + +## Return transformation rules +transformationRules(assignment_parameters) + +## Set transformation rules +transformationRules(assignment_parameters) <- mzAnnotation::transformation_rules() +} diff --git a/man/peakData.Rd b/man/peakData.Rd deleted file mode 100644 index 566ce05..0000000 --- a/man/peakData.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/peakData.R -\docType{data} -\name{peakData} -\alias{peakData} -\title{peakData} -\format{ -A tibble containining 60 rows and 1003 variables -} -\usage{ -peakData -} -\description{ -example peak intensity table of sample data from the example FIE-HRMS B. distachyon ecotype data in the metaboData package. -} -\keyword{datasets} diff --git a/man/plotAdductDist.Rd b/man/plotAdductDist.Rd deleted file mode 100644 index 6f96f34..0000000 --- a/man/plotAdductDist.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/plotAdductDist-method.R -\name{plotAdductDist} -\alias{plotAdductDist} -\alias{plotAdductDist,Assignment-method} -\title{plotAdductDist-Assignment} -\usage{ -plotAdductDist(assignment) - -\S4method{plotAdductDist}{Assignment}(assignment) -} -\arguments{ -\item{assignment}{S4 object of class Assignment} -} -\description{ -Plot adduct distributions. -} diff --git a/man/plotFeatureSolutions.Rd b/man/plotFeatureSolutions.Rd deleted file mode 100644 index f16a65d..0000000 --- a/man/plotFeatureSolutions.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/plotFeatureSolutions.R -\name{plotFeatureSolutions} -\alias{plotFeatureSolutions} -\alias{plotFeatureSolutions,Assignment-method} -\title{plotFeatureSolutions} -\usage{ -plotFeatureSolutions(assignment, feature, maxComponents = 10) - -\S4method{plotFeatureSolutions}{Assignment}(assignment, feature, maxComponents = 10) -} -\arguments{ -\item{assignment}{S4 object of class Assignent} - -\item{feature}{name of feature to plot} - -\item{maxComponents}{maximum number of components to plot} -} -\description{ -Plot possible MF solutions for a given feature. -} diff --git a/man/plotNetwork.Rd b/man/plotNetwork.Rd deleted file mode 100644 index 466f2c9..0000000 --- a/man/plotNetwork.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/plotNetwork-method.R -\name{plotNetwork} -\alias{plotNetwork} -\alias{plotNetwork,Assignment-method} -\title{plotNetwork-Assignment} -\usage{ -plotNetwork(assignment, layout = "stress", rThreshold = 0.7) - -\S4method{plotNetwork}{Assignment}(assignment, layout = "stress", rThreshold = 0.7) -} -\arguments{ -\item{assignment}{of class Assignment} - -\item{layout}{graph layout to use. See \code{\link[ggraph]{ggraph}} for layout options} - -\item{rThreshold}{r threhold to use for filtering edge correlation weights} -} -\description{ -plot assignment network -} diff --git a/man/plotSpectrum.Rd b/man/plotSpectrum.Rd deleted file mode 100644 index 50311d8..0000000 --- a/man/plotSpectrum.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/plotSpectrum-method.R -\name{plotSpectrum} -\alias{plotSpectrum} -\alias{plotSpectrum,Assignment-method} -\title{plotSpectrum} -\usage{ -plotSpectrum(assignment, MF) - -\S4method{plotSpectrum}{Assignment}(assignment, MF) -} -\arguments{ -\item{assignment}{S4 object of class Assignment} - -\item{MF}{molecular formula} -} -\description{ -Plot a spectrum for a given molecular formula -} diff --git a/man/plotting.Rd b/man/plotting.Rd new file mode 100644 index 0000000..0fb0c95 --- /dev/null +++ b/man/plotting.Rd @@ -0,0 +1,117 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotAdductDist.R, R/plot_components.R, +% R/plotSpectrum.R +\name{plotAdductDist} +\alias{plotAdductDist} +\alias{plotAdductDist,Assignment-method} +\alias{plotComponent} +\alias{plotComponent,Assignment-method} +\alias{plotFeatureComponents} +\alias{plotFeatureComponents,Assignment-method} +\alias{plotSpectrum} +\alias{plotSpectrum,Assignment-method} +\title{Plot assignment results} +\usage{ +plotAdductDist(assignment) + +\S4method{plotAdductDist}{Assignment}(assignment) + +plotComponent( + assignment, + component, + iteration, + type = c("selected", "all"), + label_size = 3, + axis_offset = 0.1, + border = NA, + highlight = NA +) + +\S4method{plotComponent}{Assignment}( + assignment, + component, + iteration, + type = c("selected", "all"), + label_size = 3, + axis_offset = 0.1, + border = NA, + highlight = NA +) + +plotFeatureComponents( + assignment, + feature, + iteration, + type = c("all", "selected"), + max_components = 6, + label_size = 3, + axis_offset = 0.1 +) + +\S4method{plotFeatureComponents}{Assignment}( + assignment, + feature, + iteration, + type = c("all", "selected"), + max_components = 6, + label_size = 2, + axis_offset = 0.05 +) + +plotSpectrum(assignment, MF) + +\S4method{plotSpectrum}{Assignment}(assignment, MF) +} +\arguments{ +\item{assignment}{an object of S4 class Assignment} + +\item{component}{component number to plot} + +\item{iteration}{the assignment iteration of the component or components} + +\item{type}{the graph type to return. \code{selected} returns the assignment graph after component selection. \code{all} returns all assignment components.} + +\item{label_size}{node label size} + +\item{axis_offset}{axis proportion by which to increase axis limits. Prevents cut off of node labels.} + +\item{border}{specify a plot border colour} + +\item{highlight}{specify a feature node to highlight} + +\item{feature}{the \emph{m/z} feature to plot} + +\item{max_components}{themaximum number of components to plot} + +\item{MF}{the assigned molecular formula to plot} +} +\description{ +Plot molecular formula assignment results. +} +\details{ +\itemize{ +\item \code{plotComponent} - Plot a molecular formula component graph. +\item \code{plotFeatureComponents} - Plot the possible component graphs for a given feature. +\item \code{plotAdductDist} - Plot frequency distributions of the assigned adducts. +\item \code{plotSpectrum} - Plot the spectrum of an assigned molecular formula. +} +} +\examples{ +library(ggraph) +plan(future::sequential) +p <- assignmentParameters('FIE-HRMS') + +mf_assignments <- assignMFs(feature_data,p) + +## Plot a component +plotComponent(mf_assignments,1,'A&I1') + +## Plot the components for a feature +plotFeatureComponents(mf_assignments,'n191.01962','A&I1') + +## Plot the adduct distributions +plotAdductDist(mf_assignments) + +## Plot the spectrum of an assigned molecular formula +plotSpectrum(mf_assignments,'C6H8O7') +} diff --git a/man/reexports.Rd b/man/reexports.Rd index b2a7ed3..850f1fe 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -4,6 +4,7 @@ \name{reexports} \alias{reexports} \alias{plan} +\alias{\%>\%} \title{Objects exported from other packages} \keyword{internal} \description{ @@ -12,5 +13,7 @@ below to see their documentation. \describe{ \item{future}{\code{\link[future]{plan}}} + + \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}} }} diff --git a/man/show-Assignment-method.Rd b/man/show-Assignment-method.Rd deleted file mode 100644 index 3614a72..0000000 --- a/man/show-Assignment-method.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/show-method.R -\name{show,Assignment-method} -\alias{show,Assignment-method} -\title{show-Assignment} -\usage{ -\S4method{show}{Assignment}(object) -} -\arguments{ -\item{object}{S4 object of class Assignment} -} -\description{ -show mehtod for Assignment class. -} diff --git a/man/show-AssignmentParameters-method.Rd b/man/show-AssignmentParameters-method.Rd deleted file mode 100644 index e27905e..0000000 --- a/man/show-AssignmentParameters-method.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/show-method.R -\name{show,AssignmentParameters-method} -\alias{show,AssignmentParameters-method} -\title{show-AssignmentParameters} -\usage{ -\S4method{show}{AssignmentParameters}(object) -} -\arguments{ -\item{object}{S4 object of class AssignmentParameters} -} -\description{ -show method for AssignmentParameters class. -} diff --git a/man/summariseAssignment.Rd b/man/summariseAssignment.Rd deleted file mode 100644 index 43dd149..0000000 --- a/man/summariseAssignment.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allGenerics.R, R/summariseAssignment-method.R -\name{summariseAssignment} -\alias{summariseAssignment} -\alias{summariseAssignment,Assignment-method} -\title{summariseAssignment-Assignment} -\usage{ -summariseAssignment(assignment) - -\S4method{summariseAssignment}{Assignment}(assignment) -} -\arguments{ -\item{assignment}{S4 object of class Assignment} -} -\description{ -Summarise features assigned to moleuclar formulas. -} diff --git a/tests/testthat.R b/tests/testthat.R index ff7878c..4c9e137 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,4 @@ library(testthat) -library(MFassign) +library(assignments) -test_check("MFassign") +test_check("assignments") diff --git a/tests/testthat/test-MFgen.R b/tests/testthat/test-MFgen.R deleted file mode 100644 index 2da4bc0..0000000 --- a/tests/testthat/test-MFgen.R +++ /dev/null @@ -1,14 +0,0 @@ - -context('MFgen') - -test_that('MFgen returns correctly',{ - res <- MFassign:::MFgen(117.07898,118.08626) - - expect_false(F %in% (class(res) == c('tbl_df','tbl','data.frame'))) - expect_false(F %in% (colnames(res) == c('MF','Theoretical M','PPM Error','Measured M','Measured m/z'))) - expect_true(class(res$MF) == 'character') - expect_true(class(res$`Theoretical M`) == 'numeric') - expect_true(class(res$`PPM Error`) == 'numeric') - expect_true(class(res$`Measured M`) == 'numeric') - expect_true(class(res$`Measured m/z`) == 'numeric') -}) \ No newline at end of file diff --git a/tests/testthat/test-MFscore.R b/tests/testthat/test-MFscore.R deleted file mode 100644 index ab558be..0000000 --- a/tests/testthat/test-MFscore.R +++ /dev/null @@ -1,7 +0,0 @@ - -context('MFscore') - -test_that('MFscore returns correctly',{ - res <- MFassign:::MFscore('C4H6O5PS') - expect_true(class(res) == 'numeric') -}) \ No newline at end of file diff --git a/tests/testthat/test-assignMFs.R b/tests/testthat/test-assignMFs.R deleted file mode 100644 index d222963..0000000 --- a/tests/testthat/test-assignMFs.R +++ /dev/null @@ -1,14 +0,0 @@ - -context('assignMFs') - -p <- assignmentParameters('FIE') - -plan(future::multisession,workers = 2) - -assignment <- assignMFs(peakData, - p, - verbose = TRUE) - -test_that('assignMFs works',{ - expect_s4_class(assignment,"Assignment") -}) \ No newline at end of file diff --git a/tests/testthat/test-assignment.R b/tests/testthat/test-assignment.R new file mode 100644 index 0000000..e44bd8b --- /dev/null +++ b/tests/testthat/test-assignment.R @@ -0,0 +1,165 @@ + +assignment_parameters_FIE <- assignmentParameters() +assignment_parameters_LC <- assignmentParameters('RP-LC-HRMS') + +test_adducts <- list( + n = c("[M-H]1-", + "[M+Cl]1-", + "[M+K-2H]1-", + "[2M-H]1-", + "[M+Cl37]1-"), + p = c("[M+H]1+")) + +adducts(assignment_parameters_FIE) <- test_adducts +adducts(assignment_parameters_LC) <- test_adducts +transformations(assignment_parameters_LC) <- character() + +FIE_features <- new('Analysis') +metabolyseR::raw(FIE_features) <- metabolyseR::analysisData( + feature_data, + info = tibble::tibble( + ID = feature_data %>% + nrow() %>% + seq_len())) + +LC_features <- new('Analysis') +metabolyseR::preTreated(LC_features) <- metabolyseR::analysisData( + feature_data %>% + {magrittr::set_colnames(., + paste0(colnames(.), + '@1.00'))}, + info = tibble::tibble( + ID = feature_data %>% + nrow() %>% + seq_len())) + + +assignment_FIE <- assignMFs(FIE_features, + assignment_parameters_FIE, + type = 'raw', + verbose = TRUE) + +assignment_LC <- assignMFs(LC_features, + assignment_parameters_LC, + verbose = TRUE) + +test_that('assignment works for FIE technique',{ + expect_s4_class(assignment_FIE,"Assignment") +}) + +test_that('assignment works for LC techniques',{ + expect_s4_class(assignment_LC,"Assignment") +}) + +test_that('assignment class show method works',{ + expect_output(print(assignment_FIE), + 'Assignment:') +}) + +test_that('assignment data can be returned',{ + expect_s3_class(featureData(assignment_FIE),'tbl_df') +}) + +test_that('assignment correlations can be returned',{ + expect_s3_class(correlations(assignment_FIE),'tbl_df') +}) + +test_that('graph method throws an error if incorrect iteration specified',{ + expect_error(graph(assignment_FIE,'incorrect')) +}) + +test_that('component method throws an error if an incorrect component is specified',{ + expect_error(component(assignment_FIE,'incorrect','A&I1')) +}) + +test_that('data with assigned feature names can be returned',{ + expect_s3_class(assignedData(assignment_FIE),'tbl_df') +}) + +test_that('a summary of assignments can be returned',{ + expect_s3_class(summariseAssignments(assignment_FIE),'tbl_df') +}) + +test_that('feature components can be plotted',{ + pl <- plotFeatureComponents(assignment_FIE, + 'n191.01962', + 'A&I1') + + expect_s3_class(pl,'patchwork') +}) + +test_that('plotFeatureComponents throws an error if incorrect feature specified',{ + expect_error(plotFeatureComponents(assignment_FIE, + 'incorrect', + 'A&I1')) +}) + +test_that('plotFeatureComponents throws an error if no components are found',{ + expect_error(plotFeatureComponents(assignment_FIE,"n228.97636",'A&I1')) +}) + +test_that('a component can be plotted',{ + pl <- plotComponent(assignment_FIE, + 1, + 'A&I1') + + expect_s3_class(pl,'ggplot') +}) + +test_that('plotComponent throws an error if incorrect feature specified for highlighting',{ + expect_error(plotComponent(assignment_FIE, + 1, + 'A&I1', + highlight = 'incorrect')) +}) + +test_that('feature solutions plotting throws an error if an incorrect feature is provided',{ + expect_error(plotFeatureSolutions(assignment_FIE, + 'test')) +}) + +test_that('adduct distributions can be plotted',{ + pl <- plotAdductDist(assignment_FIE) + + expect_s3_class(pl,'patchwork') +}) + +test_that('assignment spectrum can be plotted',{ + pl <- plotSpectrum(assignment_FIE,'C6H8O7') + + expect_s3_class(pl,'ggplot') +}) + +test_that('Assignment class object can be created from a tibble',{ + expect_s4_class(assignment(feature_data, + assignment_parameters_FIE), + 'Assignment') +}) + +test_that('Assignment class object can be created from an AnalysisData class object',{ + expect_s4_class(assignment(FIE_features %>% + raw(), + assignment_parameters_FIE), + 'Assignment') +}) + +test_that('Assignment class object can be created from an Analysis class object',{ + expect_s4_class(assignment(FIE_features, + assignment_parameters_FIE, + type = 'raw'), + 'Assignment') + + expect_s4_class(assignment(LC_features, + assignment_parameters_FIE, + type = 'pre-treated'), + 'Assignment') +}) + +test_that('assignment methods error correctly when slots are empty',{ + mf_assignments <- assignment(feature_data, + assignment_parameters_FIE) + + expect_error(calcRelationships(mf_assignments)) + expect_error(addIsoAssign(mf_assignments)) + expect_error(transformationAssign(mf_assignments)) +}) diff --git a/tests/testthat/test-onload.R b/tests/testthat/test-onload.R new file mode 100644 index 0000000..cc2f286 --- /dev/null +++ b/tests/testthat/test-onload.R @@ -0,0 +1,15 @@ +test_that("Digits correctly set upon package load", { + options('digits' = 7) + + assignments:::.onLoad() + + expect_equal(getOption('digits'),10) +}) + +test_that('Digits not set upon package load if already above 10', { + options('digits' = 11) + + assignments:::.onLoad() + + expect_equal(getOption('digits'),11) +}) diff --git a/tests/testthat/test-parameters.R b/tests/testthat/test-parameters.R new file mode 100644 index 0000000..e7c26c1 --- /dev/null +++ b/tests/testthat/test-parameters.R @@ -0,0 +1,129 @@ + +p <- assignmentParameters('FIE') + +test_that("technique can be returned", { + expect_type(technique(p),'character') +}) + +test_that("correlation parameters can be returned", { + expect_type(correlationsParameters(p),'list') +}) + +test_that("correlation parameters can be set", { + new_minCoef <- 0.9 + correlationsParameters(p)$minCoef <- new_minCoef + + expect_identical(correlationsParameters(p)$minCoef,new_minCoef) +}) + +test_that("limit can be returned", { + expect_type(limit(p),'double') +}) + +test_that("limit can be set", { + new_limit <- 0.002 + limit(p) <- new_limit + + expect_identical(limit(p),new_limit) +}) + +test_that("max M can be returned", { + expect_type(maxM(p),'double') +}) + +test_that("max M can be set", { + new_maxM <- 500 + maxM(p) <- new_maxM + + expect_identical(maxM(p),new_maxM) +}) + +test_that("MF rank threshold can be returned", { + expect_type(MFrankThreshold(p),'double') +}) + +test_that("MF rank threshold can be set", { + new_MFrankThreshold <- 3 + MFrankThreshold(p) <- new_MFrankThreshold + + expect_identical(MFrankThreshold(p),new_MFrankThreshold) +}) + + +test_that("ppm can be returned", { + expect_type(ppm(p),'double') +}) + +test_that("ppm can be set", { + new_ppm <- 3 + ppm(p) <- new_ppm + + expect_identical(ppm(p),new_ppm) +}) + +test_that("adducts can be returned", { + expect_type(adducts(p),'list') +}) + +test_that("adducts can be set", { + new_adducts <- list(n = c("[M-H]1-","[M+Cl]1-"), + p = c("[M+H]1+","[M+K]1+")) + adducts(p) <- new_adducts + + expect_identical(adducts(p),new_adducts) +}) + +test_that("isotopes can be returned", { + expect_type(isotopes(p),'character') +}) + +test_that("isotopes can be set", { + new_isotopes <- '13C' + isotopes(p) <- new_isotopes + + expect_identical(isotopes(p),new_isotopes) +}) + +test_that("transformations can be returned", { + expect_type(transformations(p),'character') +}) + +test_that("transformations can be set", { + new_transformations <- "M - [O] + [NH2]" + transformations(p) <- new_transformations + + expect_identical(transformations(p),new_transformations) +}) + +test_that("adduct rules can be returned", { + expect_s3_class(adductRules(p),'tbl_df') +}) + +test_that("adducts rules can be set", { + new_adduct_rules <- mzAnnotation::adduct_rules()[1,] + adductRules(p) <- new_adduct_rules + + expect_identical(adductRules(p),new_adduct_rules) +}) + +test_that("isotope rules can be returned", { + expect_s3_class(isotopeRules(p),'tbl_df') +}) + +test_that("isotope rules can be set", { + new_isotope_rules <- mzAnnotation::isotope_rules()[1,] + isotopeRules(p) <- new_isotope_rules + + expect_identical(isotopeRules(p),new_isotope_rules) +}) + +test_that("transformation rules can be returned", { + expect_s3_class(transformationRules(p),'tbl_df') +}) + +test_that("transformation rules can be set", { + new_transformation_rules <- mzAnnotation::transformation_rules()[1,] + transformationRules(p) <- new_transformation_rules + + expect_identical(transformationRules(p),new_transformation_rules) +}) \ No newline at end of file diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/assignments.Rmd b/vignettes/assignments.Rmd new file mode 100644 index 0000000..0deb96d --- /dev/null +++ b/vignettes/assignments.Rmd @@ -0,0 +1,213 @@ +--- +title: "assignments" +output: + rmarkdown::html_vignette: + toc: true +vignette: > + %\VignetteIndexEntry{assignments} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.align = 'center' +) +``` + +## Introduction + +The *assignments* package provides an automated molecular formula assignment approach for ultra-high resolution electrospray ionisation mass spectrometry (ESI-MS) data from metabolomics experiments. +This includes data from both direct and flow injection/infusion fingerprinting as well as liquid chromatograph mass spectrometry (LC-MS) profiling analytical techniques. + +This vignette will provide a brief overview of the input data required, parameter selection, performing the assignments and assessing the results. + +Before we begin, first load the package. + +```{r setup} +library(assignments) +``` + +## Computational requirements and parallel processing + +This approach is computationally intensive so the use of high-performance computing resources is recommended. +A suggested minimum would be the use of 16 CPU workers and at least 8GB of RAM per worker (128GB total) to ensure that processing is completed in a reasonable duration. + +The parallel back-end is provided by the [*future*](https://future.futureverse.org/) package. +Information about the available parallel strategies can be found [here](https://future.futureverse.org/#controlling-how-futures-are-resolved). +This example will use a relatively tiny data set so the following example parallel options will be used: + +```{r parallel} +plan('multisession',workers = 2) +``` + +## Input data + +The requirements for data input are designed to be as simple as possible. +Input data should consist of an *m/z* by sample intensity matrix with positive an negative mode data combined where available. +The *m/z* features provided as column names should be in the form of `@`. +Ionisation mode should be given as a prefix `n` or `p` for negative or positive ionisation modes respectively. +Feature *m/z* should be provided to an accuracy of least 5 decimal places. +The retention time portion (`@`) is only required for LC-MS data and should be provided in minutes. + +It is recommended that the data undergo pre-treatment routines such as relative standard deviation filtering, imputation and/or normalisation prior to assignment. +However, this is not essential requirement and raw intensity values could also be used. + +The input data for this example is a subset from an FIE-HRMS metabolomics experiment and is shown below. +The feature intensities are total ion count (TIC) normalised. + +```{r example-data} +feature_data +``` + +## Parameters + +Default parameters for a number of techniques are provided. +The available techniques can be viewed as shown below. + +```{r techniques} +availableTechniques() +``` + +The `FIE-HRMS` fingerprinting technique parameters would also be suitable for direct injection data. +The default parameters are designed to be as widely applicable as possible and should suit many situations. +For this example we will specify the use of the `FIE-HRMS` parameters. + +```{r parameters} +parameters <- assignmentParameters('FIE-HRMS') +``` + +The parameters can then be viewed by printing the returned object. + +```{r print-parameters} +parameters +``` + +It is possible to access and set all of these parameters. +For example, the `adducts` method can be used to access the specified adducts: + +```{r extract-adducts} +adducts(parameters) +``` + +More accessor methods for assignment parameters can be viewed by running `?parameters`. + +Additional adducts, isotopes and transformations rules can also be appended to the relevant rules tables within the assignment parameters object. +See the assignment parameters documentation for more information. + +## Assignment + +To perform the molecular formula assignment we can execute the following. + +```{r assignment} +assignment <- assignMFs(feature_data, + parameters) +``` + +For an overview of the assignment results, we can print the object. + +```{r print-assignment} +assignment +``` + +## Results + +The following can be used to access the assigned *m/z* feature information. + +```{r assignment-results} +assignments(assignment) +``` + +These feature assignments can also be summarised for each molecular formula. + +```{r summarise-assignments} +summariseAssignments(assignment) +``` + +We can extract all the calculated correlations between the *m/z* features. + +```{r correlations} +correlations(assignment) +``` + +As well as all the computed mathematical adduct, isotope and transformation relationships. + +```{r relationships} +relationships(assignment) +``` + +To view all the iterations conducted during the assignment, the following can be used. + +```{r iterations} +iterations(assignment) +``` + +Information for the component subgraphs identified in an iteration can also be extracted. +These can either be the `selected` components or `all` the possible components. + +```{r components} +components(assignment, + iteration = 'A&I1', + type = 'selected') +``` + +We can extract the [*tidygraph*](https://tidygraph.data-imaginist.com/) `tbl_graph` object for a given iterations. + +```{r graph} +graph(assignment, + iteration = 'A&I1', + type = 'selected') +``` + +Along with the graph of individual component. + +```{r component} +component(assignment, + component = 1, + iteration = 'A&I1', + type = 'selected') +``` + +And all the components of a given feature. + +```{r feature-components} +featureComponents(assignment, + feature = 'n191.01962', + type = 'all') +``` + +The graph of an individual component can be visualised. +This first requires the *ggraph* package to be loaded. + +```{r plot-component,fig.height=4,fig.width=4} +library(ggraph) + +plotComponent(assignment, + component = 1, + iteration = 'A&I1', + type = 'selected') +``` + +We can also visualise the components containing a specific feature for a given iteration. + +```{r feature-solutions,fig.width=9,fig.height=7} +plotFeatureComponents( + assignment, + feature = 'n191.01962', + iteration = 'A&I1', + type = 'all', + max_components = 6, + axis_offset = 0.2 +) +``` + +Because a molecular formula ranking threshold is applied during assignment, it may also be useful to generate all the alternative molecular formulas and their rankings for a specific *m/z*, adduct and isotope using the `ipMF` function from the [*mzAnnotation*](https://aberhrml.github.io/mzAnnotation/) package. + +```{r alternative-mfs} +mzAnnotation::ipMF(191.01962, + adduct = '[M-H]1-', + isotope = NA, + ppm = 6) +```