diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index db4dceed..c21abc88 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,4 +1,4 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: config: - - {os: macOS-latest, r: 'release'} + - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} @@ -29,18 +29,21 @@ jobs: R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - - uses: r-lib/actions/setup-r-dependencies@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: rcmdcheck + extra-packages: any::rcmdcheck + needs: check - - uses: r-lib/actions/check-r-package@v1 + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 63cbb18a..087f0b05 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -1,8 +1,10 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help on: push: branches: [main, master] + pull_request: + branches: [main, master] release: types: [published] workflow_dispatch: @@ -12,24 +14,33 @@ name: pkgdown jobs: pkgdown: runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: use-public-rspm: true - - uses: r-lib/actions/setup-r-dependencies@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: pkgdown + extra-packages: any::pkgdown, local::. needs: website - - name: Deploy package - run: | - git config --local user.name "$GITHUB_ACTOR" - git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" - Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 3c0da1c9..2c5bb502 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -1,4 +1,4 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help on: push: @@ -15,16 +15,36 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: use-public-rspm: true - - uses: r-lib/actions/setup-r-dependencies@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: covr + extra-packages: any::covr + needs: coverage - name: Test coverage - run: covr::codecov() + run: | + covr::codecov( + quiet = FALSE, + clean = FALSE, + install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + ) shell: Rscript {0} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/DESCRIPTION b/DESCRIPTION index df42262b..9b1caa94 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,9 @@ Package: metabolyseR Title: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data -Version: 0.14.10 +Version: 0.15.0 Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre")) Description: A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data. 
-URL: https://jasenfinch.github.io/metabolyseR +URL: https://jasenfinch.github.io/metabolyseR/ BugReports: https://github.com/jasenfinch/metabolyseR/issues biocViews: Metabolomics Depends: R (>= 3.4.0) @@ -38,7 +38,7 @@ Imports: Hmisc, License: GPL (>= 3) Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.3 Suggests: knitr, rmarkdown, readr, @@ -47,7 +47,7 @@ Suggests: knitr, prettydoc, covr, kableExtra -Collate: allClasses.R +Collate: all-classes.R analysis-accessors.R aggregate.R analysisData.R @@ -61,7 +61,7 @@ Collate: allClasses.R metabolyse.R metabolyseR.R modelling.R - modellingPlots.R + modelling-plots.R nlda.R occupancy.R parameters.R @@ -80,6 +80,11 @@ Collate: allClasses.R QC.R reexports.R remove.R + randomForest-permute.R + randomForest-classification.R + randomForest-regression.R + randomForest-internals.R + randomForest-unsupervised.R randomForest.R rsd.R roc.R diff --git a/NAMESPACE b/NAMESPACE index 9e3324d2..9f1a7e33 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -75,6 +75,7 @@ export(preTreatmentElements) export(preTreatmentMethods) export(preTreatmentParameters) export(predict) +export(predictions) export(proximity) export(randomForest) export(raw) @@ -138,12 +139,14 @@ importFrom(dplyr,mutate_all) importFrom(dplyr,mutate_at) importFrom(dplyr,mutate_if) importFrom(dplyr,n) +importFrom(dplyr,pull) importFrom(dplyr,relocate) importFrom(dplyr,rename) importFrom(dplyr,rename_with) importFrom(dplyr,rowwise) importFrom(dplyr,select) importFrom(dplyr,select_if) +importFrom(dplyr,slice) importFrom(dplyr,summarise) importFrom(dplyr,summarise_all) importFrom(dplyr,ungroup) @@ -152,6 +155,7 @@ importFrom(forestControl,fpr_fs) importFrom(furrr,furrr_options) importFrom(furrr,future_map) importFrom(furrr,future_map2) +importFrom(furrr,future_map_dfr) importFrom(future,plan) importFrom(ggdendro,dendro_data) importFrom(ggplot2,aes) @@ -204,6 +208,7 @@ importFrom(magrittr,set_names) importFrom(magrittr,set_rownames) 
importFrom(methods,"slot<-") importFrom(methods,as) +importFrom(methods,is) importFrom(methods,new) importFrom(methods,show) importFrom(methods,slot) @@ -217,10 +222,12 @@ importFrom(purrr,map_chr) importFrom(purrr,map_dbl) importFrom(purrr,map_depth) importFrom(purrr,map_df) +importFrom(purrr,map_dfr) importFrom(purrr,map_lgl) importFrom(purrr,walk) importFrom(randomForest,margin) importFrom(rlang,":=") +importFrom(rlang,squash) importFrom(rlang,sym) importFrom(stats,cmdscale) importFrom(stats,cov) @@ -243,6 +250,7 @@ importFrom(stringr,str_replace_all) importFrom(stringr,str_split) importFrom(stringr,str_split_fixed) importFrom(stringr,str_sub) +importFrom(stringr,str_to_title) importFrom(tibble,as_tibble) importFrom(tibble,deframe) importFrom(tibble,rowid_to_column) diff --git a/NEWS.md b/NEWS.md index 7336d921..94b44de7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,25 @@ +# metabolyseR 0.15.0 + +* It is now possible to specify multiple `cls` arguments to the [aggregation methods](https://jasenfinch.github.io/metabolyseR/reference/aggregate.html). + +* Correlation thresholds are now available for both coefficient and total number using the `minCoef` and `maxCor` arguments in the [`correlations()`](https://jasenfinch.github.io/metabolyseR/reference/correlations.html) method. + +* Added the [`predictions()`]() accessor method for the [`RandomForest`](https://jasenfinch.github.io/metabolyseR/reference/RandomForest-class.html) S4 class to enable the retrieval of the out of bag model response predictions. + +* The [occupancy filtering methods](https://jasenfinch.github.io/metabolyseR/reference/occupancyFilter.html) now error if the value supplied to argument `occupancy` is non-numeric. + +* Memory usage and performance improvements for the [`randomForest()`](https://jasenfinch.github.io/metabolyseR/reference/randomForest.html) method. 
+ +* Added [`type()`](https://jasenfinch.github.io/metabolyseR/reference/modelling-accessors.html) and [`response()`](https://jasenfinch.github.io/metabolyseR/reference/modelling-accessors.html) methods for the [`Univariate`](https://jasenfinch.github.io/metabolyseR/reference/Univariate-class.html) S4 class. + +* [`plotLDA()`](https://jasenfinch.github.io/metabolyseR/reference/plotLDA.html) now returns a warning and skips plotting if an error is encountered during PC-LDA. + +* The value `pre-treated` is now the default for argument `type` in the [`Analysis`](https://jasenfinch.github.io/metabolyseR/reference/Analysis-class.html) S4 class [accessor methods](https://jasenfinch.github.io/metabolyseR/reference/analysis-accessors.html). + +* Added the `value` argument to the [`explanatoryFeatures()`](https://jasenfinch.github.io/metabolyseR/reference/modelling-accessors.html) method to allow the specification of on which importance value to apply the specified `threshold`. + +* The specified `cls` argument is now correctly displayed on the x-axis title of the resulting plots from the [`plotFeature()`](https://jasenfinch.github.io/metabolyseR/reference/plotFeature.html) method. + # metabolyseR 0.14.10 * Added the method [`predict()`](https://jasenfinch.github.io/metabolyseR/reference/predict.html) for the [`RandomForest`](https://jasenfinch.github.io/metabolyseR/reference/RandomForest-class.html) S4 class to predict model response values. diff --git a/R/QC.R b/R/QC.R index e0889efa..33e1c446 100644 --- a/R/QC.R +++ b/R/QC.R @@ -80,7 +80,7 @@ setMethod('QCimpute',signature = 'AnalysisData', dat(d)[d %>% sinfo() %>% - select(cls) %>% + select(all_of(cls)) %>% deframe() %>% {. == QCidx},] <- QC %>% dat() diff --git a/R/aggregate.R b/R/aggregate.R index 333c91dc..6bd7e2f1 100644 --- a/R/aggregate.R +++ b/R/aggregate.R @@ -2,7 +2,7 @@ #' @rdname aggregate #' @description Aggregation of sample features based on a grouping variable. 
#' @param d S4 object of class `AnalysisData` -#' @param cls info column to use for class data +#' @param cls info columns across which to aggregate the data #' @return An S4 object of class `AnalysisData` containing the aggregated data. #' @details #' Sample aggregation allows the electronic pooling of sample features based on a grouping variable. @@ -26,17 +26,17 @@ #' #' ## Mean aggregation #' d %>% -#' aggregateMean(cls = 'day') %>% +#' aggregateMean(cls = c('day','class')) %>% #' plotPCA(cls = 'day',ellipses = FALSE) #' #' ## Median aggregation #' d %>% -#' aggregateMedian(cls = 'day') %>% +#' aggregateMedian(cls = c('day','class')) %>% #' plotPCA(cls = 'day',ellipses = FALSE) #' #' ## Sum aggregation #' d %>% -#' aggregateSum(cls = 'day') %>% +#' aggregateSum(cls = c('day','class')) %>% #' plotPCA(cls = 'day',ellipses = FALSE) #' @export @@ -48,23 +48,7 @@ setGeneric("aggregateMean", function(d,cls = 'class') setMethod('aggregateMean',signature = 'AnalysisData', function(d,cls = 'class'){ - dat(d) <- d %>% - dat() %>% - bind_cols(select(d %>% sinfo(),Class = cls)) %>% - gather('Feature','Intensity',-Class) %>% - group_by(Class,Feature) %>% - summarise(Intensity = mean(Intensity)) %>% - ungroup() %>% - spread(Feature,Intensity) %>% - select(-Class) - - sinfo(d) <- d %>% - sinfo() %>% - select(cls) %>% - group_by_all() %>% - summarise() %>% - arrange_all() - + d <- aggregate(d,'mean',cls) return(d) } ) @@ -80,23 +64,7 @@ setGeneric("aggregateMedian", function(d,cls = 'class') setMethod('aggregateMedian',signature = 'AnalysisData', function(d, cls = 'class'){ - dat(d) <- d %>% - dat() %>% - bind_cols(select(d %>% sinfo(),Class = cls)) %>% - gather('Feature','Intensity',-Class) %>% - group_by(Class,Feature) %>% - summarise(Intensity = median(Intensity)) %>% - ungroup() %>% - spread(Feature,Intensity) %>% - select(-Class) - - sinfo(d) <- d %>% - sinfo() %>% - select(cls) %>% - group_by_all() %>% - summarise() %>% - arrange_all() - + d <- aggregate(d,'median',cls) 
return(d) } ) @@ -113,23 +81,36 @@ setGeneric("aggregateSum", function(d,cls = 'class') setMethod('aggregateSum',signature = 'AnalysisData', function(d,cls = 'class'){ - dat(d) <- d %>% - dat() %>% - bind_cols(select(d %>% sinfo(),Class = cls)) %>% - gather('Feature','Intensity',-Class) %>% - group_by(Class,Feature) %>% - summarise(Intensity = sum(Intensity)) %>% - ungroup() %>% - spread(Feature,Intensity) %>% - select(-Class) - - sinfo(d) <- d %>% - sinfo() %>% - select(cls) %>% - group_by_all() %>% - summarise() %>% - arrange_all() - + d <- aggregate(d,'sum',cls) return(d) } ) + +aggregate <- function(d,method,cls){ + aggregateMethod <- switch(method, + mean = mean, + median = median, + sum = sum) + + sample_info <- d %>% + sinfo() %>% + select(all_of(cls)) + + dat(d) <- d %>% + dat() %>% + bind_cols(sample_info) %>% + gather('Feature','Intensity',-all_of(cls)) %>% + group_by(across(all_of(cls)),Feature) %>% + summarise(Intensity = aggregateMethod(Intensity)) %>% + ungroup() %>% + spread(Feature,Intensity) %>% + select(-all_of(cls)) + + sinfo(d) <- sample_info %>% + group_by_all() %>% + summarise() %>% + arrange_all() + + return(d) + +} diff --git a/R/allClasses.R b/R/all-classes.R similarity index 89% rename from R/allClasses.R rename to R/all-classes.R index 6edd9228..8a69af1b 100644 --- a/R/allClasses.R +++ b/R/all-classes.R @@ -78,7 +78,7 @@ setClass('Analysis', #' @description An S4 class for random forest results and models. 
#' @slot type random forest type #' @slot response response variable name -#' @slot results list of measure and importance results tables +#' @slot metrics tibble of model performance metrics #' @slot predictions tibble of model observation predictions #' @slot permutations list of permutations measure and importance results tables #' @slot importances tibble of model feature importances @@ -91,12 +91,25 @@ setClass('RandomForest', slots = list( type = 'character', response = 'character', - results = 'list', + metrics = 'tbl_df', predictions = 'tbl_df', permutations = 'list', importances = 'tbl_df', proximities = 'tbl_df', models = 'list' + ), + prototype = list( + type = 'unsupervised', + response = '', + metrics = tibble(), + predictions = tibble(), + permutations = list( + metrics = tibble(), + importance = tibble() + ), + importances = tibble(), + proximities = tibble(), + models = list() ) ) diff --git a/R/analysis-accessors.R b/R/analysis-accessors.R index 54005498..4c8bad45 100644 --- a/R/analysis-accessors.R +++ b/R/analysis-accessors.R @@ -60,21 +60,27 @@ setMethod('dat',signature = 'AnalysisData', #' @rdname analysis-accessors setMethod('dat',signature = 'Analysis', - function(x, type = c('raw','pre-treated')){ + function(x, type = c('pre-treated','raw')){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'pre-treated') { - x %>% + d <- x %>% preTreated() %>% dat() - } else { - x %>% + } + + if (type == 'raw'){ + d <- x %>% raw() %>% dat() } + + return(d) } ) @@ -96,17 +102,21 @@ setMethod("dat<-",signature = 'AnalysisData', #' @rdname analysis-accessors setMethod('dat<-',signature = 'Analysis', - function(x, type = c('raw','pre-treated'), value){ + function(x, type = c('pre-treated','raw'), value){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 
'pre-treated'){ d <- preTreated(x) dat(d) <- value preTreated(x) <- d - } else { + } + + if (type == 'raw'){ d <- raw(x) dat(d) <- value raw(x) <- d @@ -134,21 +144,27 @@ setMethod('sinfo',signature = 'AnalysisData', #' @rdname analysis-accessors setMethod('sinfo',signature = 'Analysis', - function(x, type = c('raw','pre-treated'), value){ + function(x, type = c('pre-treated','raw'), value){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'pre-treated') { - x %>% + i <- x %>% preTreated() %>% sinfo() - } else { - x %>% + } + + if (type == 'raw'){ + i <- x %>% raw() %>% sinfo() } + + return(i) } ) @@ -171,17 +187,21 @@ setMethod('sinfo<-',signature = 'AnalysisData', #' @rdname analysis-accessors setMethod('sinfo<-',signature = 'Analysis', - function(x,type = c('raw','pre-treated'), value){ + function(x,type = c('pre-treated','raw'), value){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'pre-treated'){ d <- preTreated(x) sinfo(d) <- value preTreated(x) <- d - } else { + } + + if (type == 'raw'){ d <- raw(x) sinfo(d) <- value raw(x) <- d @@ -268,20 +288,26 @@ setMethod('features',signature = 'AnalysisData', #' @rdname analysis-accessors setMethod('features',signature = 'Analysis', - function(x,type = c('raw','pre-treated')){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + function(x,type = c('pre-treated','raw')){ + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'pre-treated') { - x %>% + f <- x %>% preTreated() %>% features() - } else { - x %>% + } + + if (type == 'raw'){ + f <- x %>% raw() %>% features() } + + return(f) }) #' @rdname analysis-accessors @@ -302,20 +328,26 @@ setMethod('nSamples',signature = 'AnalysisData', #' @rdname analysis-accessors setMethod('nSamples',signature = 'Analysis', - function(x,type = 
c('raw','pre-treated')){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + function(x,type = c('pre-treated','raw')){ + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'pre-treated') { - x %>% + n_samples <- x %>% preTreated() %>% nSamples() - } else { - x %>% + } + + if (type == 'raw'){ + n_samples <- x %>% raw() %>% nSamples() } + + return(n_samples) }) #' @rdname analysis-accessors @@ -337,22 +369,27 @@ setMethod('nFeatures',signature = 'AnalysisData', #' @rdname analysis-accessors setMethod('nFeatures',signature = 'Analysis', - function(x,type = c('raw','pre-treated')){ + function(x,type = c('pre-treated','raw')){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'pre-treated') { - x %>% + n_features <- x %>% preTreated() %>% nFeatures() - } else { - x %>% + } + + if (type == 'raw'){ + n_features <- x %>% raw() %>% nFeatures() } + return(n_features) }) #' @rdname analysis-accessors diff --git a/R/correlations.R b/R/correlations.R index df279c48..6f60e52b 100644 --- a/R/correlations.R +++ b/R/correlations.R @@ -5,12 +5,14 @@ #' @param method correlation method. One of `pearson` or `spearman`. #' @param pAdjustMethod p-value adjustment method. See `?p.adjust` for available methods. #' @param corPvalue p-value cut-off threshold for significance +#' @param minCoef minimum absolute correlation coefficient threshold +#' @param maxCor maximum number of returned correlations #' @param ... arguments to pass to specific method #' @return A tibble containing results of significantly correlated features. #' @details #' Correlation analyses can be used to identify associated features within data sets. #' This can be useful to identifying clusters of related features that can be used to annotate metabolites within data sets. 
-#' All features are compared and the returned table of correlations are p-value thresholded using the specified cut-off. +#' All features are compared and the returned table of correlations are thresholded to the specified p-value cut-off. #' @examples #' library(metaboData) #' @@ -28,11 +30,15 @@ setMethod('correlations',signature = 'AnalysisData', function(d, method = 'pearson', pAdjustMethod = 'bonferroni', - corPvalue = 0.05){ + corPvalue = 0.05, + minCoef = 0, + maxCor = Inf){ doCorrelations(d, method = method, pAdjustMethod = pAdjustMethod, - corPvalue = corPvalue) + corPvalue = corPvalue, + minCoef = minCoef, + maxCor = maxCor) }) #' @rdname correlations @@ -63,7 +69,9 @@ setMethod("correlations", signature = "Analysis", rs <- correlations(da, params$method, params$pAdjustMethod, - params$corPvalue) + params$corPvalue, + params$minCoef, + params$maxCor) d@correlations <- rs d@log$correlations <- date() @@ -87,7 +95,7 @@ setMethod("correlations", signature = "Analysis", #' @importFrom Hmisc rcorr #' @importFrom stats p.adjust na.omit -#' @importFrom dplyr filter bind_cols left_join rename select mutate distinct +#' @importFrom dplyr filter bind_cols left_join rename select mutate distinct slice #' @importFrom tidyr gather #' @importFrom tibble tibble as_tibble #' @importFrom purrr map_df @@ -95,7 +103,9 @@ setMethod("correlations", signature = "Analysis", doCorrelations <- function(d, method = 'pearson', pAdjustMethod = 'bonferroni', - corPvalue = 0.05) + corPvalue = 0.05, + minCoef = 0, + maxCor = Inf) { methods <- eval(formals(rcorr)$type) @@ -136,31 +146,45 @@ doCorrelations <- function(d, map_df(p.adjust,method = pAdjustMethod,n = nrow(.) 
- 1) %>% mutate(Feature1 = colnames(.)) %>% gather('Feature2','p',-Feature1) %>% + drop_na() %>% distinct() ns <- cors$n %>% as_tibble() %>% mutate(Feature1 = colnames(.)) %>% gather('Feature2','n',-Feature1) %>% + drop_na() %>% distinct() rs <- cors$r %>% as_tibble() %>% mutate(Feature1 = colnames(.)) %>% - gather('Feature2','r',-Feature1) %>% + gather('Feature2','coefficient',-Feature1) %>% + drop_na() %>% distinct() %>% - bind_cols(ps %>% select(p), - ns %>% select(n)) %>% + left_join(ps, + by = c("Feature1", "Feature2")) %>% + left_join(ns, + by = c("Feature1", "Feature2")) %>% filter(Feature1 != Feature2,p < corPvalue,n > 2) %>% - left_join(intensity, by = c('Feature1' = 'Feature')) %>% + left_join(intensity, + by = c('Feature1' = 'Feature')) %>% rename(Intensity1 = Intensity) %>% - left_join(intensity, by = c('Feature2' = 'Feature')) %>% + left_join(intensity, + by = c('Feature2' = 'Feature')) %>% rename(Intensity2 = Intensity) %>% - mutate(`|r|` = abs(r), + mutate(`|coefficient|` = abs(coefficient), log2IntensityRatio = log2(Intensity1/Intensity2)) %>% - select(Feature1,Feature2,log2IntensityRatio,r,`|r|`,p,n) %>% - na.omit() %>% - arrange(desc(`|r|`)) + select(Feature1,Feature2,log2IntensityRatio,coefficient,`|coefficient|`,p,n) %>% + arrange(desc(`|coefficient|`)) %>% + filter(`|coefficient|` >= minCoef) + + n_correlations <- nrow(rs) + + if (n_correlations > maxCor){ + rs <- rs %>% + slice(seq_len(maxCor)) + } return(rs) } diff --git a/R/info.R b/R/info.R index 969c792d..0b15236b 100644 --- a/R/info.R +++ b/R/info.R @@ -79,15 +79,19 @@ setMethod('clsAdd', setMethod('clsAdd', signature = 'Analysis', - function(d,cls,value,type = c('raw','pre-treated')){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + function(d,cls,value,type = c('pre-treated','raw')){ + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'raw'){ sl <- get('raw') `sl<-` <- get(str_c('raw','<-')) - } else { + } + + if (type == 'pre-treated'){ 
sl <- get('preTreated') `sl<-` <- get(str_c('preTreated','<-')) } @@ -143,15 +147,19 @@ setMethod('clsArrange', setMethod('clsArrange', signature = 'Analysis', - function(d,cls = 'class', descending = FALSE, type = c('raw','pre-treated')){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + function(d,cls = 'class', descending = FALSE, type = c('pre-treated','raw')){ + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'raw'){ sl <- get('raw') `sl<-` <- get(str_c('raw','<-')) - } else { + } + + if (type == 'pre-treated'){ sl <- get('preTreated') `sl<-` <- get(str_c('preTreated','<-')) } @@ -159,6 +167,8 @@ setMethod('clsArrange', sl(d) <- d %>% sl() %>% clsArrange(cls = cls,descending = descending) + + return(d) }) #' @rdname cls @@ -177,14 +187,18 @@ setMethod('clsAvailable',signature = 'AnalysisData',function(d){ #' @rdname cls -setMethod('clsAvailable',signature = 'Analysis',function(d,type = c('raw','pre-treated')){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) +setMethod('clsAvailable',signature = 'Analysis',function(d,type = c('pre-treated','raw')){ + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'raw'){ sl <- get('raw') - } else { + } + + if (type == 'pre-treated'){ sl <- get('preTreated') } @@ -206,7 +220,7 @@ setMethod('clsExtract', function(d, cls = 'class'){ d %>% sinfo() %>% - select(cls) %>% + select(all_of(cls)) %>% deframe() }) @@ -214,14 +228,18 @@ setMethod('clsExtract', setMethod('clsExtract', signature = 'Analysis', - function(d,cls = 'class',type = c('raw','pre-treated')){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + function(d,cls = 'class',type = c('pre-treated','raw')){ + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'raw'){ sl <- get('raw') - } else { + } + + if (type =='pre-treated'){ sl <- get('preTreated') } @@ -246,7 +264,7 @@ setMethod('clsRemove',signature = 
'AnalysisData',function(d,cls){ sinfo(d) <- d %>% sinfo() %>% - select(-{{cls}}) + select(-all_of(cls)) return(d) }) @@ -255,15 +273,19 @@ setMethod('clsRemove',signature = 'AnalysisData',function(d,cls){ setMethod('clsRemove', signature = 'Analysis', - function(d,cls,type = c('raw','pre-treated')){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + function(d,cls,type = c('pre-treated','raw')){ + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'raw'){ sl <- get('raw') `sl<-` <- get(str_c('raw','<-')) - } else { + } + + if (type == 'pre-treated'){ sl <- get('preTreated') `sl<-` <- get(str_c('preTreated','<-')) } @@ -297,15 +319,19 @@ setMethod('clsRename', setMethod('clsRename', signature = 'Analysis', - function(d,cls,newName, type = c('raw','pre-treated')){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + function(d,cls,newName, type = c('pre-treated','raw')){ + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'raw'){ sl <- get('raw') `sl<-` <- get(str_c('raw','<-')) - } else { + } + + if(type == 'pre-treated'){ sl <- get('preTreated') `sl<-` <- get(str_c('preTreated','<-')) } @@ -313,6 +339,8 @@ setMethod('clsRename', sl(d) <- d %>% sl() %>% clsRename(cls = cls,newName = newName) + + return(d) }) #' @rdname cls @@ -342,15 +370,19 @@ setMethod('clsReplace', setMethod('clsReplace', signature = 'Analysis', - function(d, value, cls = 'class', type = c('raw','pre-treated')){ - type <- match.arg(type, - choices = c('raw', - 'pre-treated')) + function(d, value, cls = 'class', type = c('pre-treated','raw')){ + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'raw'){ sl <- get('raw') `sl<-` <- get(str_c('raw','<-')) - } else { + } + + if (type == 'pre-treated'){ sl <- get('preTreated') `sl<-` <- get(str_c('preTreated','<-')) } diff --git a/R/mds.R b/R/mds.R index e9c514cc..30c2044e 100644 --- a/R/mds.R +++ b/R/mds.R @@ -29,14 +29,14 @@ 
setMethod('mds',signature = 'RandomForest', function(x,dimensions = 2,idx = NULL){ group_vars <- switch(type(x), - classification = c('Response','Comparison'), - regression = 'Response', + classification = c('response','comparison'), + regression = 'response', unsupervised = NULL) dissimilarities <- x %>% proximity(idx = idx) %>% - mutate(across(Sample1:Sample2,as.character)) %>% - spread(Sample2,Proximity) %>% + mutate(across(sample1:sample2,as.character)) %>% + spread(sample2,proximity) %>% mutate_if(is.numeric,~ 1 - .x) mds_dimensions <- dissimilarities %>% @@ -44,23 +44,23 @@ setMethod('mds',signature = 'RandomForest', group_map(~ .x %>% select_if(is.numeric) %>% cmdscale(k = dimensions) %>% - set_colnames(str_c('Dimension ', + set_colnames(str_c('dimension ', seq_len(dimensions))) %>% as_tibble() %>% bind_cols(select_if(.x %>% ungroup(), is.character)) %>% - relocate(contains('Dimension'), + relocate(contains('dimension'), .after = last_col()), .keep = TRUE ) %>% bind_rows() %>% - rename(Sample = Sample1) + rename(sample = sample1) if (is.null(idx)){ mds_dimensions <- mds_dimensions %>% - mutate(Sample = as.numeric(Sample)) %>% - arrange(across(c(group_vars,'Sample'))) + mutate(sample = as.numeric(sample)) %>% + arrange(across(all_of(c(group_vars,'sample')))) } return(mds_dimensions) diff --git a/R/metabolyseR.R b/R/metabolyseR.R index 75389ffd..a6110325 100644 --- a/R/metabolyseR.R +++ b/R/metabolyseR.R @@ -1,11 +1,13 @@ globalVariables( - c('Response','Feature','Sample2','Comparison','Measure','Value', + c('response','feature','Sample2','comparison','Measure','value', 'Pvalue','Mean','SD','.','.estimate','Permutation','.metric', 'Class','Intensity','Feature1','p','n','Feature2','Intensity1', - 'Intensity2','log2IntensityRatio','r','ID','Sample','Rep','Count', + 'Intensity2','log2IntensityRatio','coefficient','ID','Sample','rep','Count', 'Occupancy','adjustedPvalue','adjusted.p.value','Label','response', - '-log10(p)','DF1','Sample1','Proximity','Dimension 
1','Dimension 2', + '-log10(p)','DF1','sample1','Proximity','Dimension 1','Dimension 2', 'x','.level','sensitivity','specificity','Mode','RSD','Median','Colour', - 'Index','TIC','y','label','batch','correction','N','term','Metric','Frequency','|r|','idx_1','idx_2' + 'Index','TIC','y','label','batch','correction','N','term','Metric','Frequency', + '|coefficient|','idx_1','idx_2','.estimator','metric','fpr','p-value','obs','pred', + 'sample2','Feature','freq','.data' )) diff --git a/R/modelling-accessors.R b/R/modelling-accessors.R index 3d0b9f60..8e5dfb25 100644 --- a/R/modelling-accessors.R +++ b/R/modelling-accessors.R @@ -5,6 +5,7 @@ #' @param cls sample information column to use #' @param metric importance metric for which to retrieve explanatory features #' @param threshold threshold below which explanatory features are extracted +#' @param value the importance value to threshold. See the usage section for possible values for each class. #' @param idx sample information column to use for sample names. If `NULL`, the sample row number will be used. Sample names should be unique for each row of data. #' @param ... arguments to parse to method for specific class #' @section Methods: @@ -13,6 +14,7 @@ #' * `type`: Return the type of random forest analysis. #' * `response`: Return the response variable name used for a random forest analysis. #' * `metrics`: Retrieve the model performance metrics for a random forest analysis +#' * `predictions`: Retrieve the out of bag model response predictions for a random forest analysis. #' * `importanceMetrics`: Retrieve the available feature importance metrics for a random forest analysis. #' * `importance`: Retrieve feature importance results. #' * `proximity`: Retrieve the random forest sample proximities. 
@@ -40,6 +42,9 @@ #' ## Retrieve the model performance metrics #' metrics(rf_analysis) #' +#' ## Retrieve the out of bag model response predictions +#' predictions(rf_analysis) +#' #' ## Show the available feature importance metrics #' importanceMetrics(rf_analysis) #' @@ -50,7 +55,7 @@ #' proximity(rf_analysis) #' #' ## Retrieve the explanatory features -#' explanatoryFeatures(rf_analysis,metric = 'FalsePositiveRate',threshold = 0.05) +#' explanatoryFeatures(rf_analysis,metric = 'false_positive_rate',threshold = 0.05) #' @export setGeneric('binaryComparisons',function(x,cls = 'class') @@ -98,7 +103,7 @@ setMethod('mtry',signature = 'AnalysisData', mtry <- switch(rf_type, regression = n_features/3, classification = sqrt(n_features)) %>% - floor() %>% + {floor(.)} %>% c(.,1) %>% max() @@ -117,6 +122,12 @@ setMethod('type',signature = 'RandomForest',function(x){ x@type }) +#' @rdname modelling-accessors + +setMethod('type',signature = 'Univariate',function(x){ + x@type +}) + #' @rdname modelling-accessors #' @export @@ -130,6 +141,12 @@ setMethod('response',signature = 'RandomForest',function(x){ x@response }) +#' @rdname modelling-accessors + +setMethod('response',signature = 'Univariate',function(x){ + unique(x@results$response) +}) + #' @rdname modelling-accessors #' @export @@ -141,7 +158,14 @@ setGeneric("metrics", function(x) setMethod('metrics',signature = 'RandomForest', function(x){ - x@results$measures + + if (nrow(x@permutations$metrics) > 0){ + metrics <- metricPvals(x) + } else { + metrics <- x@metrics + } + + return(metrics) } ) @@ -182,6 +206,55 @@ setMethod('metrics',signature = 'Analysis', #' @rdname modelling-accessors #' @export +setGeneric("predictions", function(x) + standardGeneric("predictions") +) + +#' @rdname modelling-accessors + +setMethod('predictions',signature = 'RandomForest', + function(x){ + x@predictions + } +) + +#' @rdname modelling-accessors + +setMethod('predictions',signature = 'list', + function(x){ + object_classes <- x %>% + 
map_chr(class) + + if (FALSE %in% (object_classes == 'RandomForest')) { + message( + str_c('All objects contained within supplied list ', + 'that are not of class RandomForest will be ignored.')) + } + + x <- x[object_classes == 'RandomForest'] + + if (length(x) > 0) { + x %>% + map(predictions) %>% + bind_rows() + } else { + tibble() + } + + }) + +#' @rdname modelling-accessors + +setMethod('predictions',signature = 'Analysis', + function(x){ + x %>% + analysisResults('modelling') %>% + predictions() + }) + +#' @rdname modelling-accessors +#' @export + setGeneric("importanceMetrics", function(x) standardGeneric("importanceMetrics") ) @@ -191,7 +264,7 @@ setGeneric("importanceMetrics", function(x) setMethod('importanceMetrics',signature = 'RandomForest',function(x){ x %>% importance() %>% - .$Metric %>% + .$metric %>% unique() %>% sort() }) @@ -207,8 +280,14 @@ setGeneric("importance", function(x) setMethod('importance',signature = 'RandomForest', function(x){ - x@results$importances %>% - ungroup() + + if (nrow(x@permutations$importance) > 0){ + importance <- importancePvals(x) + } else { + importance <- x@importances + } + + return(importance) } ) @@ -264,14 +343,14 @@ setMethod('proximity',signature = 'RandomForest', function(x,idx = NULL){ group_vars <- switch(type(x), - classification = c('Response','Comparison'), - regression = 'Response', + classification = c('response','comparison'), + regression = 'response', unsupervised = NULL) %>% - c(.,'Sample1','Sample2') + c(.,'sample1','sample2') proximities <- x@proximities %>% group_by_at(group_vars) %>% - summarise(Proximity = mean(Proximity), + summarise(proximity = mean(proximity), .groups = 'drop') if (!is.null(idx)){ @@ -291,16 +370,16 @@ setMethod('proximity',signature = 'RandomForest', proximities <- proximities %>% left_join(sample_idx, - by = c('Sample1' = 'row')) %>% + by = c('sample1' = 'row')) %>% rename(idx_1 = idx) %>% left_join(sample_idx, - by = c('Sample2' = 'row')) %>% + by = c('sample2' = 
'row')) %>% rename(idx_2 = idx) %>% - select(-Sample1, - -Sample2, - Sample1 = idx_1, - Sample2 = idx_2) %>% - relocate(Proximity,.after = Sample2) + select(-sample1, + -sample2, + sample1 = idx_1, + sample2 = idx_2) %>% + relocate(proximity,.after = sample2) } return(proximities) @@ -353,26 +432,39 @@ setGeneric('explanatoryFeatures', function(x,...) #' @importFrom dplyr arrange setMethod('explanatoryFeatures',signature = 'Univariate', - function(x,threshold = 0.05){ + function(x,threshold = 0.05,value = c('adjusted.p.value','p.value')){ + + value <- match.arg( + value, + choices = c('adjusted.p.value', + 'p.value')) + importance(x) %>% - filter(adjusted.p.value < threshold) %>% - arrange(adjusted.p.value) + filter(.data[[value]] < threshold) %>% + arrange(.data[[value]]) } ) #' @rdname modelling-accessors setMethod('explanatoryFeatures',signature = 'RandomForest', - function(x,metric = 'FalsePositiveRate', threshold = 0.05){ + function(x,metric = 'false_positive_rate',value = c('value','p-value','adjusted_p-value'),threshold = 0.05){ + + value <- match.arg( + value, + choices = c('value', + 'p-value', + 'adjusted_p-value') + ) typ <- type(x) if (typ %in% c('unsupervised','classification')) { - explan <- explanatoryFeaturesClassification(x,metric,threshold) + explan <- explanatoryFeaturesClassification(x,metric,value,threshold) } if (typ == 'regression') { - explan <- explanatoryFeaturesRegression(x,metric,threshold) + explan <- explanatoryFeaturesRegression(x,metric,value,threshold) } return(explan) diff --git a/R/modellingPlots.R b/R/modelling-plots.R similarity index 88% rename from R/modellingPlots.R rename to R/modelling-plots.R index ff743a04..69ee153c 100644 --- a/R/modellingPlots.R +++ b/R/modelling-plots.R @@ -29,12 +29,14 @@ setGeneric("plotImportance", function(x,...) 
setMethod('plotImportance',signature = 'Univariate', function(x, response = 'class',rank = TRUE,threshold = 0.05){ + current_response <- response + res <- importance(x) - available_responses <- res$Response %>% + available_responses <- res$response %>% unique() - if (!(response %in% unique(res$Response))) { + if (!(response %in% unique(res$response))) { ar <- available_responses %>% str_c('"',.,'"') %>% str_c(collapse = ', ') @@ -53,25 +55,25 @@ setMethod('plotImportance',signature = 'Univariate', } res <- res %>% - filter(Response == response) %>% + filter(response == current_response) %>% mutate(`-log10(p)` = -log10(adjusted.p.value)) pl <- res %>% - base::split(.$Comparison) %>% + base::split(.$comparison) %>% map(~{ if (isTRUE(rank)) { .x <- .x %>% arrange(`-log10(p)`) - rank <- .x$Feature + rank <- .x$feature .x <- .x %>% - mutate(Feature = factor(Feature,levels = rank)) + mutate(feature = factor(feature,levels = rank)) } - comparison <- .x$Comparison[1] + comparison <- .x$comparison[1] - ggplot(.x,aes(x = Feature,y = `-log10(p)`)) + + ggplot(.x,aes(x = feature,y = `-log10(p)`)) + geom_hline( yintercept = -log10(threshold), linetype = 2, @@ -86,7 +88,8 @@ setMethod('plotImportance',signature = 'Univariate', axis.title = element_text(face = 'bold'), plot.title = element_text(face = 'bold', hjust = 0.5)) + - labs(title = comparison) + labs(title = comparison, + x = 'Feature') }) %>% wrap_plots() + @@ -103,12 +106,14 @@ setMethod('plotImportance',signature = 'Univariate', ) #' @rdname plotImportance +#' @importFrom stringr str_to_title setMethod('plotImportance',signature = 'RandomForest', - function(x,metric = 'FalsePositiveRate',rank = TRUE){ + function(x,metric = 'false_positive_rate',rank = TRUE){ typ <- type(x) metrics <- importanceMetrics(x) + current_metric <- metric if (!(metric %in% metrics)) { @@ -121,28 +126,28 @@ setMethod('plotImportance',signature = 'RandomForest', } res <- importance(x) %>% - filter(Metric == metric) + filter(metric == 
current_metric) if (typ == 'classification') { pl <- res %>% - base::split(.$Comparison) %>% + base::split(.$comparison) %>% map(~{ if (isTRUE(rank)) { .x <- .x %>% - arrange(Value) + arrange(value) - rank <- .x$Feature + rank <- .x$feature .x <- .x %>% - mutate(Feature = factor(Feature,levels = rank)) + mutate(feature = factor(feature,levels = rank)) } .x <- .x %>% - spread(Metric,Value) + spread(metric,value) - comparison <- .x$Comparison[1] + comparison <- .x$comparison[1] - pl <- ggplot(.x,aes(x = Feature,y = !!sym(metric))) + + pl <- ggplot(.x,aes(x = feature,y = !!sym(metric))) + geom_point(shape = 21,alpha = 0.5,fill = ptol_pal()(1)) + theme_bw() + theme(axis.ticks.x = element_blank(), @@ -154,11 +159,17 @@ setMethod('plotImportance',signature = 'RandomForest', plot.title = element_text(face = 'bold', hjust = 0.5), plot.caption = element_text(hjust = -1)) + - labs(title = comparison) + labs(title = comparison, + x = 'Feature', + y = str_replace_all(metric,'_',' ') %>% + str_to_title()) if (typ != 'unsupervised') { pl <- pl + - labs(title = res$Response[1]) + labs(title = res$response[1], + x = 'Feature', + y = str_replace_all(metric,'_',' ') %>% + str_to_title()) } }) %>% @@ -169,18 +180,18 @@ setMethod('plotImportance',signature = 'RandomForest', d <- . 
if (isTRUE(rank)) { d <- d %>% - arrange(Value) + arrange(value) - rank <- d$Feature + rank <- d$feature d <- d %>% - mutate(Feature = factor(Feature,levels = rank)) + mutate(feature = factor(feature,levels = rank)) } d } %>% - spread(Metric,Value) %>% + spread(metric,value) %>% { - p <- ggplot(.,aes(x = Feature,y = !!sym(metric))) + + p <- ggplot(.,aes(x = feature,y = !!sym(metric))) + geom_point(shape = 21,alpha = 0.5,fill = ptol_pal()(1)) + theme_bw() + theme(axis.ticks.x = element_blank(), @@ -195,7 +206,10 @@ setMethod('plotImportance',signature = 'RandomForest', if (typ != 'unsupervised') { p <- p + - labs(title = res$Response[1]) + labs(title = res$response[1], + x = 'Feature', + y = str_replace_all(metric,'_',' ') %>% + str_to_title()) } p } @@ -211,7 +225,7 @@ setMethod('plotImportance',signature = 'RandomForest', setMethod('plotImportance', signature = 'list', - function(x,metric = 'FalsePositiveRate'){ + function(x,metric = 'false_positive_rate'){ object_classes <- x %>% map_chr(class) @@ -264,7 +278,7 @@ setMethod('plotMetrics',signature = 'RandomForest', response <- response(x) if (x@type == 'classification') { - pl <- ggplot(res,aes(x = .estimate,y = Comparison)) + + pl <- ggplot(res,aes(x = .estimate,y = comparison)) + geom_point(shape = 21,fill = ptol_pal()(1)) + theme_bw() + facet_wrap(~.metric) + @@ -389,16 +403,16 @@ setMethod('plotMDS', if (type(x) == 'classification') { if (!is.null(cls)) { mds_dimensions <- mds_dimensions %>% - base::split(.$Comparison) %>% + base::split(.$comparison) %>% map(~{ - comparison <- str_split(.x$Comparison[1],'~')[[1]] + comparison <- str_split(.x$comparison[1],'~')[[1]] cda <- keepClasses(x,response(x),comparison) .x %>% bind_cols(cda %>% sinfo() %>% - select(cls) %>% + select(all_of(cls)) %>% mutate_all(as.character) ) }) %>% @@ -408,13 +422,13 @@ setMethod('plotMDS', if (!is.null(label)) { mds_dimensions <- mds_dimensions %>% - base::split(.$Comparison) %>% + base::split(.$comparison) %>% map(~{ d <- . 
- comparison <- str_split(d$Comparison[1],'~')[[1]] + comparison <- str_split(d$comparison[1],'~')[[1]] cda <- removeClasses(x,cls,classes = sinfo(x) %>% - select(cls) %>% + select(all_of(cls)) %>% unlist() %>% unique() %>% .[!(. %in% comparison)]) @@ -434,7 +448,7 @@ setMethod('plotMDS', mds_dimensions <- mds_dimensions %>% bind_cols(x %>% sinfo() %>% - select(cls) %>% + select(all_of(cls)) %>% mutate_all(factor) ) } @@ -456,8 +470,8 @@ setMethod('plotMDS', pl <- scatterPlot( mds_dimensions, cls, - 'Dimension 1', - 'Dimension 2', + 'dimension 1', + 'dimension 2', ellipses, shape, label, @@ -470,7 +484,7 @@ setMethod('plotMDS', if (x@type == 'classification') { pl <- pl + - facet_wrap(~Comparison) + facet_wrap(~comparison) } return(pl) @@ -547,7 +561,7 @@ setMethod('plotROC',signature = 'RandomForest', preds <- roc(x) - meas <- x@results$measures %>% + meas <- x@metrics %>% filter(.metric == 'roc_auc') %>% mutate(x = 0.8, y = 0, @@ -567,11 +581,11 @@ setMethod('plotROC',signature = 'RandomForest', colour = Class)) + geom_text(data = meas,aes(x = x,y = y,label = label),size = 3) + theme_bw() + - facet_wrap(~Comparison) + + facet_wrap(~comparison) + coord_fixed() + guides( colour = guide_legend( - title = x@results$measures$Response[1])) + + title = x@metrics$response[1])) + labs(title = title) if ((preds$Class %>% unique() %>% length()) <= 12) { @@ -594,7 +608,7 @@ setMethod('plotROC',signature = 'RandomForest', data = meas, aes(x = x,y = y,label = label),size = 3) + theme_bw() + - facet_wrap(~Comparison) + + facet_wrap(~comparison) + coord_fixed() + guides(colour = guide_legend(title = 'Class')) + labs(title = title) diff --git a/R/modelling.R b/R/modelling.R index 3098716e..f9ae8080 100644 --- a/R/modelling.R +++ b/R/modelling.R @@ -78,11 +78,18 @@ setMethod('modelling',signature = 'Analysis', } ) -explanatoryFeaturesClassification <- function(x,metric,threshold){ +explanatoryFeaturesClassification <- function(x,metric,value,threshold){ + + current_metric <- 
metric imp <- x %>% importance() + if (value != 'value' & !'p-value' %in% colnames(imp)){ + stop('p-values unavailable for this RandomForest class object. Use argument `perm` in randomForest() to for permutation testing to generate p-values.', + call. = FALSE) + } + metrics <- importanceMetrics(x) if (!(metric %in% metrics)) { @@ -90,32 +97,39 @@ explanatoryFeaturesClassification <- function(x,metric,threshold){ metrics <- str_c('"',metrics,'"') stop( - 'Argument "metric" should be one of ', + 'Argument `metric` should be one of ', str_c(metrics,collapse = ', '), call. = FALSE) } explan <- imp %>% - filter(Metric == metric) + filter(metric == current_metric) - if (metric == 'FalsePositiveRate') { + if (metric == 'false_positive_rate' | value != 'value') { explan <- explan %>% - filter(Value < threshold) %>% - arrange(Value) + filter(.data[[value]] < threshold) %>% + arrange(!!sym(value)) } else { explan <- explan %>% - filter(Value > threshold) %>% - arrange(desc(Value)) + filter(.data[[value]] > threshold) %>% + arrange(desc(!!sym(value))) } return(explan) } -explanatoryFeaturesRegression <- function(x,metric,threshold){ +explanatoryFeaturesRegression <- function(x,metric,value,threshold){ + + current_metric <- metric imp <- x %>% importance() + if (value != 'value' & !'p-value' %in% colnames(imp)){ + stop('p-values unavailable for this RandomForest class object. Use argument `perm` in randomForest() to for permutation testing to generate p-values.', + call. = FALSE) + } + metrics <- importanceMetrics(x) if (!(metric %in% metrics)) { @@ -123,15 +137,22 @@ explanatoryFeaturesRegression <- function(x,metric,threshold){ metrics <- str_c('"',metrics,'"') stop( - 'Argument "metric" should be one of ', + 'Argument `metric` should be one of ', str_c(metrics,collapse = ', '), call. 
= FALSE) } - explan <- imp %>% - filter(Metric == metric) %>% - filter(Value > threshold) %>% - arrange(desc(Value)) + if (value == 'value'){ + explan <- imp %>% + filter(metric == current_metric) %>% + filter(.data[[value]] > threshold) %>% + arrange(desc(!!sym(value))) + } else { + explan <- imp %>% + filter(metric == current_metric) %>% + filter(.data[[value]] < threshold) %>% + arrange(!!sym(value)) + } return(explan) } diff --git a/R/occupancy.R b/R/occupancy.R index c8b8e429..64914e7e 100644 --- a/R/occupancy.R +++ b/R/occupancy.R @@ -41,6 +41,12 @@ setGeneric("occupancyMaximum", function(d, cls = 'class', occupancy = 2/3) setMethod('occupancyMaximum',signature = 'AnalysisData', function(d,cls = 'class', occupancy = 2/3){ + + if (!is.numeric(occupancy)){ + stop('Argument `occupancy` is non-numeric.', + call. = FALSE) + } + occ <- occupancy(d,cls) fd <- occ %>% group_by(Feature) %>% @@ -52,7 +58,7 @@ setMethod('occupancyMaximum',signature = 'AnalysisData', unique(fd$Feature)] dat(d) <- d %>% dat() %>% - select(feat) + select(all_of(feat)) return(d) } ) @@ -67,6 +73,12 @@ setGeneric("occupancyMinimum", function(d, cls = 'class', occupancy = 2/3) setMethod('occupancyMinimum',signature = 'AnalysisData', function(d,cls = 'class', occupancy = 2/3){ + + if (!is.numeric(occupancy)){ + stop('Argument `occupancy` is non-numeric.', + call. 
= FALSE) + } + occ <- occupancy(d,cls) fd <- occ %>% group_by(Feature) %>% @@ -78,7 +90,7 @@ setMethod('occupancyMinimum',signature = 'AnalysisData', unique(fd$Feature)] dat(d) <- d %>% dat() %>% - select(feat) + select(all_of(feat)) return(d) } ) diff --git a/R/parameters.R b/R/parameters.R index c71ac5b9..8b44c690 100644 --- a/R/parameters.R +++ b/R/parameters.R @@ -55,7 +55,9 @@ analysisParameters <- function(elements = analysisElements()){ correlations <- list( method = 'pearson', pAdjustMethod = 'bonferroni', - corPvalue = 0.05 + corPvalue = 0.05, + minCoef = 0, + maxCor = Inf ) } diff --git a/R/plotExplanatoryHeatmap.R b/R/plotExplanatoryHeatmap.R index 0f7da093..72f686f1 100644 --- a/R/plotExplanatoryHeatmap.R +++ b/R/plotExplanatoryHeatmap.R @@ -12,16 +12,16 @@ heatmapClasses <- function(pl, featureLimit){ pl %>% map(~{ - r <- . - pred <- r$Response[1] + r <- .x + pred <- r$response[1] - classes <- r$Comparison %>% + classes <- r$comparison %>% unique() %>% str_split('~') %>% unlist() %>% unique() - feat <- r$Feature %>% + feat <- r$feature %>% unique() if (length(feat) > featureLimit){ @@ -145,9 +145,9 @@ heatmapRegression <- function(pl, pl %>% map(~{ - response <- .x$Response[1] + response <- .x$response[1] - feat <- .x$Feature %>% + feat <- .x$feature %>% unique() if (length(feat) > featureLimit){ @@ -313,7 +313,7 @@ setMethod('plotExplanatoryHeatmap', } pl <- res %>% - base::split(.$Response) + base::split(.$response) if (x@type == 't-test' | x@type == 'ANOVA') { pl <- heatmapClasses( @@ -341,7 +341,7 @@ setMethod('plotExplanatoryHeatmap', featureLimit = featureLimit) } - feat <- res$Feature %>% + feat <- res$feature %>% unique() caption <- str_c( @@ -368,7 +368,7 @@ setMethod('plotExplanatoryHeatmap', setMethod('plotExplanatoryHeatmap', signature = 'RandomForest', function(x, - metric = 'FalsePositiveRate', + metric = 'false_positive_rate', threshold = 0.05, title = '', distanceMeasure = "euclidean", @@ -391,7 +391,7 @@ 
setMethod('plotExplanatoryHeatmap', } pl <- explan %>% - base::split(.$Response) + base::split(.$response) if (x@type == 'classification') { pl <- heatmapClasses( @@ -419,7 +419,7 @@ setMethod('plotExplanatoryHeatmap', featureLimit = featureLimit) } - feat <- explan$Feature %>% + feat <- explan$feature %>% unique() if (metric == 'FalsePositiveRate') { @@ -448,6 +448,7 @@ setMethod('plotExplanatoryHeatmap', ) #' @rdname plotExplanatoryHeatmap +#' @importFrom rlang squash setMethod('plotExplanatoryHeatmap', signature = 'list', @@ -457,6 +458,9 @@ setMethod('plotExplanatoryHeatmap', clusterMethod = 'ward.D2', featureNames = TRUE, featureLimit = Inf){ + + suppressWarnings(x <- squash(x)) + object_classes <- x %>% map_chr(class) @@ -467,13 +471,12 @@ setMethod('plotExplanatoryHeatmap', 'should be of class RandomForest or Univariate'), call. = FALSE) } - - x %>% - names() %>% + + x %>% map(~{plotExplanatoryHeatmap( - x[[.x]], + .x, threshold = threshold, - title = .x, + title = response(.x), distanceMeasure = distanceMeasure, clusterMethod = clusterMethod, featureNames = featureNames, diff --git a/R/plotFeature.R b/R/plotFeature.R index e9708d80..04df2bb4 100644 --- a/R/plotFeature.R +++ b/R/plotFeature.R @@ -9,9 +9,8 @@ #' @param type `raw` or `pre-treated` data to plot #' @param ... 
arguments to pass to the appropriate method #' @examples -#' library(metaboData) -#' -#' d <- analysisData(abr1$neg,abr1$fact) +#' d <- analysisData(metaboData::abr1$neg, +#' metaboData::abr1$fact) #' #' ## Plot a categorical response variable #' plotFeature(d,'N133',cls = 'day') @@ -33,6 +32,8 @@ setGeneric('plotFeature', #' @rdname plotFeature #' @importFrom ggplot2 aes geom_point theme_bw element_text guides #' @importFrom ggplot2 scale_fill_manual xlab +#' @importFrom methods is +#' @importFrom dplyr pull setMethod('plotFeature', signature = 'AnalysisData', @@ -42,6 +43,7 @@ setMethod('plotFeature', label = NULL, labelSize = 2){ + cls <- sym(cls) feat <- features(analysis) if (!feature %in% feat) { @@ -56,33 +58,33 @@ setMethod('plotFeature', i <- sinfo(analysis) i <- i %>% - select(Class = cls,Label = label) + select(!!cls,Label = label) if (!is.null(label)) { d <- d %>% bind_cols(i) %>% - gather('Feature','Intensity',-Class,-Label) %>% + gather('Feature','Intensity',-!!cls,-Label) %>% filter(Feature == feature) %>% mutate(Intensity = as.numeric(Intensity)) } else { d <- d %>% bind_cols(i) %>% - gather('Feature','Intensity',-Class) %>% + gather('Feature','Intensity',-!!cls) %>% filter(Feature == feature) %>% mutate(Intensity = as.numeric(Intensity)) } - if (class(i$Class) == 'character' | class(i$Class) == 'factor') { + if (is(pull(i,!!cls),'character') | is(pull(i,!!cls),'factor')) { classes <- d %>% - select(Class) %>% + select(!!cls) %>% unique() %>% unlist() %>% length() pl <- d %>% - ggplot(aes(x = Class,y = Intensity,group = Class)) + + ggplot(aes(x = !!cls,y = Intensity,group = !!cls)) + geom_boxplot(outlier.shape = NA,colour = 'darkgrey') + - geom_point(aes(fill = Class),shape = 21,alpha = 0.8) + + geom_point(aes(fill = !!cls),shape = 21,alpha = 0.8) + theme_bw() + ggtitle(feature) + theme(axis.title = element_text(face = 'bold'), @@ -107,11 +109,10 @@ setMethod('plotFeature', } } else { pl <- d %>% - ggplot(aes(x = Class, y = Intensity)) + + 
ggplot(aes(x = !!cls, y = Intensity)) + geom_point(fill = ptol_pal()(1),shape = 21) + theme_bw() + ggtitle(feature) + - xlab(cls) + theme(axis.title = element_text(face = 'bold'), plot.title = element_text(face = 'bold', hjust = 0.5), @@ -139,17 +140,22 @@ setMethod('plotFeature', cls = 'class', label = NULL, labelSize = 2, - type = 'pre-treated'){ - if (!(type %in% c('raw','pre-treated'))) { - stop( - 'Argument "type" should be one of "raw" or "pre-treated".', - call. = FALSE) - } + type = c('pre-treated', + 'raw')){ + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw' + ) + ) if (type == 'pre-treated') { d <- analysis %>% preTreated() - } else { + } + + if (type == 'raw'){ d <- analysis %>% raw() } diff --git a/R/plotLDA.R b/R/plotLDA.R index 7f427619..f866a682 100644 --- a/R/plotLDA.R +++ b/R/plotLDA.R @@ -63,8 +63,10 @@ setMethod('plotLDA', legendPosition = 'bottom', labelSize = 2){ - lda <- nlda(analysis,cls = cls,scale = scale,center = center) + lda <- try(nlda(analysis,cls = cls,scale = scale,center = center)) + + if (!is(lda,'try-error')) { tw <- lda@Tw %>% round(2) @@ -111,6 +113,9 @@ setMethod('plotLDA', } return(pl) + } else { + warning('Errors encounted in PC-LDA, skipping plotting of PC-LDA.',call. = FALSE) + } } ) @@ -130,18 +135,23 @@ setMethod('plotLDA', title = 'PC-LDA', legendPosition = 'bottom', labelSize = 2, - type = 'raw'){ + type = c('pre-treated', + 'raw')){ - if (!(type %in% c('raw','pre-treated'))) { - stop( - 'Argument "type" should be one of "raw" or "pre-treated".', - call. 
= FALSE) - } + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw' + ) + ) if (type == 'pre-treated') { d <- analysis %>% preTreated() - } else { + } + + if (type == 'raw'){ d <- analysis %>% raw() } diff --git a/R/plotPCA.R b/R/plotPCA.R index 3030cf7f..ca6d32e7 100644 --- a/R/plotPCA.R +++ b/R/plotPCA.R @@ -30,20 +30,20 @@ setGeneric('plotPCA', function( - analysis, - cls = 'class', - label = NULL, - scale = TRUE, - center = TRUE, - xAxis = 'PC1', - yAxis = 'PC2', - shape = FALSE, - ellipses = TRUE, - title = 'PCA', - legendPosition = 'bottom', - labelSize = 2, - ...) - standardGeneric('plotPCA')) + analysis, + cls = 'class', + label = NULL, + scale = TRUE, + center = TRUE, + xAxis = 'PC1', + yAxis = 'PC2', + shape = FALSE, + ellipses = TRUE, + title = 'PCA', + legendPosition = 'bottom', + labelSize = 2, + ...) + standardGeneric('plotPCA')) #' @rdname plotPCA #' @importFrom ggplot2 scale_shape_manual geom_hline geom_vline @@ -68,7 +68,7 @@ setMethod('plotPCA', pca <- prcomp(dat(analysis),scale. = scale,center = center) info <- sinfo(analysis) %>% - select(cls) %>% + select(all_of(cls)) %>% mutate(!!cls := factor(!!sym(cls))) var <- pca$sdev @@ -127,18 +127,23 @@ setMethod('plotPCA', title = 'PCA', legendPosition = 'bottom', labelSize = 2, - type = 'raw'){ + type = c('pre-treated', + 'raw')){ - if (!(type %in% c('raw','pre-treated'))) { - stop( - 'Argument "type" should be one of "raw" or "pre-treated".', - call. 
= FALSE) - } + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw' + ) + ) if (type == 'pre-treated') { d <- analysis %>% preTreated() - } else { + } + + if (type == 'raw'){ d <- analysis %>% raw() } diff --git a/R/plotSupervisedRF.R b/R/plotSupervisedRF.R index 9d31b357..caf321ee 100644 --- a/R/plotSupervisedRF.R +++ b/R/plotSupervisedRF.R @@ -65,7 +65,7 @@ setMethod('plotSupervisedRF', reps = 1, seed = seed)) - if (class(rf) != 'try-error') { + if (!is(rf,'try-error')) { pl <- plotMDS(rf, cls = cls, label = label, @@ -75,7 +75,7 @@ setMethod('plotSupervisedRF', labelSize = labelSize) + labs( caption = str_c('Margin: ', - rf@results$measures$.estimate[4] %>% + rf@metrics$.estimate[4] %>% round(3))) if (isTRUE(ROC) & rf@type == 'classification') { @@ -110,18 +110,22 @@ setMethod('plotSupervisedRF', title = '', legendPosition = 'bottom', labelSize = 2, - type = 'raw'){ + type = c('pre-treated','raw')){ - if (!(type %in% c('raw','pre-treated'))) { - stop( - 'Argument "type" should be one of "raw" or "pre-treated".', - call. 
= FALSE) - } + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw' + ) + ) if (type == 'pre-treated') { d <- x %>% preTreated() - } else { + } + + if (type == 'raw'){ d <- x %>% raw() } diff --git a/R/plotTIC.R b/R/plotTIC.R index 03fd5a7d..cfd4b266 100644 --- a/R/plotTIC.R +++ b/R/plotTIC.R @@ -50,7 +50,7 @@ setMethod('plotTIC',signature = 'AnalysisData', } else { d <- d %>% group_by(ID,!!sym(by)) - + } d <- d %>% @@ -113,14 +113,19 @@ setMethod('plotTIC',signature = 'Analysis', function(analysis, by = 'injOrder', colour = 'block', - type = c('raw','pre-treated')) { + type = c('pre-treated','raw')) { - type <- match.arg(type, - choices = c('raw','pre-treated')) + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw')) if (type == 'raw'){ ty <- get('raw') - } else { + } + + if (type == 'pre-treated'){ ty <- get('preTreated') } diff --git a/R/plotUnsupervisedRF.R b/R/plotUnsupervisedRF.R index f8a63cd4..4b3a25c2 100644 --- a/R/plotUnsupervisedRF.R +++ b/R/plotUnsupervisedRF.R @@ -87,18 +87,22 @@ setMethod('plotUnsupervisedRF', title = '', legendPosition = 'bottom', labelSize = 2, - type = 'raw'){ + type = c('pre-treated','raw')){ - if (!(type %in% c('raw','pre-treated'))) { - stop( - 'Argument "type" should be one of "raw" or "pre-treated".', - call. 
= FALSE) - } + type <- match.arg( + type, + choices = c( + 'pre-treated', + 'raw' + ) + ) if (type == 'pre-treated') { d <- x %>% preTreated() - } else { + } + + if (type == 'raw'){ d <- x %>% raw() } diff --git a/R/pre-treatment.R b/R/pre-treatment.R index 3f9db95a..6b589319 100644 --- a/R/pre-treatment.R +++ b/R/pre-treatment.R @@ -189,11 +189,11 @@ QCMethods <- function(method = NULL){ return(method) } -removeMethods <- function(method = NULL, description = FALSE){ +removeMethods <- function(method = NULL){ methods <- list( samples = removeSamples, classes = removeClasses, - features = removeFeatures + features = removeFeatures ) if (is.null(method)) { @@ -203,7 +203,7 @@ removeMethods <- function(method = NULL, description = FALSE){ stop(str_c("Remove method '", method, "' not recognised. Available methods include: ", - str_c(str_c("'",names(methods),"'"),collapse = ', '),'.')) + str_c(str_c("'",names(methods),"'"),collapse = ' '),'.')) } method <- methods[[method]] } diff --git a/R/predict.R b/R/predict.R index 0e55c097..6b6f36ed 100644 --- a/R/predict.R +++ b/R/predict.R @@ -83,27 +83,25 @@ setMethod('predict',signature = c('RandomForest','AnalysisData'), test_data <- dat(new_data) model_object_depth <- switch(type(model), - classification = 4, - regression = 3) + classification = 3, + regression = 2) model_predictions <- model@models %>% map_depth(.depth = model_object_depth, .f = ~ .x %>% { tibble( - Sample = sample_idx, - Prediction = stats::predict( + sample = sample_idx, + prediction = stats::predict( object = .x, newdata = test_data, type = type, ...)) - }) %>% - map_depth(.depth = model_object_depth - 2, - .f = ~ .x$models) + }) - column_headers <- c('Response', - 'Comparison', - 'Rep') + column_headers <- c('response', + 'comparison', + 'rep') type_column_headers <- switch( type(model), classification = column_headers, @@ -117,7 +115,7 @@ setMethod('predict',signature = c('RandomForest','AnalysisData'), } model_predictions <- model_predictions %>% - 
mutate(Rep = as.numeric(Rep)) + mutate(rep = as.numeric(rep)) return(model_predictions) }) diff --git a/R/randomForest-classification.R b/R/randomForest-classification.R new file mode 100644 index 00000000..9d168dbd --- /dev/null +++ b/R/randomForest-classification.R @@ -0,0 +1,243 @@ +#' @importFrom randomForest margin + +classificationPredictions <- function(model){ + suppressMessages( + tibble(sample = seq_along(model$y), + obs = model$y, + pred = model$predicted, + margin = margin(model)) %>% + bind_cols(model$votes %>% + as_tibble(.name_repair = 'minimal') %>% + mutate_all(as.numeric)) + ) +} + +classificationMetrics <- function(model){ + predictions <- model %>% + classificationPredictions() + + class_metrics <- metric_set(accuracy,kap) + + acc_kap <- predictions %>% + class_metrics(obs,estimate = pred) + + if (length(levels(predictions$obs)) > 2) { + estimate <- levels(predictions$obs) + } else { + estimate <- levels(predictions$obs)[1] + } + + roc <- predictions %>% + roc_auc(obs,estimate) + + bind_rows( + acc_kap, + roc, + tibble(.metric = 'margin', + .estimate = margin(model) %>% + mean()) + ) +} + +#' @importFrom forestControl fpr_fs + +classificationImportance <- function(model){ + model %>% + randomForest::importance() %>% + {bind_cols(tibble(feature = rownames(.)),as_tibble(.,.name_repair = 'minimal'))} %>% + left_join(fpr_fs(model),by = c('feature' = 'variable')) %>% + rename(selection_frequency = freq,false_positive_rate = fpr) %>% + gather(metric,value,-feature) +} + +#' @importFrom purrr map_dfr + +collateClassification <- function(models,results){ + suppressMessages( + models %>% + map_dfr( + ~.x %>% + map_dfr(~.x$reps %>% + map_dfr(~.x[[results]], + .id = 'rep'), + .id = 'comparison' + ), + .id = 'response' + ) + ) +} + +collateClassificationModels <- function(models){ + suppressMessages( + models %>% + map( + ~.x %>% + map(~.x$reps %>% + map(~.x$model) + ) + ) + ) +} + +#' @importFrom yardstick metric_set accuracy kap roc_auc +#' @importFrom 
dplyr summarise_all group_by_all n +#' @importFrom stringr str_split +#' @importFrom magrittr set_names +#' @importFrom furrr future_map_dfr + +classification <- function(x, + cls, + rf = list( + keep.forest = TRUE, + proximity = TRUE, + importance = TRUE + ), + reps = 1, + binary = FALSE, + comparisons = list(), + perm = 0, + returnModels = FALSE, + seed = 1234){ + + i <- x %>% + sinfo() %>% + select(all_of(cls)) + + clsFreq <- i %>% + group_by_all() %>% + summarise(n = n(),.groups = 'drop') + + if (any(clsFreq$n < 5)) { + clsRem <- clsFreq %>% + filter(n < 5) + + x <- x %>% + removeClasses(cls = cls,classes = clsRem %>% + select(all_of(cls)) %>% + deframe()) + + cls_list <- clsRem %>% + select(all_of(cls)) %>% + deframe() %>% + str_c('"',.,'"') %>% + str_c(.,collapse = ', ') + + warning(str_c('Classes with < 5 replicates removed: ', + cls_list), + call. = FALSE) + + i <- x %>% + sinfo() %>% + select(all_of(cls)) + } + + if (length(unique(deframe(i))) < 2) { + stop('Need at least two classes to do classification.',call. = FALSE) + } + + if (length(comparisons) > 0) { + comp <- comparisons + } else { + if (binary == TRUE) { + comp <- map(names(i),~{ + binaryComparisons(x,cls = .x) + }) %>% + set_names(names(i)) + } else { + comp <- map(i,~{unique(.) %>% + sort() %>% + str_c(collapse = '~')}) + } + } + + models <- i %>% + colnames() %>% + map(~{ + inf <- .x + + comps <- comp[[inf]] + + comps %>% + map(~{ + comparison <- str_split(.x,'~')[[1]] + + cda <- keepClasses(x,inf,classes = comparison) + + pred <- cda %>% + sinfo() %>% + select(all_of(inf)) %>% + deframe() %>% + factor() + + predFreq <- pred %>% + tibble(cls = .) %>% + group_by_all() %>% + summarise(n = n(),.groups = 'drop') + + if (length(unique(predFreq$n)) > 1) { + message( + str_c('Unbalanced classes detected. 
Stratifying ', + 'sample size to the smallest class size.')) + + ss <- pred %>% + table() %>% + min() %>% + rep(length(unique(pred))) + + rf <- c(rf,list(strata = pred,sampsize = ss)) + } + + models <- future_map( + 1:reps,~{ + performRF( + dat(cda), + pred, + rf, + type = 'classification', + returnModel = returnModels) + }, + .options = furrr_options(seed = seed)) %>% + set_names(1:reps) + + if (perm > 0) { + permutation_results <- permutations(cda, + inf, + rf, + perm, + type = 'classification') + } else { + permutation_results <- list() + } + + return( + list(reps = models, + permutations = permutation_results) + ) + }) %>% + set_names(comps) + }) %>% + set_names(colnames(i)) + + res <- new('RandomForest', + x, + type = 'classification', + response = cls, + metrics = collate(models,'metrics',type = 'classification') %>% + group_by(response,comparison,.metric,.estimator) %>% + summarise(.estimate = mean(.estimate), + .groups = 'drop'), + predictions = collate(models,'predictions',type = 'classification'), + importances = collate(models,'importance',type = 'classification') %>% + group_by(response,comparison,feature,metric) %>% + summarise(value = mean(value), + .groups = 'drop'), + proximities = collate(models,'proximities',type = 'classification'), + permutations = collatePermutations(models,type = 'classification')) + + + if (isTRUE(returnModels)) { + res@models <- collateModels(models,type = 'classification') + } + + return(res) +} diff --git a/R/randomForest-internals.R b/R/randomForest-internals.R new file mode 100644 index 00000000..74a6ed64 --- /dev/null +++ b/R/randomForest-internals.R @@ -0,0 +1,111 @@ + +performRF <- function(x,cls,rf,type,returnModel){ + params <- formals(randomForest::randomForest) + params$x <- x + params <- c(params,rf) + + if (!is.null(cls)) params$y <- cls + + model <- do.call(randomForest::randomForest,params) + + model_results <- list(metrics = performanceMetrics(model, + type = type), + importance = modelImportance(model,type), 
+ predictions = modelPredictions(model,type), + proximities = modelProximities(model)) + + if (isTRUE(returnModel)) model_results <- c(model_results, + list(model = model)) + + return(model_results) +} + +performanceMetrics <- function(model,type){ + switch(type, + unsupervised = tibble(), + classification = classificationMetrics(model), + regression = regressionMetrics(model)) +} + +modelPredictions <- function(model,type){ + switch(type, + unsupervised = tibble(), + classification = classificationPredictions(model), + regression = regressionPredictions(model)) +} + +modelImportance <- function(model,type){ + switch(type, + unsupervised = classificationImportance(model), + classification = classificationImportance(model), + regression = regressionImportance(model)) +} + +modelProximities <- function(model){ + model$proximity %>% + as_tibble(.name_repair = 'minimal') %>% + mutate(sample = seq_len(nrow(.))) %>% + gather('sample2','proximity',-sample) %>% + rename(sample1 = sample) %>% + mutate(sample2 = as.numeric(sample2)) +} + +collate <- function(models,results,type){ + switch(type, + unsupervised = collateUnsupervised(models,results), + classification = collateClassification(models,results), + regression = collateRegression(models,results) + ) +} + +collateModels <- function(models,type){ + switch(type, + unsupervised = collateUnsupervisedModels(models), + classification = collateClassificationModels(models), + regression = collateRegressionModels(models)) +} + +supervised <- function(x, + cls, + rf, + reps, + binary, + comparisons, + perm, + returnModels, + seed){ + i <- x %>% + sinfo() %>% + select(all_of(cls)) + + i %>% + colnames() %>% + map(~{ + cls <- . 
+ + pred <- i %>% + select(all_of(cls)) %>% + deframe() + + if (is.numeric(pred)) { + regression(x, + cls, + rf, + reps, + perm, + returnModels, + seed) + } else { + classification(x, + cls, + rf, + reps, + binary, + comparisons, + perm, + returnModels, + seed) + } + }) %>% + set_names(colnames(i)) +} \ No newline at end of file diff --git a/R/randomForest-permute.R b/R/randomForest-permute.R new file mode 100644 index 00000000..c86b3e89 --- /dev/null +++ b/R/randomForest-permute.R @@ -0,0 +1,204 @@ +nPerm <- function(n,k){choose(n,k) * factorial(k)} + +#' @importFrom stats runif + +permute <- function(x,cls,rf,type){ + + randomised_cls <- x %>% + sinfo() %>% + select(all_of(cls)) %>% + unlist(use.names = FALSE) %>% + sample() + + if (type == 'classification'){ + randomised_cls <- factor(randomised_cls) + rf$strata <- randomised_cls + } + + model <- performRF(dat(x), + randomised_cls, + rf, + type, + returnModel = FALSE) + + list(metrics = model$metrics, + importance = model$importance) +} + +permutations <- function(x,cls,rf,n,type){ + i <- x %>% + sinfo() %>% + select(all_of(cls)) %>% + unlist(use.names = FALSE) + + if (is.character(i) | is.factor(i)) { + i <- factor(i) + } + + if (nPerm(length(i),length(unique(i))) < n) { + n <- nPerm(length(i),length(unique(i))) + } + + permutation_results <- future_map(1:n, + ~permute(x = x, + cls = cls, + rf = rf, + type = type), + .id = 'permutation', + .options = furrr_options( + seed = runif(1) %>% + {. 
* 100000} %>% + round() + )) + + permutation_metrics <- permutation_results %>% + map_dfr(~.x$metrics,id = 'permutation') %>% + group_by(.metric) %>% + summarise(mean = mean(.estimate), + sd = sd(.estimate)) + + permutation_importance <- permutation_results %>% + map_dfr(~.x$importance,id = 'permutation') %>% + group_by(feature,metric) %>% + summarise(mean = mean(value), + sd = sd(value), + .groups = 'drop') + + list(metrics = permutation_metrics, + importance = permutation_importance) +} + +collatePermutations <- function(models,type){ + switch(type, + classification = collatePermutationsClassification(models), + regression = collatePermutationsRegression(models)) +} + +collatePermutationsClassification <- function(models){ + permutation_metrics <- models %>% + map_dfr( + ~.x %>% + map_dfr(~.x$permutations$metrics, + .id = 'comparison' + ), + .id = 'response' + ) + + permutation_importance <- models %>% + map_dfr( + ~.x %>% + map_dfr(~.x$permutations$importance, + .id = 'comparison' + ), + .id = 'response' + ) + + list(metrics = permutation_metrics, + importance = permutation_importance) +} + +collatePermutationsRegression <- function(models){ + permutation_metrics <- models %>% + map_dfr( + ~.x$permutations$metrics, + .id = 'response' + ) + + permutation_importance <- models %>% + map_dfr( + ~.x$permutations$importance, + .id = 'response' + ) + + list(metrics = permutation_metrics, + importance = permutation_importance) +} + +metricPvals <- function(x){ + model_type <- type(x) + + switch(model_type, + classification = classificationMetricPvals(x), + regression = regressionMetricPvals(x)) +} + +#' @importFrom stats pnorm +#' @importFrom dplyr rowwise + +classificationMetricPvals <- function(x){ + left_join(x@metrics, + x@permutations$metrics, + by = c("response", "comparison", ".metric")) %>% + mutate(`p-value` = pnorm(.estimate,mean,sd,lower.tail = FALSE)) %>% + select(-mean,-sd) +} + +regressionMetricPvals <- function(x){ + lowertail <- list(rsq = FALSE, + mae = 
TRUE, + mape = TRUE, + mape = TRUE, + rmse = TRUE, + ccc = FALSE) + + left_join(x@metrics, + x@permutations$metrics, + by = c("response", ".metric")) %>% + rowwise() %>% + mutate(`p-value` = pnorm(.estimate, + mean, + sd, + lower.tail = lowertail[[.metric]])) %>% + select(-mean,-sd) + +} + +importancePvals <- function(x){ + model_type <- type(x) + + switch(model_type, + classification = classificationImportancePvals(x), + regression = regressionImportancePvals(x)) +} + +classificationPtail <- function(metric){ + if (metric == 'false_positive_rate'){ + lowertail <- TRUE + } else { + lowertail <- FALSE + } + return(lowertail) +} + +classificationImportancePvals <- function(x){ + + left_join(x@importances, + x@permutations$importance, + by = c("response", "comparison", "feature",'metric')) %>% + rowwise() %>% + mutate(`p-value` = pnorm(value, + mean, + sd, + lower.tail = classificationPtail(metric)), + `adjusted_p-value` = p.adjust(`p-value`, + method = 'bonferroni', + n = nFeatures(x))) %>% + select(-mean,-sd) %>% + ungroup() +} + +regressionImportancePvals <- function(x){ + left_join(x@importances, + x@permutations$importance, + by = c("response","feature",'metric')) %>% + rowwise() %>% + mutate(`p-value` = pnorm(value, + mean, + sd, + lower.tail = FALSE), + `adjusted_p-value` = p.adjust(`p-value`, + method = 'bonferroni', + n = nFeatures(x))) %>% + select(-mean,-sd) %>% + ungroup() +} \ No newline at end of file diff --git a/R/randomForest-regression.R b/R/randomForest-regression.R new file mode 100644 index 00000000..6143451d --- /dev/null +++ b/R/randomForest-regression.R @@ -0,0 +1,117 @@ +regressionPredictions <- function(model){ + tibble(sample = seq_along(model$y), + obs = model$y, + pred = model$predicted) +} + +regressionMetrics <- function(model){ + predictions <- model %>% + regressionPredictions() + + reg_metrics <- metric_set(rsq,mae,mape,rmse,ccc) + + predictions %>% + reg_metrics(obs,estimate = pred) +} + +regressionImportance <- function(model){ + 
model %>% + randomForest::importance() %>% + {bind_cols(tibble(feature = rownames(.)),as_tibble(.,.name_repair = 'minimal'))} %>% + gather(metric,value,-feature) +} + +collateRegression <- function(models,results){ + models %>% + map_dfr( + ~.x$reps %>% + map_dfr(~.x[[results]], + .id = 'rep'), + .id = 'response' + ) +} + +collateRegressionModels <- function(models){ + models %>% + map( + ~.x$reps %>% + map(~.x$model) + ) +} + +#' @importFrom yardstick rsq mae mape rmse ccc + +regression <- function(x, + cls = 'class', + rf = list( + keep.forest = TRUE, + proximity = TRUE, + importance = TRUE + ), + reps = 1, + perm = 0, + returnModels = FALSE, + seed = 1234){ + i <- x %>% + sinfo() %>% + select(all_of(cls)) + + models <- i %>% + colnames() %>% + map(~{ + inf <- . + + pred <- i %>% + select(all_of(inf)) %>% + unlist(use.names = FALSE) + + models <- future_map(1:reps,~{ + performRF(dat(x), + pred, + rf, + type = 'regression', + returnModel = returnModels) + }, + .options = furrr_options(seed = seed)) %>% + set_names(1:reps) + + if (perm > 0) { + permutation_results <- permutations(x, + inf, + rf, + perm, + type = 'regression') + } else { + permutation_results <- list() + } + + return( + list(reps = models, + permutations = permutation_results) + ) + }) %>% + set_names(colnames(i)) + + res <- new('RandomForest', + x, + type = 'regression', + response = cls, + metrics = collate(models,'metrics',type = 'regression') %>% + group_by(response,.metric,.estimator) %>% + summarise(.estimate = mean(.estimate), + .groups = 'drop'), + predictions = collate(models,'predictions',type = 'regression'), + importances = collate(models,'importance',type = 'regression') %>% + group_by(response,feature,metric) %>% + summarise(value = mean(value), + .groups = 'drop'), + proximities = collate(models,'proximities',type = 'regression'), + permutations = collatePermutations(models,type = 'regression')) + + + if (isTRUE(returnModels)) { + res@models <- collateModels(models,type = 
'regression') + } + + return(res) +} diff --git a/R/randomForest-unsupervised.R b/R/randomForest-unsupervised.R new file mode 100644 index 00000000..b7376939 --- /dev/null +++ b/R/randomForest-unsupervised.R @@ -0,0 +1,49 @@ + +collateUnsupervised <- function(models,result){ + models %>% + map_dfr(~.x[[result]], + .id = 'rep') +} + +collateUnsupervisedModels <- function(models){ + models %>% + map(~.x$model) +} + +unsupervised <- function(x, + rf = list( + keep.forest = TRUE, + proximity = TRUE, + importance = TRUE + ), + reps = 1, + returnModels = FALSE, + seed = 1234, + ...){ + + models <- future_map(1:reps,~{ + performRF( + dat(x), + cls = NULL, + rf = rf, + type = 'unsupervised', + returnModel = returnModels + ) + },.options = furrr_options(seed = seed)) %>% + set_names(1:reps) + + res <- new('RandomForest', + x, + type = 'unsupervised', + importances = collate(models,'importance',type = 'unsupervised') %>% + group_by(feature,metric) %>% + summarise(value = mean(value),.groups = 'drop'), + proximities = collate(models,'proximities',type = 'unsupervised')) + + + if (isTRUE(returnModels)) { + res@models <- collateModels(models,type = 'unsupervised') + } + + return(res) +} \ No newline at end of file diff --git a/R/randomForest.R b/R/randomForest.R index 209da111..1f8bcbc9 100644 --- a/R/randomForest.R +++ b/R/randomForest.R @@ -1,761 +1,4 @@ -nPerm <- function(n,k){choose(n,k) * factorial(k)} - -#' @importFrom stats runif - -permute <- function(x,cls,rf,n = 1000){ - i <- x %>% - sinfo() %>% - select(cls) %>% - unlist(use.names = FALSE) - - if (is.character(i) | is.factor(i)) { - i <- factor(i) - } - - if (nPerm(length(i),length(unique(i))) < n) { - n <- nPerm(length(i),length(unique(i))) - } - - models <- future_map(1:n,~{ - params <- formals(randomForest::randomForest) - params <- c(params,rf) - params$x <- x %>% dat() - ind <- sample(i) - params$y <- ind - params$strata <- ind - do.call(randomForest::randomForest,params) - },.options = furrr_options(seed = 
runif(1) %>% - {. * 100000} %>% - round())) %>% - set_names(1:n) - - return(models) -} - -importance <- function(x){ - x %>% - randomForest::importance() %>% - {bind_cols(tibble(Feature = rownames(.)),as_tibble(.,.name_repair = 'minimal'))} -} - -#' @importFrom randomForest margin -#' @importFrom stats pnorm - -classificationMeasures <- function(predictions,permutations){ - - class_metrics <- metric_set(accuracy,kap) - - meas <- predictions %>% - base::split(.$Response) %>% - map(~{ - d <- . - d %>% - base::split(.$Comparison) %>% - map(~{ - p <- . - p %>% - mutate(obs = factor(obs),pred = factor(pred)) %>% - group_by(Response,Comparison) %>% - class_metrics(obs,estimate = pred) - }) %>% - bind_rows() - }) %>% - bind_rows() %>% - bind_rows( - suppressMessages( - predictions %>% - base::split(.$Response) %>% - map(~{ - d <- . - d %>% - base::split(.$Comparison) %>% - map(~{ - p <- . - - p <- p %>% - mutate(obs = factor(obs),pred = factor(pred)) - if (length(levels(p$obs)) > 2) { - estimate <- levels(p$obs) - } else { - estimate <- levels(p$obs)[1] - } - p %>% - group_by(Response,Comparison) %>% - roc_auc(obs,estimate) - }) %>% - bind_rows() - }) %>% - bind_rows())) %>% - bind_rows(predictions %>% - group_by(Response,Comparison) %>% - summarise(.estimate = mean(margin)) %>% - mutate(.metric = 'margin')) - - if (length(permutations) > 0) { - meas <- meas %>% - left_join( - permutations$measures, - by = c("Response","Comparison", ".metric")) %>% - mutate(Pvalue = pnorm(.estimate,Mean,SD,lower.tail = FALSE)) %>% - select(-Mean,-SD) - } - - return(meas) -} - -#' @importFrom dplyr rowwise - -classificationImportance <- function(importances,permutations){ - imps <- importances %>% - group_by(Response,Comparison,Feature,Metric) %>% - summarise(Value = mean(Value)) - - if (length(permutations) > 0) { - lowertail <- list(MeanDecreaseGini = FALSE, - SelectionFrequency = FALSE, - FalsePositiveRate = TRUE) - - imps <- imps %>% - left_join( - permutations$importance, - by = 
c("Response","Comparison", "Feature", "Metric")) %>% - base::split(.$Metric) %>% - map(~{ - i <- . - tail <- lowertail[[i$Metric[1]]] - i %>% - rowwise() %>% - mutate(Pvalue = pnorm(Value,Mean,SD,lower.tail = tail)) %>% - ungroup() - }) %>% - bind_rows() %>% - group_by(Metric) %>% - mutate(adjustedPvalue = p.adjust(Pvalue,method = 'bonferroni')) %>% - select(-Mean,-SD) - } - - return(imps) -} - -classificationPermutationMeasures <- function(models){ - suppressWarnings({ - preds <- models %>% - map(~{ - map(.,~{ - map(.$permutations,~{ - m <- . - tibble(sample = seq_along(m$y), - obs = m$y, - pred = m$predicted, - margin = margin(m)) %>% - bind_cols(m$votes %>% - as_tibble(.name_repair = 'minimal')) - }) %>% - { - suppressMessages(bind_rows(.,.id = 'Permutation')) - } %>% - mutate(Permutation = as.numeric(Permutation)) - }) %>% - bind_rows(.id = 'Comparison') - }) %>% - bind_rows(.id = 'Response') - }) - - class_metrics <- metric_set(accuracy,kap) - - meas <- preds %>% - base::split(.$Response) %>% - map(~{ - d <- . - d %>% - base::split(.$Comparison) %>% - map(~{ - p <- . - p %>% - mutate(obs = factor(obs),pred = factor(pred)) %>% - group_by(Response,Comparison,Permutation) %>% - class_metrics(obs,estimate = pred) - }) %>% - bind_rows() - }) %>% - bind_rows() %>% - bind_rows( - suppressMessages( - preds %>% - base::split(.$Response) %>% - map(~{ - d <- . - d %>% - base::split(.$Comparison) %>% - map(~{ - p <- . 
- - p <- p %>% - mutate(obs = factor(obs),pred = factor(pred)) - if (length(levels(p$obs)) > 2) { - estimate <- levels(p$obs) - } else { - estimate <- levels(p$obs)[1] - } - p %>% - group_by(Response,Comparison,Permutation) %>% - roc_auc(obs,estimate) - }) %>% - bind_rows() - }) %>% - bind_rows())) %>% - bind_rows(preds %>% - group_by(Response,Comparison,Permutation) %>% - summarise(.estimate = mean(margin)) %>% - mutate(.metric = 'margin')) %>% - group_by(Response,Comparison,.metric) %>% - summarise(Mean = mean(.estimate),SD = sd(.estimate)) - - imps <- models %>% - map(~{ - map(.,~{ - map(.$permutations,~{ - m <- . - importance(m) %>% - left_join(fpr_fs(m),by = c('Feature' = 'variable')) %>% - rename(SelectionFrequency = freq,FalsePositiveRate = fpr) - }) %>% - bind_rows(.id = 'Permutation') %>% - mutate(Permutation = as.numeric(Permutation)) - }) %>% - bind_rows(.id = 'Comparison') - }) %>% - bind_rows(.id = 'Response') %>% - gather('Metric','Value',-(Response:Feature)) %>% - group_by(Response,Comparison,Feature,Metric) %>% - summarise(Mean = mean(Value),SD = sd(Value)) - - return(list(measures = meas,importance = imps)) -} - -regressionMeasures <- function(predictions,permutations){ - reg_metrics <- metric_set(rsq,mae,mape,rmse,ccc) - meas <- predictions %>% - base::split(.$Response) %>% - map(~{ - d <- . 
- d %>% - group_by(Response) %>% - reg_metrics(obs,estimate = pred) - }) %>% - bind_rows() - - if (length(permutations) > 0) { - lowertail <- list(rsq = FALSE, - mae = TRUE, - mape = TRUE, - mape = TRUE, - rmse = TRUE, - ccc = FALSE) - - meas <- meas %>% - left_join(permutations$measures, by = c("Response", ".metric")) %>% - rowwise() %>% - mutate(Pvalue = pnorm(.estimate, - Mean, - SD, - lower.tail = lowertail[[.metric]])) %>% - select(-Mean,-SD) - } - - return(meas) -} - -regressionImportance <- function(importances,permutations){ - imps <- importances %>% - group_by(Response,Feature,Metric) %>% - summarise(Value = mean(Value)) - - if (length(permutations) > 0) { - imps <- imps %>% - left_join(permutations$importance, - by = c("Response", "Feature", "Metric")) %>% - mutate(Pvalue = pnorm(Value,Mean,SD,lower.tail = FALSE)) %>% - group_by(Metric) %>% - mutate(adjustedPvalue = p.adjust(Pvalue,method = 'bonferroni')) %>% - select(-Mean,-SD) - } - - return(imps) -} - -regressionPermutationMeasures <- function(models){ - preds <- models %>% - map(~{ - map(.$permutations,~{ - m <- . - tibble(sample = seq_along(m$y),obs = m$y,pred = m$predicted) - }) %>% - bind_rows(.id = 'Permutation') %>% - mutate(Permutation = as.numeric(Permutation)) - }) %>% - bind_rows(.id = 'Response') - - reg_metrics <- metric_set(rsq,mae,mape,rmse,ccc) - - meas <- preds %>% - base::split(.$Response) %>% - map(~{ - d <- . - d %>% - group_by(Response,Permutation) %>% - reg_metrics(obs,estimate = pred) - }) %>% - bind_rows() %>% - group_by(Response,.metric) %>% - summarise(Mean = mean(.estimate),SD = sd(.estimate)) - - imps <- models %>% - map(~{ - map(.$permutations,~{ - m <- . 
- importance(m) - }) %>% - bind_rows(.id = 'Permutation') %>% - mutate(Permutation = as.numeric(Permutation)) - }) %>% - bind_rows(.id = 'Response') %>% - gather('Metric','Value',-Response,-Permutation,-Feature) %>% - group_by(Response,Feature,Metric) %>% - summarise(Mean = mean(Value),SD = sd(Value)) - - return(list(measures = meas,importance = imps)) -} - -#' @importFrom forestControl fpr_fs - -unsupervised <- function(x,rf,reps,returnModels,seed,...){ - - set.seed(seed) - - models <- future_map(1:reps,~{ - params <- formals(randomForest::randomForest) - params$x <- x %>% dat() - params <- c(params,rf) - do.call(randomForest::randomForest,params) - },.options = furrr_options(seed = seed)) %>% - set_names(1:reps) - - importances <- models %>% - map(~{ - m <- . - importance(m) %>% - left_join(fpr_fs(m),by = c('Feature' = 'variable')) %>% - rename(SelectionFrequency = freq,FalsePositiveRate = fpr) - }) %>% - bind_rows(.id = 'Rep') %>% - mutate(Rep = as.numeric(Rep)) %>% - gather('Metric','Value',-Rep,-Feature) - - proximities <- models %>% - map(.,~{.$proximity %>% - as_tibble(.name_repair = 'minimal') %>% - mutate(Sample = seq_len(nrow(.))) %>% - gather('Sample2','Proximity',-Sample) %>% - rename(Sample1 = Sample) - }) %>% - bind_rows(.id = 'Rep') %>% - mutate(Rep = as.numeric(Rep)) %>% - mutate(Sample2 = as.numeric(Sample2)) - - results <- list( - importances = importances %>% - select(-Rep) %>% - group_by(Feature,Metric) %>% - summarise(Value = mean(Value)) - ) - - res <- new('RandomForest') - res@type <- 'unsupervised' - dat(res) <- dat(x) - sinfo(res) <- sinfo(x) - res@results <- results - res@importances <- importances - res@proximities <- proximities - - if (isTRUE(returnModels)) { - res@models <- models - } - - return(list(res)) -} - -supervised <- function(x, - cls, - rf, - reps, - binary, - comparisons, - perm, - returnModels, - seed){ - i <- x %>% - sinfo() %>% - select(all_of(cls)) - - i %>% - colnames() %>% - map(~{ - cls <- . 
- - pred <- i %>% - select(all_of(cls)) %>% - deframe() - - if (is.numeric(pred)) { - regression(x, - cls, - rf, - reps, - perm, - returnModels, - seed) - } else { - classification(x, - cls, - rf, - reps, - binary, - comparisons, - perm, - returnModels, - seed) - } - }) %>% - set_names(colnames(i)) -} - -#' @importFrom yardstick metric_set accuracy kap roc_auc -#' @importFrom dplyr summarise_all group_by_all n -#' @importFrom stringr str_split -#' @importFrom magrittr set_names - -classification <- function(x, - cls, - rf, - reps, - binary, - comparisons, - perm, - returnModels, - seed){ - - i <- x %>% - sinfo() %>% - select(all_of(cls)) - - clsFreq <- i %>% - group_by_all() %>% - summarise(n = n(),.groups = 'drop') - - if (any(clsFreq$n < 5)) { - clsRem <- clsFreq %>% - filter(n < 5) - - x <- x %>% - removeClasses(cls = cls,classes = clsRem %>% - select(all_of(cls)) %>% - deframe()) - - cls_list <- clsRem %>% - select(all_of(cls)) %>% - deframe() %>% - str_c('"',.,'"') %>% - str_c(.,collapse = ', ') - - warning(str_c('Classes with < 5 replicates removed: ', - cls_list), - call. = FALSE) - - i <- x %>% - sinfo() %>% - select(cls) - } - - if (length(unique(deframe(i))) < 2) { - stop('Need at least two classes to do classification.',call. = FALSE) - } - - if (length(comparisons) > 0) { - comp <- comparisons - } else { - if (binary == TRUE) { - comp <- map(names(i),~{ - binaryComparisons(x,cls = .x) - }) %>% - set_names(names(i)) - } else { - comp <- map(i,~{unique(.) %>% - sort() %>% - str_c(collapse = '~')}) - } - } - - models <- i %>% - colnames() %>% - map(~{ - inf <- . - - comps <- comp[[inf]] - - comps %>% - map(~{ - comparison <- str_split(.,'~')[[1]] - - cda <- keepClasses(x,inf,classes = comparison) - - pred <- cda %>% - sinfo() %>% - select(all_of(inf)) %>% - deframe() %>% - factor() - - predFreq <- pred %>% - tibble(cls = .) 
%>% - group_by_all() %>% - summarise(n = n(),.groups = 'drop') - - if (length(unique(predFreq$n)) > 1) { - message( - str_c('Unbalanced classes detected. Stratifying ', - 'sample size to the smallest class size.')) - - ss <- pred %>% - table() %>% - min() %>% - rep(length(unique(pred))) - - rf <- c(rf,list(strata = pred,sampsize = ss)) - } - - set.seed(seed) - - mod <- future_map(1:reps,~{ - params <- formals(randomForest::randomForest) - params$x <- cda %>% dat() - params$y <- pred - params <- c(params,rf) - do.call(randomForest::randomForest,params) - },.options = furrr_options(seed = seed)) %>% - set_names(1:reps) - - mod <- list(models = mod) - - if (perm > 0) { - perms <- permute(x,cls,rf,n = perm) - mod <- c(mod,list(permutations = perms)) - } - - return(mod) - }) %>% - set_names(comps) - }) %>% - set_names(colnames(i)) - - suppressMessages({ - predictions <- models %>% - map(~{ - map(.x,~{ - map(.x$models,~{ - m <- .x - tibble(sample = seq_along(m$y), - obs = m$y, - pred = m$predicted, - margin = margin(m)) %>% - bind_cols(m$votes %>% - as_tibble(.name_repair = 'minimal') %>% - mutate_all(as.numeric)) - }) %>% - bind_rows(.id = 'Rep') %>% - mutate(Rep = as.numeric(Rep)) - }) %>% - bind_rows(.id = 'Comparison') - }) %>% - bind_rows(.id = 'Response') - }) - - importances <- models %>% - map(~{ - map(.,~{ - map(.$models,~{ - m <- . 
- importance(m) %>% - left_join(fpr_fs(m),by = c('Feature' = 'variable')) %>% - rename(SelectionFrequency = freq,FalsePositiveRate = fpr) - }) %>% - bind_rows(.id = 'Rep') %>% - mutate(Rep = as.numeric(Rep)) - }) %>% - bind_rows(.id = 'Comparison') - }) %>% - bind_rows(.id = 'Response') %>% - gather('Metric','Value',-(Response:Feature)) - - proximities <- models %>% - map(~{ - map(.,~{ - map(.$models,~{.$proximity %>% - as_tibble() %>% - mutate(Sample = seq_len(nrow(.))) %>% - gather('Sample2','Proximity',-Sample) %>% - rename(Sample1 = Sample) - }) %>% - bind_rows(.id = 'Rep') %>% - mutate(Rep = as.numeric(Rep)) - }) %>% - bind_rows(.id = 'Comparison') - }) %>% - bind_rows(.id = 'Response') %>% - mutate(Sample2 = as.numeric(Sample2)) - - if (perm > 0) { - permutations <- classificationPermutationMeasures(models) - } else { - permutations <- list() - } - - results <- list( - measures = classificationMeasures(predictions,permutations), - importances = classificationImportance(importances,permutations) - ) - - res <- new('RandomForest') - res@type <- 'classification' - res@response <- cls - dat(res) <- dat(x) - sinfo(res) <- sinfo(x) - res@results <- results - res@predictions <- predictions - res@permutations <- permutations - res@importances <- importances - res@proximities <- proximities - - if (isTRUE(returnModels)) { - res@models <- models - } - - return(res) -} - -#' @importFrom yardstick rsq mae mape rmse ccc - -regression <- function(x, - cls, - rf, - reps, - perm, - returnModels, - seed){ - i <- x %>% - sinfo() %>% - select(cls) - - models <- i %>% - colnames() %>% - map(~{ - inf <- . 
- - pred <- i %>% - select(inf) %>% - unlist(use.names = FALSE) - - set.seed(seed) - - mod <- future_map(1:reps,~{ - params <- formals(randomForest::randomForest) - params$x <- x %>% dat() - params$y <- pred - params <- c(params,rf) - do.call(randomForest::randomForest,params) - },.options = furrr_options(seed = seed)) %>% - set_names(1:reps) - - mod <- list(models = mod) - - if (perm > 0) { - perms <- permute(x,cls,rf,n = perm) - } else { - perms <- list() - } - - mod <- c(mod,list(permutations = perms)) - - return(mod) - }) %>% - set_names(colnames(i)) - - predictions <- models %>% - map(~{ - map(.$models,~{ - m <- . - tibble(sample = seq_along(m$y),obs = m$y,pred = m$predicted) - }) %>% - bind_rows(.id = 'Rep') %>% - mutate(Rep = as.numeric(Rep)) - }) %>% - bind_rows(.id = 'Response') - - importances <- models %>% - map(~{ - map(.$models,~{ - m <- . - importance(m) - }) %>% - bind_rows(.id = 'Rep') %>% - mutate(Rep = as.numeric(Rep)) - }) %>% - bind_rows(.id = 'Response') %>% - gather('Metric','Value',-Response,-Rep,-Feature) - - proximities <- models %>% - map(~{ - map(.$models,~{.$proximity %>% - as_tibble() %>% - mutate(Sample = seq_len(nrow(.))) %>% - gather('Sample2','Proximity',-Sample) %>% - rename(Sample1 = Sample) - }) %>% - bind_rows(.id = 'Rep') %>% - mutate(Rep = as.numeric(Rep)) - }) %>% - bind_rows(.id = 'Response') %>% - mutate(Sample2 = as.numeric(Sample2)) - - if (perm > 0) { - permutations <- regressionPermutationMeasures(models) - } else { - permutations <- list() - } - - results <- list( - measures = regressionMeasures(predictions,permutations), - importances = regressionImportance(importances,permutations) - ) - - res <- new('RandomForest') - res@type <- 'regression' - res@response <- cls - dat(res) <- dat(x) - sinfo(res) <- sinfo(x) - res@results <- results - res@predictions <- predictions - res@permutations <- permutations - res@importances <- importances - res@proximities <- proximities - - if (isTRUE(returnModels)) { - res@models <- 
models - } - - return(res) -} - -#' Random forest analysis +#' Random forest #' @rdname randomForest #' @description Perform random forest on an `AnalysisData` object #' @param x S4 object of class `AnalysisData` @@ -832,7 +75,7 @@ setMethod('randomForest',signature = 'AnalysisData', seed) } - if (is.null(cls) | length(cls) == 1) { + if (is.list(res) & {length(res) == 1}){ res <- res[[1]] } diff --git a/R/roc.R b/R/roc.R index 016ff219..5a19897f 100644 --- a/R/roc.R +++ b/R/roc.R @@ -30,18 +30,18 @@ setMethod('roc',signature = 'RandomForest', } roc_curves <- x@predictions %>% - group_by(Response,Comparison) %>% + group_by(response,comparison) %>% group_map(~{ .x <- .x %>% mutate(obs = factor(obs)) if (length(levels(.x$obs)) > 2) { .x %>% - group_by(Response,Comparison) %>% + group_by(response,comparison) %>% roc_curve(obs,levels(.x$obs)) } else { .x %>% - group_by(Response,Comparison) %>% + group_by(response,comparison) %>% roc_curve(obs,levels(.x$obs)[1]) } }, .keep = TRUE) %>% diff --git a/R/show-method.R b/R/show-method.R index f7ccf3b5..08d92a50 100644 --- a/R/show-method.R +++ b/R/show-method.R @@ -158,17 +158,23 @@ setMethod('show',signature = 'RandomForest', cat('Features:\t',nFeatures(object),'\n') if (object@type != 'unsupervised') { - cat('Response:\t',metrics(object) %>% - .$Response %>% - unique() %>% + cat('Response:\t',response(object) %>% str_c(collapse = ', '),'\n') } if (object@type == 'classification') { - cat('# comparisons:\t',metrics(object) %>% - .$Comparison %>% - unique() %>% - length(),'\n') + + comparisons <- metrics(object) + + if (nrow(comparisons) > 0){ + comparisons <- comparisons$comparison %>% + unique() %>% + length() + } else { + comparisons <- 0 + } + + cat('# comparisons:\t',comparisons,'\n') } cat('\n') @@ -182,12 +188,12 @@ setMethod('show',signature = 'Univariate', cat('Samples:\t',nSamples(object),'\n') cat('Features:\t',nFeatures(object),'\n') cat('Responses:\t',importance(object) %>% - .$Response %>% + .$response %>% 
unique() %>% str_c(collapse = ', '),'\n') if (object@type != 'linear regression') { cat('# comparisons:\t',importance(object) %>% - .$Comparison %>% + .$comparison %>% unique() %>% length(),'\n') diff --git a/R/tune.R b/R/tune.R index 2a1cc86f..d3609fff 100644 --- a/R/tune.R +++ b/R/tune.R @@ -79,10 +79,10 @@ setMethod('tune',signature = 'AnalysisData', rf = list(ntree = .x, mtry = .y)), silent = TRUE) - if (class(rf_res) == 'RandomForest'){ + if (is(rf_res,'RandomForest')){ rf_res %>% metrics() %>% - select(-Response,-.estimator,-contains('Comparison')) %>% + select(-response,-.estimator,-contains('comparison')) %>% spread(.metric,.estimate) %>% mutate(ntree = .x, mtry = .y) diff --git a/R/univariate.R b/R/univariate.R index 85a2ecd9..0f1da28d 100644 --- a/R/univariate.R +++ b/R/univariate.R @@ -112,13 +112,13 @@ setMethod('anova',signature = 'AnalysisData', map(~{ map(.,~{ map(.,tidy) %>% - bind_rows(.id = 'Feature') %>% + bind_rows(.id = 'feature') %>% mutate(adjusted.p.value = p.adjust(p.value, method = pAdjust)) }) %>% - bind_rows(.id = 'Comparison') + bind_rows(.id = 'comparison') }) %>% - bind_rows(.id = 'Response') %>% + bind_rows(.id = 'response') %>% filter(term == 'response') res <- new('Univariate') @@ -223,13 +223,13 @@ setMethod('ttest',signature = 'AnalysisData', map(~{ map(.,~{ map(.,glance) %>% - bind_rows(.id = 'Feature') %>% + bind_rows(.id = 'feature') %>% mutate(adjusted.p.value = p.adjust(p.value, method = pAdjust)) }) %>% - bind_rows(.id = 'Comparison') + bind_rows(.id = 'comparison') }) %>% - bind_rows(.id = 'Response') + bind_rows(.id = 'response') res <- new('Univariate') res@type <- 't-test' @@ -314,11 +314,11 @@ setMethod('linearRegression',signature = 'AnalysisData', map(.,~{ glance(.) 
}) %>% - bind_rows(.id = 'Feature') %>% + bind_rows(.id = 'feature') %>% mutate(adjusted.p.value = p.adjust(p.value, method = pAdjust)) }) %>% - bind_rows(.id = 'Response') + bind_rows(.id = 'response') res <- new('Univariate') res@type <- 'linear regression' diff --git a/README.Rmd b/README.Rmd index 5e6f1d9e..ea42b6b8 100644 --- a/README.Rmd +++ b/README.Rmd @@ -9,14 +9,15 @@ knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.align = 'center', fig.path = "man/figures/README-", - message = FALSE) + message = FALSE, + warning = FALSE) ``` # metabolyseR [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) -[![R-CMD-check](https://github.com/jasenfinch/metabolyseR/workflows/R-CMD-check/badge.svg)](https://github.com/jasenfinch/metabolyseR/actions) +[![R-CMD-check](https://github.com/jasenfinch/metabolyseR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/jasenfinch/metabolyseR/actions/workflows/R-CMD-check.yaml) [![codecov](https://codecov.io/gh/jasenfinch/metabolyseR/branch/master/graph/badge.svg)](https://codecov.io/gh/jasenfinch/metabolyseR/branch/master) [![license](https://img.shields.io/badge/license-GNU%20GPL%20v3.0-blue.svg)](https://github.com/jasenfinch/metabolyseR/blob/master/DESCRIPTION) [![DOI](https://zenodo.org/badge/88983134.svg)](https://zenodo.org/badge/latestdoi/88983134) @@ -38,7 +39,13 @@ This package provides a tool kit of methods for metabolomics analyses that inclu The `metabolyseR` package can be installed from GitHub using the following: ```r -devtools::install_github('jasenfinch/metabolyseR',build_vignettes = TRUE) +remotes::install_github('jasenfinch/metabolyseR') +``` + +The package documentation can be browsed online at <https://jasenfinch.github.io/metabolyseR/>; however, if users want to compile the vignettes locally, the following can be used. 
+ +```r +remotes::install_github('jasenfinch/metabolyseR',build_vignettes = TRUE,dependencies = TRUE) ``` ## Learn more diff --git a/README.md b/README.md index 8e455b69..60582d11 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) -[![R-CMD-check](https://github.com/jasenfinch/metabolyseR/workflows/R-CMD-check/badge.svg)](https://github.com/jasenfinch/metabolyseR/actions) +[![R-CMD-check](https://github.com/jasenfinch/metabolyseR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/jasenfinch/metabolyseR/actions/workflows/R-CMD-check.yaml) [![codecov](https://codecov.io/gh/jasenfinch/metabolyseR/branch/master/graph/badge.svg)](https://codecov.io/gh/jasenfinch/metabolyseR/branch/master) [![license](https://img.shields.io/badge/license-GNU%20GPL%20v3.0-blue.svg)](https://github.com/jasenfinch/metabolyseR/blob/master/DESCRIPTION) [![DOI](https://zenodo.org/badge/88983134.svg)](https://zenodo.org/badge/latestdoi/88983134) @@ -23,9 +23,9 @@ release](https://img.shields.io/github/release/jasenfinch/metabolyseR.svg)](http This package provides a tool kit of methods for metabolomics analyses that includes: -- data pre-treatment -- multivariate and univariate modelling/data mining techniques -- correlation analysis +- data pre-treatment +- multivariate and univariate modelling/data mining techniques +- correlation analysis ## Installation @@ -33,7 +33,15 @@ The `metabolyseR` package can be installed from GitHub using the following: ``` r -devtools::install_github('jasenfinch/metabolyseR',build_vignettes = TRUE) +remotes::install_github('jasenfinch/metabolyseR') +``` + +The package documentation can be browsed online at +<https://jasenfinch.github.io/metabolyseR/>; however, if users want to +compile the vignettes locally, the following can be used. 
+ +``` r +remotes::install_github('jasenfinch/metabolyseR',build_vignettes = TRUE,dependencies = TRUE) ``` ## Learn more @@ -170,7 +178,7 @@ anova_results <- d %>% ``` A table of the significantly explanatory features can be extracted with -a bonferroni correction adjusted p value < 0.05 using: +a bonferroni correction adjusted p value \< 0.05 using: ``` r explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05) @@ -179,19 +187,20 @@ explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05) ``` r explan_feat #> # A tibble: 379 × 10 -#> Response Comparison Feature term df sumsq meansq statistic p.value -#> -#> 1 day 1~2~3~4~5~H N341 respo… 5 3.88e-4 7.76e-5 137. 1.55e-46 -#> 2 day 1~2~3~4~5~H N133 respo… 5 7.00e-5 1.40e-5 126. 8.63e-45 -#> 3 day 1~2~3~4~5~H N163 respo… 5 6.01e-5 1.20e-5 117. 2.95e-43 -#> 4 day 1~2~3~4~5~H N1087 respo… 5 2.42e-6 4.84e-7 99.8 5.61e-40 -#> 5 day 1~2~3~4~5~H N171 respo… 5 2.25e-7 4.50e-8 95.7 3.84e-39 -#> 6 day 1~2~3~4~5~H N513 respo… 5 3.38e-6 6.76e-7 95.3 4.78e-39 -#> 7 day 1~2~3~4~5~H N1025 respo… 5 2.78e-6 5.56e-7 91.0 3.91e-38 -#> 8 day 1~2~3~4~5~H N342 respo… 5 3.71e-6 7.41e-7 90.3 5.32e-38 -#> 9 day 1~2~3~4~5~H N1083 respo… 5 5.11e-5 1.02e-5 89.0 1.06e-37 -#> 10 day 1~2~3~4~5~H N1085 respo… 5 1.10e-5 2.19e-6 83.4 1.92e-36 -#> # … with 369 more rows, and 1 more variable: adjusted.p.value +#> respo…¹ compa…² feature term df sumsq meansq stati…³ p.value adjust…⁴ +#> +#> 1 day 1~2~3~… N341 resp… 5 3.88e-4 7.76e-5 137. 1.55e-46 2.73e-43 +#> 2 day 1~2~3~… N133 resp… 5 7.00e-5 1.40e-5 126. 8.63e-45 1.52e-41 +#> 3 day 1~2~3~… N163 resp… 5 6.01e-5 1.20e-5 117. 
2.95e-43 5.19e-40 +#> 4 day 1~2~3~… N1087 resp… 5 2.42e-6 4.84e-7 99.8 5.61e-40 9.88e-37 +#> 5 day 1~2~3~… N171 resp… 5 2.25e-7 4.50e-8 95.7 3.84e-39 6.75e-36 +#> 6 day 1~2~3~… N513 resp… 5 3.38e-6 6.76e-7 95.3 4.78e-39 8.41e-36 +#> 7 day 1~2~3~… N1025 resp… 5 2.78e-6 5.56e-7 91.0 3.91e-38 6.87e-35 +#> 8 day 1~2~3~… N342 resp… 5 3.71e-6 7.41e-7 90.3 5.32e-38 9.36e-35 +#> 9 day 1~2~3~… N1083 resp… 5 5.11e-5 1.02e-5 89.0 1.06e-37 1.87e-34 +#> 10 day 1~2~3~… N1085 resp… 5 1.10e-5 2.19e-6 83.4 1.92e-36 3.37e-33 +#> # … with 369 more rows, and abbreviated variable names ¹​response, ²​comparison, +#> # ³​statistic, ⁴​adjusted.p.value ``` The ANOVA has identified 379 features significantly explanatory over the diff --git a/inst/defaultParameters.yaml b/inst/defaultParameters.yaml index 77ac55e8..5f8a35d2 100644 --- a/inst/defaultParameters.yaml +++ b/inst/defaultParameters.yaml @@ -3,49 +3,44 @@ pre-treatment: occupancyFilter: cls: class QCidx: QC - occupancy: 0.667 + occupancy: 0.6666667 impute: cls: class QCidx: QC - occupancy: 0.667 + occupancy: 0.6666667 + parallel: variables + seed: 1234.0 RSDfilter: cls: class QCidx: QC - RSDthresh: 0.5 + RSDthresh: 50.0 removeQC: cls: class QCidx: QC - occupancyFilter: maximum: cls: class - occupancy: 0.667 - + occupancy: 0.6666667 impute: class: cls: class - occupancy: 0.667 - + occupancy: 0.6666667 + seed: 1234.0 transform: - TICnorm: - -classification: - cls: class - method: randomForest - pars: - sampling: boot - niter: 10 - nreps: 10 - strat: TRUE - -featureSelection: - method: fs.rf - cls: class - pars: - fs.rf: - nreps: 100 - + TICnorm: {} +modelling: + randomForest: + cls: class + rf: [] + reps: 1.0 + binary: no + comparisons: [] + perm: 0.0 + returnModels: no + seed: 1234.0 correlations: method: pearson pAdjustMethod: bonferroni corPvalue: 0.05 + minCoef: 0.0 + maxCor: .inf diff --git a/man/Analysis-class.Rd b/man/Analysis-class.Rd index f2b2283b..a1074cbd 100644 --- a/man/Analysis-class.Rd +++ b/man/Analysis-class.Rd @@ 
-1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allClasses.R +% Please edit documentation in R/all-classes.R \docType{class} \name{Analysis-class} \alias{Analysis-class} diff --git a/man/AnalysisData-class.Rd b/man/AnalysisData-class.Rd index f2fc0319..b1542b62 100644 --- a/man/AnalysisData-class.Rd +++ b/man/AnalysisData-class.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allClasses.R +% Please edit documentation in R/all-classes.R \docType{class} \name{AnalysisData-class} \alias{AnalysisData-class} diff --git a/man/AnalysisParameters-class.Rd b/man/AnalysisParameters-class.Rd index 3df60e70..2687c425 100644 --- a/man/AnalysisParameters-class.Rd +++ b/man/AnalysisParameters-class.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allClasses.R +% Please edit documentation in R/all-classes.R \docType{class} \name{AnalysisParameters-class} \alias{AnalysisParameters-class} diff --git a/man/RandomForest-class.Rd b/man/RandomForest-class.Rd index 09e1ddd0..d29107b5 100644 --- a/man/RandomForest-class.Rd +++ b/man/RandomForest-class.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allClasses.R +% Please edit documentation in R/all-classes.R \docType{class} \name{RandomForest-class} \alias{RandomForest-class} @@ -14,7 +14,7 @@ An S4 class for random forest results and models. 
\item{\code{response}}{response variable name} -\item{\code{results}}{list of measure and importance results tables} +\item{\code{metrics}}{tibble of model performance metrics} \item{\code{predictions}}{tibble of model observation predictions} diff --git a/man/Univariate-class.Rd b/man/Univariate-class.Rd index d43535d7..7766187c 100644 --- a/man/Univariate-class.Rd +++ b/man/Univariate-class.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/allClasses.R +% Please edit documentation in R/all-classes.R \docType{class} \name{Univariate-class} \alias{Univariate-class} diff --git a/man/aggregate.Rd b/man/aggregate.Rd index 50df4dc4..1ce71093 100644 --- a/man/aggregate.Rd +++ b/man/aggregate.Rd @@ -24,7 +24,7 @@ aggregateSum(d, cls = "class") \arguments{ \item{d}{S4 object of class \code{AnalysisData}} -\item{cls}{info column to use for class data} +\item{cls}{info columns across which to aggregate the data} } \value{ An S4 object of class \code{AnalysisData} containing the aggregated data. @@ -60,16 +60,16 @@ d \%>\% ## Mean aggregation d \%>\% - aggregateMean(cls = 'day') \%>\% + aggregateMean(cls = c('day','class')) \%>\% plotPCA(cls = 'day',ellipses = FALSE) ## Median aggregation d \%>\% - aggregateMedian(cls = 'day') \%>\% + aggregateMedian(cls = c('day','class')) \%>\% plotPCA(cls = 'day',ellipses = FALSE) ## Sum aggregation d \%>\% - aggregateSum(cls = 'day') \%>\% + aggregateSum(cls = c('day','class')) \%>\% plotPCA(cls = 'day',ellipses = FALSE) } diff --git a/man/analysis-accessors.Rd b/man/analysis-accessors.Rd index e96e0b23..799bc1e8 100644 --- a/man/analysis-accessors.Rd +++ b/man/analysis-accessors.Rd @@ -38,25 +38,25 @@ dat(x, ...) \S4method{dat}{AnalysisData}(x) -\S4method{dat}{Analysis}(x, type = c("raw", "pre-treated")) +\S4method{dat}{Analysis}(x, type = c("pre-treated", "raw")) dat(x, ...) 
<- value \S4method{dat}{AnalysisData}(x) <- value -\S4method{dat}{Analysis}(x, type = c("raw", "pre-treated")) <- value +\S4method{dat}{Analysis}(x, type = c("pre-treated", "raw")) <- value sinfo(x, ...) \S4method{sinfo}{AnalysisData}(x) -\S4method{sinfo}{Analysis}(x, type = c("raw", "pre-treated"), value) +\S4method{sinfo}{Analysis}(x, type = c("pre-treated", "raw"), value) sinfo(x, ...) <- value \S4method{sinfo}{AnalysisData}(x) <- value -\S4method{sinfo}{Analysis}(x, type = c("raw", "pre-treated")) <- value +\S4method{sinfo}{Analysis}(x, type = c("pre-treated", "raw")) <- value raw(x) @@ -78,19 +78,19 @@ features(x, ...) \S4method{features}{AnalysisData}(x) -\S4method{features}{Analysis}(x, type = c("raw", "pre-treated")) +\S4method{features}{Analysis}(x, type = c("pre-treated", "raw")) nSamples(x, ...) \S4method{nSamples}{AnalysisData}(x) -\S4method{nSamples}{Analysis}(x, type = c("raw", "pre-treated")) +\S4method{nSamples}{Analysis}(x, type = c("pre-treated", "raw")) nFeatures(x, ...) \S4method{nFeatures}{AnalysisData}(x) -\S4method{nFeatures}{Analysis}(x, type = c("raw", "pre-treated")) +\S4method{nFeatures}{Analysis}(x, type = c("pre-treated", "raw")) analysisResults(x, element) diff --git a/man/cls.Rd b/man/cls.Rd index 7bf0afed..23ff43bb 100644 --- a/man/cls.Rd +++ b/man/cls.Rd @@ -28,7 +28,7 @@ clsAdd(d, cls, value, ...) \S4method{clsAdd}{AnalysisData}(d, cls, value) -\S4method{clsAdd}{Analysis}(d, cls, value, type = c("raw", "pre-treated")) +\S4method{clsAdd}{Analysis}(d, cls, value, type = c("pre-treated", "raw")) clsArrange(d, cls = "class", descending = FALSE, ...) @@ -38,38 +38,38 @@ clsArrange(d, cls = "class", descending = FALSE, ...) d, cls = "class", descending = FALSE, - type = c("raw", "pre-treated") + type = c("pre-treated", "raw") ) clsAvailable(d, ...) 
\S4method{clsAvailable}{AnalysisData}(d) -\S4method{clsAvailable}{Analysis}(d, type = c("raw", "pre-treated")) +\S4method{clsAvailable}{Analysis}(d, type = c("pre-treated", "raw")) clsExtract(d, cls = "class", ...) \S4method{clsExtract}{AnalysisData}(d, cls = "class") -\S4method{clsExtract}{Analysis}(d, cls = "class", type = c("raw", "pre-treated")) +\S4method{clsExtract}{Analysis}(d, cls = "class", type = c("pre-treated", "raw")) clsRemove(d, cls, ...) \S4method{clsRemove}{AnalysisData}(d, cls) -\S4method{clsRemove}{Analysis}(d, cls, type = c("raw", "pre-treated")) +\S4method{clsRemove}{Analysis}(d, cls, type = c("pre-treated", "raw")) clsRename(d, cls, newName, ...) \S4method{clsRename}{AnalysisData}(d, cls, newName) -\S4method{clsRename}{Analysis}(d, cls, newName, type = c("raw", "pre-treated")) +\S4method{clsRename}{Analysis}(d, cls, newName, type = c("pre-treated", "raw")) clsReplace(d, value, cls = "class", ...) \S4method{clsReplace}{AnalysisData}(d, value, cls = "class") -\S4method{clsReplace}{Analysis}(d, value, cls = "class", type = c("raw", "pre-treated")) +\S4method{clsReplace}{Analysis}(d, value, cls = "class", type = c("pre-treated", "raw")) } \arguments{ \item{d}{S4 object of class Analysis or AnalysisData} diff --git a/man/correlations.Rd b/man/correlations.Rd index 4a2761df..efaa63bc 100644 --- a/man/correlations.Rd +++ b/man/correlations.Rd @@ -12,7 +12,9 @@ correlations(d, ...) d, method = "pearson", pAdjustMethod = "bonferroni", - corPvalue = 0.05 + corPvalue = 0.05, + minCoef = 0, + maxCor = Inf ) \S4method{correlations}{Analysis}(d) @@ -27,6 +29,10 @@ correlations(d, ...) \item{pAdjustMethod}{p-value adjustment method. See \code{?p.adjust} for available methods.} \item{corPvalue}{p-value cut-off threshold for significance} + +\item{minCoef}{minimum absolute correlation coefficient threshold} + +\item{maxCor}{maximum number of returned correlations} } \value{ A tibble containing results of significantly correlated features. 
@@ -37,7 +43,7 @@ Feature correlation analysis. \details{ Correlation analyses can be used to identify associated features within data sets. This can be useful to identifying clusters of related features that can be used to annotate metabolites within data sets. -All features are compared and the returned table of correlations are p-value thresholded using the specified cut-off. +All features are compared and the returned table of correlations are thresholded to the specified p-value cut-off. } \examples{ library(metaboData) diff --git a/man/figures/README-feature_plot-1.png b/man/figures/README-feature_plot-1.png index d9724559..3d94c653 100644 Binary files a/man/figures/README-feature_plot-1.png and b/man/figures/README-feature_plot-1.png differ diff --git a/man/figures/README-pca-1.png b/man/figures/README-pca-1.png index 2445ed52..57eb61ff 100644 Binary files a/man/figures/README-pca-1.png and b/man/figures/README-pca-1.png differ diff --git a/man/figures/README-supervised_RF-1.png b/man/figures/README-supervised_RF-1.png index 86497dbc..2057dc22 100644 Binary files a/man/figures/README-supervised_RF-1.png and b/man/figures/README-supervised_RF-1.png differ diff --git a/man/modelling-accessors.Rd b/man/modelling-accessors.Rd index 90565c65..871da690 100644 --- a/man/modelling-accessors.Rd +++ b/man/modelling-accessors.Rd @@ -7,12 +7,18 @@ \alias{mtry,AnalysisData-method} \alias{type} \alias{type,RandomForest-method} +\alias{type,Univariate-method} \alias{response} \alias{response,RandomForest-method} +\alias{response,Univariate-method} \alias{metrics} \alias{metrics,RandomForest-method} \alias{metrics,list-method} \alias{metrics,Analysis-method} +\alias{predictions} +\alias{predictions,RandomForest-method} +\alias{predictions,list-method} +\alias{predictions,Analysis-method} \alias{importanceMetrics} \alias{importanceMetrics,RandomForest-method} \alias{importance} @@ -43,10 +49,14 @@ type(x) \S4method{type}{RandomForest}(x) +\S4method{type}{Univariate}(x) + 
response(x) \S4method{response}{RandomForest}(x) +\S4method{response}{Univariate}(x) + metrics(x) \S4method{metrics}{RandomForest}(x) @@ -55,6 +65,14 @@ metrics(x) \S4method{metrics}{Analysis}(x) +predictions(x) + +\S4method{predictions}{RandomForest}(x) + +\S4method{predictions}{list}(x) + +\S4method{predictions}{Analysis}(x) + importanceMetrics(x) \S4method{importanceMetrics}{RandomForest}(x) @@ -79,9 +97,18 @@ proximity(x, idx = NULL) explanatoryFeatures(x, ...) -\S4method{explanatoryFeatures}{Univariate}(x, threshold = 0.05) +\S4method{explanatoryFeatures}{Univariate}( + x, + threshold = 0.05, + value = c("adjusted.p.value", "p.value") +) -\S4method{explanatoryFeatures}{RandomForest}(x, metric = "FalsePositiveRate", threshold = 0.05) +\S4method{explanatoryFeatures}{RandomForest}( + x, + metric = "false_positive_rate", + value = c("value", "p-value", "adjusted_p-value"), + threshold = 0.05 +) \S4method{explanatoryFeatures}{list}(x, ...) @@ -98,6 +125,8 @@ explanatoryFeatures(x, ...) \item{threshold}{threshold below which explanatory features are extracted} +\item{value}{the importance value to threshold. See the usage section for possible values for each class.} + \item{metric}{importance metric for which to retrieve explanatory features} } \description{ @@ -107,10 +136,11 @@ Methods for accessing modelling results. \itemize{ \item \code{binaryComparisons}: Return a vector of all possible binary comparisons for a given sample information column. -\item \code{mtry}: Calculate the default \code{mtry} random forest parameter value for a given sample information column. +\item \code{mtry}: Return the default \code{mtry} random forest parameter value for a given sample information column. \item \code{type}: Return the type of random forest analysis. \item \code{response}: Return the response variable name used for a random forest analysis. 
\item \code{metrics}: Retrieve the model performance metrics for a random forest analysis +\item \code{predictions}: Retrieve the out of bag model response predictions for a random forest analysis. \item \code{importanceMetrics}: Retrieve the available feature importance metrics for a random forest analysis. \item \code{importance}: Retrieve feature importance results. \item \code{proximity}: Retrieve the random forest sample proximities. @@ -141,6 +171,9 @@ response(rf_analysis) ## Retrieve the model performance metrics metrics(rf_analysis) +## Retrieve the out of bag model response predictions +predictions(rf_analysis) + ## Show the available feature importance metrics importanceMetrics(rf_analysis) @@ -151,5 +184,5 @@ importance(rf_analysis) proximity(rf_analysis) ## Retrieve the explanatory features -explanatoryFeatures(rf_analysis,metric = 'FalsePositiveRate',threshold = 0.05) +explanatoryFeatures(rf_analysis,metric = 'false_positive_rate',threshold = 0.05) } diff --git a/man/plotExplanatoryHeatmap.Rd b/man/plotExplanatoryHeatmap.Rd index 1d687024..0ed155f3 100644 --- a/man/plotExplanatoryHeatmap.Rd +++ b/man/plotExplanatoryHeatmap.Rd @@ -23,7 +23,7 @@ plotExplanatoryHeatmap(x, ...) \S4method{plotExplanatoryHeatmap}{RandomForest}( x, - metric = "FalsePositiveRate", + metric = "false_positive_rate", threshold = 0.05, title = "", distanceMeasure = "euclidean", diff --git a/man/plotFeature.Rd b/man/plotFeature.Rd index 094c0d43..e896f7a2 100644 --- a/man/plotFeature.Rd +++ b/man/plotFeature.Rd @@ -16,7 +16,7 @@ plotFeature(analysis, feature, cls = "class", label = NULL, labelSize = 2, ...) cls = "class", label = NULL, labelSize = 2, - type = "pre-treated" + type = c("pre-treated", "raw") ) } \arguments{ @@ -38,9 +38,8 @@ plotFeature(analysis, feature, cls = "class", label = NULL, labelSize = 2, ...) Plot the trend of a feature. 
} \examples{ -library(metaboData) - -d <- analysisData(abr1$neg,abr1$fact) +d <- analysisData(metaboData::abr1$neg, + metaboData::abr1$fact) ## Plot a categorical response variable plotFeature(d,'N133',cls = 'day') diff --git a/man/plotImportance.Rd b/man/plotImportance.Rd index cb5b5219..72035daa 100644 --- a/man/plotImportance.Rd +++ b/man/plotImportance.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modellingPlots.R +% Please edit documentation in R/modelling-plots.R \name{plotImportance} \alias{plotImportance} \alias{plotImportance,Univariate-method} @@ -11,9 +11,9 @@ plotImportance(x, ...) \S4method{plotImportance}{Univariate}(x, response = "class", rank = TRUE, threshold = 0.05) -\S4method{plotImportance}{RandomForest}(x, metric = "FalsePositiveRate", rank = TRUE) +\S4method{plotImportance}{RandomForest}(x, metric = "false_positive_rate", rank = TRUE) -\S4method{plotImportance}{list}(x, metric = "FalsePositiveRate") +\S4method{plotImportance}{list}(x, metric = "false_positive_rate") } \arguments{ \item{x}{S4 object of class \code{Univariate} or \code{RandomForest}} diff --git a/man/plotLDA.Rd b/man/plotLDA.Rd index 5ee7afba..c8fa621e 100644 --- a/man/plotLDA.Rd +++ b/man/plotLDA.Rd @@ -50,7 +50,7 @@ plotLDA( title = "PC-LDA", legendPosition = "bottom", labelSize = 2, - type = "raw" + type = c("pre-treated", "raw") ) } \arguments{ diff --git a/man/plotMDS.Rd b/man/plotMDS.Rd index 7331663b..8d5577bf 100644 --- a/man/plotMDS.Rd +++ b/man/plotMDS.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modellingPlots.R +% Please edit documentation in R/modelling-plots.R \name{plotMDS} \alias{plotMDS} \alias{plotMDS,RandomForest-method} diff --git a/man/plotMetrics.Rd b/man/plotMetrics.Rd index c198362c..686aa3b7 100644 --- a/man/plotMetrics.Rd +++ b/man/plotMetrics.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in 
R/modellingPlots.R +% Please edit documentation in R/modelling-plots.R \name{plotMetrics} \alias{plotMetrics} \alias{plotMetrics,RandomForest-method} diff --git a/man/plotPCA.Rd b/man/plotPCA.Rd index 201dee78..7d72a2d7 100644 --- a/man/plotPCA.Rd +++ b/man/plotPCA.Rd @@ -50,7 +50,7 @@ plotPCA( title = "PCA", legendPosition = "bottom", labelSize = 2, - type = "raw" + type = c("pre-treated", "raw") ) } \arguments{ diff --git a/man/plotROC.Rd b/man/plotROC.Rd index 9b090c01..7a91f597 100644 --- a/man/plotROC.Rd +++ b/man/plotROC.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modellingPlots.R +% Please edit documentation in R/modelling-plots.R \name{plotROC} \alias{plotROC} \alias{plotROC,RandomForest-method} diff --git a/man/plotSupervisedRF.Rd b/man/plotSupervisedRF.Rd index 6a564fb9..401977ac 100644 --- a/man/plotSupervisedRF.Rd +++ b/man/plotSupervisedRF.Rd @@ -47,7 +47,7 @@ plotSupervisedRF( title = "", legendPosition = "bottom", labelSize = 2, - type = "raw" + type = c("pre-treated", "raw") ) } \arguments{ diff --git a/man/plotTIC.Rd b/man/plotTIC.Rd index 1a69454f..614eb81b 100644 --- a/man/plotTIC.Rd +++ b/man/plotTIC.Rd @@ -14,7 +14,7 @@ plotTIC(analysis, by = "injOrder", colour = "block", ...) 
analysis, by = "injOrder", colour = "block", - type = c("raw", "pre-treated") + type = c("pre-treated", "raw") ) } \arguments{ diff --git a/man/plotUnsupervisedRF.Rd b/man/plotUnsupervisedRF.Rd index 21d2fe3e..eecc0cb1 100644 --- a/man/plotUnsupervisedRF.Rd +++ b/man/plotUnsupervisedRF.Rd @@ -44,7 +44,7 @@ plotUnsupervisedRF( title = "", legendPosition = "bottom", labelSize = 2, - type = "raw" + type = c("pre-treated", "raw") ) } \arguments{ diff --git a/man/randomForest.Rd b/man/randomForest.Rd index ea253fee..5de8aeef 100644 --- a/man/randomForest.Rd +++ b/man/randomForest.Rd @@ -3,7 +3,7 @@ \name{randomForest} \alias{randomForest} \alias{randomForest,AnalysisData-method} -\title{Random forest analysis} +\title{Random forest} \usage{ randomForest( x, diff --git a/man/tune.Rd b/man/tune.Rd index ca20d62e..62ec983b 100644 --- a/man/tune.Rd +++ b/man/tune.Rd @@ -8,8 +8,8 @@ tune( x, cls = "class", - mtry_range = floor(seq(mtry(x, cls = cls) - mtry(x, cls = cls)/2, mtry(x, cls = cls) - + mtry(x, cls = cls)/2, length.out = 4)), + mtry_range = floor(seq(mtry(x, cls = cls) - mtry(x, cls = cls)/2, mtry(x, cls = cls) + + mtry(x, cls = cls)/2, length.out = 4)), ntree_range = 1000, seed = 1234 ) @@ -17,8 +17,8 @@ tune( \S4method{tune}{AnalysisData}( x, cls = "class", - mtry_range = floor(seq(mtry(x, cls = cls) - mtry(x, cls = cls)/2, mtry(x, cls = cls) - + mtry(x, cls = cls)/2, length.out = 4)), + mtry_range = floor(seq(mtry(x, cls = cls) - mtry(x, cls = cls)/2, mtry(x, cls = cls) + + mtry(x, cls = cls)/2, length.out = 4)), ntree_range = 1000, seed = 1234 ) diff --git a/tests/testthat.R b/tests/testthat.R index 8a7284d5..307f2dd4 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,5 +1,4 @@ library(testthat) library(metabolyseR) -library(metaboData) test_check("metabolyseR") diff --git a/tests/testthat/test-QCMethods.R b/tests/testthat/test-QCMethods.R index a520607f..96b6fed6 100644 --- a/tests/testthat/test-QCMethods.R +++ b/tests/testthat/test-QCMethods.R @@ -1,23 
+1,28 @@ -library(metaboData) - -context('QCMethods') - -plan(future::multisession,workers = 2) test_that('QCMethods returns methods correctly',{ m <- map_lgl(QCMethods(),is.function) expect_false(FALSE %in% m) }) +test_that('QCMethods errors if incorrect method specified',{ + expect_error(QCMethods('incorrect')) +}) + test_that('methods work',{ m <- names(QCMethods()) - d <- analysisData(abr1$neg,abr1$fact) %>% + d <- analysisData( + metaboData::abr1$neg, + metaboData::abr1$fact) %>% keepFeatures(features = features(.)[290:300]) %>% keepClasses(classes = c(1,6)) m <- map(m,~{ method <- QCMethods(.) - method(d, cls = 'class', QCidx = '1') + if (.x == 'impute'){ + method(d, cls = 'class', QCidx = '1',parallel = 'no') + } else { + method(d, cls = 'class', QCidx = '1') + } }) expect_false(FALSE %in% map_lgl( diff --git a/tests/testthat/test-aggregateMethods.R b/tests/testthat/test-aggregateMethods.R index c5497353..f0a1d058 100644 --- a/tests/testthat/test-aggregateMethods.R +++ b/tests/testthat/test-aggregateMethods.R @@ -7,6 +7,10 @@ test_that('aggregateMethods returns methods correctly',{ expect_false(FALSE %in% m) }) +test_that('aggregateMethods errors if incorrect method specified',{ + expect_error(aggregateMethods('incorrect')) +}) + test_that('methods work',{ m <- names(aggregateMethods()) d <- analysisData(abr1$neg,abr1$fact) %>% diff --git a/tests/testthat/test-analysisData.R b/tests/testthat/test-analysisData.R index 37a10828..118d0a53 100644 --- a/tests/testthat/test-analysisData.R +++ b/tests/testthat/test-analysisData.R @@ -153,5 +153,5 @@ test_that('number of features and samples correctly returned from Analysis',{ d <- metabolyse(abr1$neg[,columns],abr1$fact,p,verbose = FALSE) expect_equal(nFeatures(d),length(columns)) - expect_equal(nSamples(d),nrow(abr1$neg)) + expect_equal(nSamples(d,type = 'raw'),nrow(abr1$neg)) }) diff --git a/tests/testthat/test-correctionMethods.R b/tests/testthat/test-correctionMethods.R index 5676e332..e556d662 100644 --- 
a/tests/testthat/test-correctionMethods.R +++ b/tests/testthat/test-correctionMethods.R @@ -7,6 +7,10 @@ test_that('correctionMethods returns methods correctly',{ expect_false(FALSE %in% m) }) +test_that('correctionMethods errors if incorrect method specified',{ + expect_error(correctionMethods('incorrect')) +}) + test_that('methods work',{ m <- names(correctionMethods()) d <- analysisData(abr1$neg[,200:250],abr1$fact) %>% diff --git a/tests/testthat/test-correlations.R b/tests/testthat/test-correlations.R index 3787c5b4..a0a8afd7 100644 --- a/tests/testthat/test-correlations.R +++ b/tests/testthat/test-correlations.R @@ -29,3 +29,11 @@ test_that('correlations errors when incorrect correlation p value threshold spec expect_error(correlations(d,corPvalue = 'wrong')) }) +test_that('the number of returned correlations can be thresholded',{ + d <- analysisData(abr1$neg[,200:250],abr1$fact) + + n_correlations <- correlations(d,maxCor = 2) %>% + nrow() + + expect_equal(n_correlations,2) +}) \ No newline at end of file diff --git a/tests/testthat/test-impute.R b/tests/testthat/test-impute.R new file mode 100644 index 00000000..290cc76d --- /dev/null +++ b/tests/testthat/test-impute.R @@ -0,0 +1,29 @@ + +test_that('imputeMethods returns methods correctly',{ + m <- map_lgl(imputeMethods(),is.function) + expect_true(all(m)) +}) + +test_that('imputeMethods errors if incorrect method specified',{ + expect_error(imputeMethods('incorrect')) +}) + +test_that('methods work',{ + d <- analysisData(metaboData::abr1$neg[,500:550], + metaboData::abr1$fact) %>% + keepClasses(classes = c(1,6)) + + m <- list( + imputeAll(d,parallel = 'no'), + imputeClass(d) + ) + + expect_true(all(map_chr(m,class) == 'AnalysisData')) + + expect_true(all(map_lgl(m,~{'tbl_df' %in% class(dat(.x))}))) + expect_true(all(map_lgl(m,~{'tbl_df' %in% class(sinfo(.x))}))) + + expect_true(all(map_lgl(m,~{ncol(dat(.x)) == ncol(dat(d))}))) + expect_true(all(map_lgl(m,~{ncol(sinfo(.x)) == ncol(sinfo(d))}))) + 
expect_true(all(map_lgl(m,~{nrow(sinfo(.x)) == nrow(sinfo(d))}))) +}) diff --git a/tests/testthat/test-imputeMethods.R b/tests/testthat/test-imputeMethods.R deleted file mode 100644 index fabd8eee..00000000 --- a/tests/testthat/test-imputeMethods.R +++ /dev/null @@ -1,49 +0,0 @@ -library(metaboData) - -context('imputeMethods') - -plan(future::multisession,workers = 2) - -test_that('imputeMethods returns methods correctly',{ - m <- map_lgl(imputeMethods(),is.function) - expect_false(F %in% m) -}) - -test_that('methods work',{ - m <- names(imputeMethods()) - d <- abr1 %>% - { - analysisData(.$neg,.$fact) - } %>% - keepClasses(classes = c(1,6)) %>% - { - keepFeatures(.,features = features(.)[500:600]) - } - - m <- m %>% - map(~{ - method <- imputeMethods(.x) - res <- method(d) - return(res) - }) - - expect_false(FALSE %in% ('AnalysisData' == map_chr(m,class))) - expect_false(FALSE %in% map_lgl( - m, - ~{identical(class(dat(.x)), c('tbl_df','tbl','data.frame'))})) - expect_false(FALSE %in% map_lgl( - m, - ~{identical(class(sinfo(.x)),c('tbl_df','tbl','data.frame'))})) - expect_false(FALSE %in% map_lgl( - m, - ~{ncol(dat(.x)) == ncol(dat(d))})) - expect_false(FALSE %in% map_lgl( - m, - ~{nrow(dat(.x)) == nrow(dat(d))})) - expect_false(FALSE %in% map_lgl( - m, - ~{ncol(sinfo(.x)) == ncol(sinfo(d))})) - expect_false(FALSE %in% map_lgl( - m, - ~{nrow(sinfo(.x)) == nrow(sinfo(d))})) -}) diff --git a/tests/testthat/test-info.R b/tests/testthat/test-info.R index f05f1414..6ae1d8fe 100644 --- a/tests/testthat/test-info.R +++ b/tests/testthat/test-info.R @@ -1,15 +1,15 @@ -library(metaboData) a <- new('Analysis') -raw(a) <- analysisData(abr1$neg,abr1$fact) +raw(a) <- analysisData(metaboData::abr1$neg, + metaboData::abr1$fact) context('sample information methods') test_that('clsAvailable works',{ - cl <- clsAvailable(a) - expect_true(is.character(cl)) - expect_length(cl,9) + cl <- clsAvailable(a,type = 'raw') + expect_true(is.character(cl)) + expect_length(cl,9) }) 
test_that('clsAvailable throws error when incorrect type specified',{ @@ -17,7 +17,10 @@ test_that('clsAvailable throws error when incorrect type specified',{ }) test_that('clsExtract works',{ - cl <- clsExtract(a,cls = 'class') + cl <- clsExtract( + a, + cls = 'class', + type = 'raw') expect_true(is.numeric(cl)) expect_length(cl,120) }) @@ -29,7 +32,10 @@ test_that('clsExtract errors when incorrect type specified',{ }) test_that('clsReplace works',{ - b <- clsReplace(a,rep(1,120),cls = 'class') + b <- clsReplace( + a,rep(1,120), + cls = 'class', + type = 'raw') i <- b %>% sinfo(type = 'raw') @@ -50,10 +56,16 @@ test_that('clsReplace errors when incorrect type specified',{ }) test_that('clsAdd works',{ - b <- clsAdd(a,'test',rep(1,120)) + b <- clsAdd( + a, + 'test', + rep(1,120), + type = 'raw') + i <- b %>% sinfo(type = 'raw') - expect_true('test' %in% clsAvailable(b)) + + expect_true('test' %in% clsAvailable(b,type = 'raw')) expect_equal(1,unique(i$test)) }) @@ -69,7 +81,7 @@ test_that('clsAdd errors when incorrect type specified',{ }) test_that('clsRemove works',{ - b <- clsRemove(a,'class') + b <- clsRemove(a,'class',type = 'raw') expect_false('class' %in% clsAvailable(b)) }) @@ -82,18 +94,19 @@ test_that('clsRemove errors when incorrect type specified',{ }) test_that('clsArrange works',{ - b <- clsArrange(a,'class') - expect_identical(clsExtract(a) %>% + b <- clsArrange(a,'class',type = 'raw') + expect_identical(clsExtract(a,type = 'raw') %>% sort(), - clsExtract(b)) + clsExtract(b,type = 'raw')) }) test_that('clsArrange works for decending arrangment',{ b <- clsArrange(a,'class', - descending = TRUE) - expect_identical(clsExtract(a) %>% + descending = TRUE, + type = 'raw') + expect_identical(clsExtract(a,type = 'raw') %>% sort(decreasing = TRUE), - clsExtract(b)) + clsExtract(b,type = 'raw')) }) test_that('clsArrange errors when incorrect type specified',{ @@ -101,8 +114,8 @@ test_that('clsArrange errors when incorrect type specified',{ }) 
test_that('clsRename works',{ - b <- clsRename(a,'rep','replicate') - expect_true('replicate' %in% clsAvailable(b)) + b <- clsRename(a,'rep','replicate',type = 'raw') + expect_true('replicate' %in% clsAvailable(b,type = 'raw')) }) test_that('clsRename errors when incorrect type specified',{ diff --git a/tests/testthat/test-keep.R b/tests/testthat/test-keep.R new file mode 100644 index 00000000..3e429958 --- /dev/null +++ b/tests/testthat/test-keep.R @@ -0,0 +1,4 @@ + +test_that('keepMethods errors if incorrect method specified',{ + expect_error(keepMethods('incorrect')) +}) \ No newline at end of file diff --git a/tests/testthat/test-mds.R b/tests/testthat/test-mds.R index ce75af31..a85486fe 100644 --- a/tests/testthat/test-mds.R +++ b/tests/testthat/test-mds.R @@ -1,4 +1,7 @@ -d <- analysisData(abr1$neg,abr1$fact) %>% + +d <- analysisData( + metaboData::abr1$neg, + metaboData::abr1$fact) %>% keepFeatures(features = features(.)[200:250]) %>% clsAdd('sample_names',paste0(seq_len(nSamples(.)),'_a')) diff --git a/tests/testthat/test-metabolyse.R b/tests/testthat/test-metabolyse.R index d71dbc05..998dee20 100644 --- a/tests/testthat/test-metabolyse.R +++ b/tests/testthat/test-metabolyse.R @@ -1,6 +1,3 @@ -library(metaboData) - -context('metabolyse') test_that('metabolyse-works', { p <- analysisParameters() @@ -13,21 +10,28 @@ test_that('metabolyse-works', { changeParameter(p,'cls') <- 'day' - cls1 <- abr1$neg[abr1$fact$class %in% c('1'),190:200][1:10,] - cls2 <- abr1$neg[abr1$fact$class %in% c('6'),190:200][1:10,] + cls1 <- metaboData::abr1$neg[metaboData::abr1$fact$class %in% c('1'),190:200][1:10,] + cls2 <- metaboData::abr1$neg[metaboData::abr1$fact$class %in% c('6'),190:200][1:10,] dat <- rbind(cls1,cls2) - inf1 <- abr1$fact[abr1$fact$class %in% c('1'),][1:10,] - inf2 <- abr1$fact[abr1$fact$class %in% c('6'),][1:10,] + + inf1 <- metaboData::abr1$fact[abr1$fact$class %in% c('1'),][1:10,] + inf2 <- metaboData::abr1$fact[abr1$fact$class %in% c('6'),][1:10,] info <- 
rbind(inf1,inf2) + analysis <- metabolyse(dat,info,p,verbose = FALSE) expect_true(isS4(analysis)) expect_true(class(analysis) == 'Analysis') - expect_equal(nFeatures(analysis),11) - expect_equal(nSamples(analysis),20) + expect_equal(nFeatures(analysis,type = 'raw'),11) + expect_equal(nSamples(analysis,type = 'raw'),20) expect_s3_class(metrics(analysis),'tbl_df') + expect_s3_class(predictions(analysis),'tbl_df') expect_s3_class(proximity(analysis),'tbl_df') expect_s3_class(mds(analysis),'tbl_df') expect_s3_class(roc(analysis),'tbl_df') }) + +test_that('getPreTreatMethods errors if incorrect method specified',{ + expect_error(getPreTreatMethods('incorrect')) +}) diff --git a/tests/testthat/test-modellingPlots.R b/tests/testthat/test-modelling-plots.R similarity index 93% rename from tests/testthat/test-modellingPlots.R rename to tests/testthat/test-modelling-plots.R index 139e9474..37d18fec 100644 --- a/tests/testthat/test-modellingPlots.R +++ b/tests/testthat/test-modelling-plots.R @@ -1,7 +1,6 @@ -context('modellingPlots') - -d <- analysisData(abr1$neg,abr1$fact) %>% +d <- analysisData(metaboData::abr1$neg, + metaboData::abr1$fact) %>% keepFeatures(features = features(.)[seq_len(200)]) %>% keepClasses(cls = 'day',classes = c('H','1','2')) %>% occupancyMaximum(cls = 'day') %>% @@ -10,8 +9,8 @@ d <- analysisData(abr1$neg,abr1$fact) %>% ttest_res <- d %>% ttest(cls = 'day') -unsupervised_rf_res <- d %>% - randomForest(cls = NULL) +unsupervised_rf_res <- randomForest(d, + cls = NULL) suppressWarnings( classification_rf_res <- d %>% @@ -46,7 +45,7 @@ test_that('plotImportance works for random forest classification',{ test_that('plotImportance works for random forest regression',{ pl <- plotImportance(regression_rf_res,metric = '%IncMSE') - expect_identical(class(pl),c('gg','ggplot')) + expect_s3_class(pl,'ggplot') }) test_that('plotImportance for Univariate class throws an error when the incorrect response is specified',{ @@ -78,7 +77,7 @@ test_that('plotMetrics works 
for random forest regression',{ pl <- regression_rf_res %>% plotMetrics() - expect_identical(class(pl),c('gg','ggplot')) + expect_type(pl,'list') }) test_that('an error is thrown from plotMetrics for unsupervised random forest',{ @@ -119,3 +118,4 @@ test_that('plotROC throws an error when non RandomForest object included in list d <- c(classification_rf_res,list('wrong')) expect_error(plotROC(d)) }) + diff --git a/tests/testthat/test-occupancyMethods.R b/tests/testthat/test-occupancyMethods.R index 95454b0f..cce41caf 100644 --- a/tests/testthat/test-occupancyMethods.R +++ b/tests/testthat/test-occupancyMethods.R @@ -7,6 +7,10 @@ test_that('occupancyMethods returns methods correctly',{ expect_false(F %in% m) }) +test_that('occupancyMethods errors if incorrect method specified',{ + expect_error(occupancyMethods('incorrect')) +}) + test_that('methods work',{ m <- names(occupancyMethods()) dat <- analysisData(data = abr1$neg, info = abr1$fact) @@ -32,3 +36,10 @@ test_that('methods work',{ m, ~{nrow(.x %>% sinfo()) == nrow(dat %>% sinfo())})) }) + +test_that('occupancy methods error argument `occupancy` is non-numeric',{ + dat <- analysisData(data = metaboData::abr1$neg, info = metaboData::abr1$fact) + + expect_error(occupancyMaximum(dat,occupancy = 'wrong')) + expect_error(occupancyMinimum(dat,occupancy = 'wrong')) +}) diff --git a/tests/testthat/test-parameters.R b/tests/testthat/test-parameters.R index 2a4fa26c..70fa2f80 100644 --- a/tests/testthat/test-parameters.R +++ b/tests/testthat/test-parameters.R @@ -48,7 +48,7 @@ test_that('correlations parameters assigned for AnalysisParameters',{ parameters(p,'correlations') <- correlationsParameters() expect_identical(parameters(p,'correlations') %>% names(), - c("method","pAdjustMethod","corPvalue")) + c("method","pAdjustMethod","corPvalue",'minCoef','maxCor')) }) test_that('parameters correctly exported and parsed',{ diff --git a/tests/testthat/test-plotExplanatoryHeatmap.R b/tests/testthat/test-plotExplanatoryHeatmap.R 
index 58592b27..07b23def 100644 --- a/tests/testthat/test-plotExplanatoryHeatmap.R +++ b/tests/testthat/test-plotExplanatoryHeatmap.R @@ -1,7 +1,3 @@ -library(metaboData) - -context('plotExplanatoryHeatmap') - test_that('plotExplanatoryHeatmap returns a plot for random forest classification Analysis',{ p <- analysisParameters(elements = c('pre-treatment','modelling')) @@ -18,7 +14,10 @@ test_that('plotExplanatoryHeatmap returns a plot for random forest classificatio changeParameter(p,'cls') <- 'day' - d <- metabolyse(abr1$neg[,200:300],abr1$fact,p,verbose = TRUE) + d <- metabolyse(metaboData::abr1$neg[,200:300], + metaboData::abr1$fact, + p, + verbose = TRUE) pl_feat <- plotExplanatoryHeatmap(d,threshold = 0.5) pl_no_feat <- plotExplanatoryHeatmap(d,threshold = 0.5,featureNames = FALSE) @@ -59,7 +58,7 @@ test_that('plotExplanatoryHeatmap works for linear regression',{ }) test_that('plotExplanatoryHeatmap method for lists throws an error when non modelling classes supplied',{ - expect_error(list(wrong = list()) %>% + expect_error(list(wrong = list(wrong = 1)) %>% plotExplanatoryHeatmap()) }) diff --git a/tests/testthat/test-plotLDA.R b/tests/testthat/test-plotLDA.R index a835f602..45cefd73 100644 --- a/tests/testthat/test-plotLDA.R +++ b/tests/testthat/test-plotLDA.R @@ -26,30 +26,30 @@ test_that('plotLDA throws error when wrong type specified for Analysis',{ expect_error(plotLDA(d,type = 'wrong')) }) -test_that('A warning is thrown when a single replicate class is included',{ - d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% - occupancyMaximum(cls = 'day') - - d <- d %>% - removeSamples(idx = 'injorder', - samples = c(6,13,30,31,32,38, - 41,58,62,63,70, - 87,88,93,99,102, - 103,107, 120)) +test_that('plotLDA throws a warning if an error is encountered during LDA',{ + a <- analysisData( + tibble::tibble( + x = 1:10, + y = 20:29 + ), + tibble( + class = rep(1,10) + ) + ) - expect_warning(plotLDA(d,cls = 'day')) + expect_warning(plotLDA(a)) }) -test_that('plotLDA 
throws error when number of classes is less than 2',{ +test_that('plotLDA throws warning and skips plotting when number of classes is less than 2',{ d <- analysisData(abr1$neg,abr1$fact) %>% keepClasses(cls = 'day', classes = '1') - expect_error(plotLDA(d,cls = 'day')) + expect_warning(plotLDA(d,cls = 'day')) }) -test_that('plotLDA throws error when numeric classes specified',{ +test_that('plotLDA throws a warning and skips plotting when numeric classes specified',{ d <- analysisData(abr1$neg,abr1$fact) - expect_error(plotLDA(d)) + expect_warning(plotLDA(d)) }) test_that('plotLDA plots for 2 classes',{ diff --git a/tests/testthat/test-plotTIC.R b/tests/testthat/test-plotTIC.R index b8810250..a42e1edf 100644 --- a/tests/testthat/test-plotTIC.R +++ b/tests/testthat/test-plotTIC.R @@ -7,8 +7,8 @@ test_that('plotTIC works',{ raw(d) <- analysisData(abr1$neg[,300:400],abr1$fact) - pl_scatter <- plotTIC(d,by = 'injorder',colour = 'day') - pl_boxplot <- plotTIC(d,by = 'day',colour = 'day') + pl_scatter <- plotTIC(d,by = 'injorder',colour = 'day',type = 'raw') + pl_boxplot <- plotTIC(d,by = 'day',colour = 'day',type = 'raw') expect_s3_class(pl_scatter,'ggplot') expect_s3_class(pl_boxplot,'ggplot') diff --git a/tests/testthat/test-predict.R b/tests/testthat/test-predict.R index 9c55677c..41108e98 100644 --- a/tests/testthat/test-predict.R +++ b/tests/testthat/test-predict.R @@ -1,5 +1,6 @@ test_that("predict works", { - x <- analysisData(abr1$neg[,200:300],abr1$fact) %>% + x <- analysisData(metaboData::abr1$neg[,200:300], + metaboData::abr1$fact) %>% occupancyMaximum(cls = 'day') %>% transformTICnorm() @@ -25,7 +26,8 @@ test_that("predict works", { }) test_that("predit throws an error if unsupervised random forest used",{ - x <- analysisData(abr1$neg[,200:300],abr1$fact) %>% + x <- analysisData(metaboData::abr1$neg[,200:300], + metaboData::abr1$fact) %>% occupancyMaximum(cls = 'day') %>% transformTICnorm() @@ -46,7 +48,8 @@ test_that("predit throws an error if unsupervised 
random forest used",{ }) test_that("predict throws an error if RandomForest object does not contain models",{ - x <- analysisData(abr1$neg[,200:300],abr1$fact) %>% + x <- analysisData(metaboData::abr1$neg[,200:300], + metaboData::abr1$fact) %>% occupancyMaximum(cls = 'day') %>% transformTICnorm() diff --git a/tests/testthat/test-randomForest.R b/tests/testthat/test-randomForest.R index 7df93496..8212fe18 100644 --- a/tests/testthat/test-randomForest.R +++ b/tests/testthat/test-randomForest.R @@ -1,22 +1,23 @@ -library(metaboData) -d <- analysisData(abr1$neg,abr1$fact) %>% +d <- analysisData(metaboData::abr1$neg, + metaboData::abr1$fact) %>% keepFeatures(features = features(.)[200:250]) %>% clsAdd('sample_names',paste0(seq_len(nSamples(.)),'_a')) -context('randomForest') - test_that('unsupervised random forest works',{ rf <- randomForest(d,cls = NULL,returnModels = TRUE) expect_s4_class(rf,'RandomForest') expect_identical(type(rf),'unsupervised') + expect_equal(mtry(rf,cls = NULL),7) }) test_that('random forest classification works',{ - expect_warning(rf <- randomForest(d,cls = c('day','name'),perm = 3,returnModels = TRUE)) + expect_warning(rf <- randomForest(d,cls = c('day','name'),perm = 2,returnModels = TRUE)) + rf_no_perm <- randomForest(d,cls = 'day') rf_metrics <- metrics(rf) + rf_predictions <- predictions(rf) rf_importance <- importance(rf) rf_proximity <- proximity(rf,idx = 'sample_names') @@ -25,18 +26,24 @@ test_that('random forest classification works',{ expect_identical(class(rf),'list') expect_identical(type(rf$day),'classification') expect_s3_class(rf_metrics,'tbl_df') + expect_s3_class(rf_predictions,'tbl_df') expect_s3_class(rf_importance,'tbl_df') expect_s3_class(rf_proximity,'tbl_df') + expect_s3_class(explanatoryFeatures(rf,metric = 'MeanDecreaseAccuracy'),'tbl_df') expect_s3_class(metrics(rf_wrong),'tbl_df') + expect_s3_class(predictions(rf_wrong),'tbl_df') expect_error(importance(rf_wrong)) expect_s3_class(proximity(rf_wrong),'tbl_df') 
expect_error(explanatoryFeatures(rf_wrong)) expect_equal(nrow(metrics(list(0,0))),0) + expect_equal(nrow(predictions(list(0,0))),0) expect_equal(nrow(proximity(list(0,0))),0) expect_error(proximity(rf,idx = 'name')) + expect_error(explanatoryFeatures(rf,metric = 'wrong')) + expect_error(explanatoryFeatures(rf_no_perm,value = 'p-value')) }) test_that('binary classification works',{ @@ -56,9 +63,15 @@ test_that('classification throws an error when less than 2 classes available',{ test_that('random forest regression works',{ rf <- randomForest(d,cls = 'injorder',perm = 3,returnModels = TRUE) + rf_no_perm <- randomForest(d,cls = 'injorder') expect_s4_class(rf,'RandomForest') expect_identical(type(rf),'regression') + expect_s3_class(metrics(rf),'tbl_df') + expect_s3_class(importance(rf),'tbl_df') + expect_s3_class(explanatoryFeatures(rf,metric = "IncNodePurity",value = 'p-value'),'tbl_df') + expect_error(explanatoryFeatures(rf,metric = 'wrong')) + expect_error(explanatoryFeatures(rf_no_perm,value = 'p-value')) }) test_that('low sample permutation testing works',{ diff --git a/tests/testthat/test-removeMethods.R b/tests/testthat/test-removeMethods.R index 210b4fd5..bab006bc 100644 --- a/tests/testthat/test-removeMethods.R +++ b/tests/testthat/test-removeMethods.R @@ -7,6 +7,10 @@ test_that('removeMethods returns methods correctly',{ expect_false(F %in% m) }) +test_that('removeMethods errors if incorrect method specified',{ + expect_error(removeMethods('incorrect')) +}) + test_that('methods work',{ d <- analysisData(abr1$neg,abr1$fact) diff --git a/tests/testthat/test-show.R b/tests/testthat/test-show.R index 2cc90ccb..fecf4474 100644 --- a/tests/testthat/test-show.R +++ b/tests/testthat/test-show.R @@ -1,6 +1,3 @@ -library(metaboData) - -context('show') test_that('AnalysisParameters show method works',{ expect_output(new('AnalysisParameters') %>% @@ -18,12 +15,11 @@ test_that('Analysis show method works',{ }) test_that('RandomForest class show method works',{ - 
expect_output(new('RandomForest') %>% - { - .@type <- 'classification' - . - } %>% - print(),'Random forest classification') + object <- new('RandomForest', + type = 'classification', + response = 'class') + expect_output(print(object), + 'Random forest classification') }) test_that('Univariate class show method works',{ diff --git a/tests/testthat/test-transformMethods.R b/tests/testthat/test-transformMethods.R index 5af126a6..c806bfa7 100644 --- a/tests/testthat/test-transformMethods.R +++ b/tests/testthat/test-transformMethods.R @@ -7,6 +7,10 @@ test_that('transformMethods returns methods correctly',{ expect_false(FALSE %in% m) }) +test_that('transformMethods errors if incorrect method specified',{ + expect_error(transformMethods('incorrect')) +}) + test_that('methods work',{ m <- names(transformMethods()) diff --git a/tests/testthat/test-tune.R b/tests/testthat/test-tune.R index d8f4db96..1a829d74 100644 --- a/tests/testthat/test-tune.R +++ b/tests/testthat/test-tune.R @@ -1,5 +1,6 @@ -x <- analysisData(abr1$neg[,200:300],abr1$fact) %>% +x <- analysisData(metaboData::abr1$neg[,200:300], + metaboData::abr1$fact) %>% occupancyMaximum(cls = 'day') %>% transformTICnorm() diff --git a/tests/testthat/test-univariate.R b/tests/testthat/test-univariate.R index cc1da131..b8fe0f57 100644 --- a/tests/testthat/test-univariate.R +++ b/tests/testthat/test-univariate.R @@ -14,6 +14,7 @@ test_that('anova works',{ expect_identical(class(i),c("tbl_df","tbl","data.frame")) expect_equal(ncol(i),10) expect_equal(nrow(i),51) + expect_identical(type(res),'ANOVA') }) test_that('ttest works',{ diff --git a/vignettes/modelling.Rmd b/vignettes/modelling.Rmd index 8fe7195a..d4110ea6 100644 --- a/vignettes/modelling.Rmd +++ b/vignettes/modelling.Rmd @@ -23,7 +23,8 @@ library(stringr) opts_chunk$set( collapse = TRUE, comment = "#>", - fig.align = 'center' + fig.align = 'center', + warning = FALSE ) ``` @@ -169,7 +170,7 @@ The following will extract the explanatory features below a threshold 
of 0.05 ba ```{r unsupervised-explanatory} unsupervised_rf %>% - explanatoryFeatures(metric = "FalsePositiveRate", + explanatoryFeatures(metric = "false_positive_rate", threshold = 0.05) ``` @@ -270,11 +271,11 @@ Here, we will use the false positive rate metric with a threshold of below 0.05 ```{r multinomial-explanatory} multinomial_rf %>% - explanatoryFeatures(metric = 'FalsePositiveRate', + explanatoryFeatures(metric = 'false_positive_rate', threshold = 0.05) ``` -As shown above there were a total of `r multinomial_rf %>% explanatoryFeatures(metric = 'FalsePositiveRate',threshold = 0.05) %>% nrow()` explanatory features identified. +As shown above there were a total of `r multinomial_rf %>% explanatoryFeatures(metric = 'false_positive_rate',threshold = 0.05) %>% nrow()` explanatory features identified. Within a multinomial experiment, it is also possible to specify the exact class comparisons to include, where it might not be suitable to compare all the classes at once using the `comparisons` argument. This should be specified as a named list, the corresponding to the `cls` argument. 
@@ -369,7 +370,7 @@ This gives a total of `r nrow(explanatoryFeatures(binary_rf))` explanatory featu ```{r binary-explanatory} binary_rf %>% - explanatoryFeatures(metric = 'FalsePositiveRate', + explanatoryFeatures(metric = 'false_positive_rate', threshold = 0.05) ``` @@ -385,7 +386,7 @@ binary_rf <- clsReplace(binary_rf, value = refactor_cls, cls = 'day') binary_rf %>% - plotExplanatoryHeatmap(metric = 'FalsePositiveRate', + plotExplanatoryHeatmap(metric = 'false_positive_rate', threshold = 0.05, featureNames = TRUE) ``` diff --git a/vignettes/pre_treatment.Rmd b/vignettes/pre_treatment.Rmd index 704c3de7..d3353001 100644 --- a/vignettes/pre_treatment.Rmd +++ b/vignettes/pre_treatment.Rmd @@ -262,7 +262,7 @@ Using a different approach such as the parametric analysis Of variance (ANOVA) w ### Sample aggregation -Sample aggregation allows the electronic pooling of samples based on a grouping variable. +Sample aggregation allows the electronic pooling of samples based on grouping variables. This is useful in situations such as the presence of technical replicates that can be aggregated to reduce the effects of pseudo replication. `metabolyseR` provides methods for mean, median and sum aggregation and each starts with the `aggregate` prefix. @@ -275,12 +275,12 @@ d %>% plotPCA(cls = 'day') ``` -The example below shows the mean aggregation of the data using the experimental classes within the `day` sample information column. +The example below shows the mean aggregation of the data using the experimental factors within the `day` and `class` sample information columns. ```{r day_mean} day_mean <- d %>% occupancyMaximum(cls = 'day') %>% - aggregateMean(cls = 'day') + aggregateMean(cls = c('day','class')) ``` The PCA plot below shows these class averages of the data.