diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index be6fdf0c..db4dceed 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -22,7 +22,7 @@ jobs: - {os: windows-latest, r: 'release'} - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel/1'} + - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} @@ -43,26 +43,4 @@ jobs: with: extra-packages: rcmdcheck - - name: Check - env: - _R_CHECK_CRAN_INCOMING_: false - run: | - options(crayon.enabled = TRUE) - rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Show testthat output - if: always() - run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true - shell: bash - - - name: Test coverage - run: covr::codecov() - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + - uses: r-lib/actions/check-r-package@v1 diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 00000000..63cbb18a --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,35 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-pandoc@v1 + + - uses: r-lib/actions/setup-r@v1 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v1 + with: + extra-packages: pkgdown + needs: website + + - name: Deploy package + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 00000000..3c0da1c9 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,30 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v1 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v1 + with: + extra-packages: covr + + - name: Test coverage + run: covr::codecov() + shell: Rscript {0} diff --git a/.gitignore b/.gitignore index 1fb91752..fb832c23 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ doc Meta /doc/ /Meta/ +docs diff --git a/DESCRIPTION b/DESCRIPTION index 334a9174..df42262b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: metabolyseR Title: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data -Version: 0.14.9 +Version: 0.14.10 Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre")) Description: A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data. 
URL: https://jasenfinch.github.io/metabolyseR @@ -76,6 +76,7 @@ Collate: allClasses.R plotting.R plotUnsupervisedRF.R pre-treatment.R + predict.R QC.R reexports.R remove.R @@ -85,6 +86,7 @@ Collate: allClasses.R show-method.R split.R transform.R + tune.R univariate.R modelling-accessors.R VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 40a9c352..9e3324d2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,6 +48,7 @@ export(metabolyse) export(metrics) export(modellingMethods) export(modellingParameters) +export(mtry) export(nFeatures) export(nSamples) export(occupancy) @@ -73,6 +74,7 @@ export(preTreated) export(preTreatmentElements) export(preTreatmentMethods) export(preTreatmentParameters) +export(predict) export(proximity) export(randomForest) export(raw) @@ -97,6 +99,7 @@ export(transformSQRT) export(transformTICnorm) export(transformVast) export(ttest) +export(tune) export(type) exportClasses(Analysis) exportClasses(AnalysisData) @@ -137,6 +140,7 @@ importFrom(dplyr,mutate_if) importFrom(dplyr,n) importFrom(dplyr,relocate) importFrom(dplyr,rename) +importFrom(dplyr,rename_with) importFrom(dplyr,rowwise) importFrom(dplyr,select) importFrom(dplyr,select_if) @@ -147,6 +151,7 @@ importFrom(e1071,naiveBayes) importFrom(forestControl,fpr_fs) importFrom(furrr,furrr_options) importFrom(furrr,future_map) +importFrom(furrr,future_map2) importFrom(future,plan) importFrom(ggdendro,dendro_data) importFrom(ggplot2,aes) @@ -210,6 +215,7 @@ importFrom(patchwork,wrap_plots) importFrom(purrr,map) importFrom(purrr,map_chr) importFrom(purrr,map_dbl) +importFrom(purrr,map_depth) importFrom(purrr,map_df) importFrom(purrr,map_lgl) importFrom(purrr,walk) @@ -231,6 +237,7 @@ importFrom(stats,runif) importFrom(stats,sd) importFrom(stringr,str_c) importFrom(stringr,str_extract) +importFrom(stringr,str_remove) importFrom(stringr,str_remove_all) importFrom(stringr,str_replace_all) importFrom(stringr,str_split) @@ -241,6 +248,7 @@ importFrom(tibble,deframe) 
importFrom(tibble,rowid_to_column) importFrom(tibble,tibble) importFrom(tidyr,drop_na) +importFrom(tidyr,expand_grid) importFrom(tidyr,gather) importFrom(tidyr,spread) importFrom(tidyselect,all_of) diff --git a/NEWS.md b/NEWS.md index badde6a8..7336d921 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# metabolyseR 0.14.10 + +* Added the method [`predict()`](https://jasenfinch.github.io/metabolyseR/reference/predict.html) for the [`RandomForest`](https://jasenfinch.github.io/metabolyseR/reference/RandomForest-class.html) S4 class to predict model response values. + +* Added the method [`mtry()`](https://jasenfinch.github.io/metabolyseR/reference/modelling-accessors.html) for the [`AnalysisData`](https://jasenfinch.github.io/metabolyseR/reference/AnalysisData-class.html) S4 class to return the default `mtry` random forest parameter for a given response variable. + +* Added the method [`tune()`](https://jasenfinch.github.io/metabolyseR/reference/tune.html) for the [`AnalysisData`](https://jasenfinch.github.io/metabolyseR/reference/AnalysisData-class.html) S4 class to tune the random forest parameters `mtry` and `ntree` for a given response variable. + # metabolyseR 0.14.9 * Suppressed name repair console message encountered during random forest permutation testing. diff --git a/R/modelling-accessors.R b/R/modelling-accessors.R index ad11b9d7..3d0b9f60 100644 --- a/R/modelling-accessors.R +++ b/R/modelling-accessors.R @@ -9,6 +9,7 @@ #' @param ... arguments to parse to method for specific class #' @section Methods: #' * `binaryComparisons`: Return a vector of all possible binary comparisons for a given sample information column. +#' * `mtry`: Return the default `mtry` random forest parameter value for a given sample information column. #' * `type`: Return the type of random forest analysis. #' * `response`: Return the response variable name used for a random forest analysis. 
#' * `metrics`: Retrieve the model performance metrics for a random forest analysis @@ -21,9 +22,12 @@ #' #' d <- analysisData(abr1$neg[,200:300],abr1$fact) #' -#' ## Return possible binary comparisons for the 'day' column +#' ## Return possible binary comparisons for the `day` response column #' binaryComparisons(d,cls = 'day') #' +#' ## Return the default random forest `mtry` parameter for the `day` response column +#' mtry(d,cls = 'day') +#' #' ## Perform random forest analysis #' rf_analysis <- randomForest(d,cls = 'day') #' @@ -70,6 +74,40 @@ setMethod('binaryComparisons',signature = 'AnalysisData', #' @rdname modelling-accessors #' @export +setGeneric("mtry", function(x,cls = 'class') + standardGeneric("mtry")) + +#' @rdname modelling-accessors + +setMethod('mtry',signature = 'AnalysisData', + function(x,cls = 'class'){ + + if (is.null(cls)){ + rf_type <- 'classification' + } else { + response <- x %>% + clsExtract(cls = cls) + + rf_type <- ifelse(is.numeric(response), + 'regression', + 'classification') + } + + n_features <- nFeatures(x) + + mtry <- switch(rf_type, + regression = n_features/3, + classification = sqrt(n_features)) %>% + floor() %>% + c(.,1) %>% + max() + + return(mtry) + }) + +#' @rdname modelling-accessors +#' @export + setGeneric("type", function(x) standardGeneric("type")) diff --git a/R/nlda.R b/R/nlda.R index fdb4f937..670223a1 100644 --- a/R/nlda.R +++ b/R/nlda.R @@ -3,7 +3,7 @@ setGeneric('nlda',function(x,cls = 'class',prior = NULL,scale = FALSE,comprank = FALSE,...) 
standardGeneric('nlda')) -#' @importFrom e1071 naiveBayes +#' @importFrom e1071 naiveBayes #' @importFrom stats cov predict #' @importFrom methods as @@ -135,7 +135,7 @@ setMethod('nlda',signature = 'AnalysisData', dimnames(xmeans)[[2]] <- colnames(x) nbmod <- naiveBayes(data.frame(x),cl) - prob <- predict(nbmod,data.frame(x),type="raw") + prob <- stats::predict(nbmod,data.frame(x),type="raw") pred <- apply(prob,1,which.max) pred <- factor(levels(cl)[pred], levels = levels(cl)) diff --git a/R/predict.R b/R/predict.R new file mode 100644 index 00000000..0e55c097 --- /dev/null +++ b/R/predict.R @@ -0,0 +1,123 @@ +#' Predict random forest model responses +#' @rdname predict +#' @description Predict values of random forest model response variables from new data. +#' @param model S4 object of class `RandomForest` +#' @param new_data S4 object of class `AnalysisData` +#' @param idx sample information column to use for sample names. If `NULL`, the sample row number will be used. Sample names should be unique for each row of data. +#' @param type one of `response`, `prob`, or `votes` to indicate the type of prediction to make +#' @param ... arguments to pass to `randomForest::predict.randomForest()` +#' @details +#' The features contained within `new_data` should match those of the features used to train `model`. +#' The `features()` method can be used to check this. +#' The argument `returnModels = TRUE` should also be used when training the `RandomForest-class` object used for argument `model`. 
+#' @examples +#' library(metaboData) +#' +#' ## Prepare some data +#' x <- analysisData(abr1$neg[,200:300],abr1$fact) %>% +#' occupancyMaximum(cls = 'day') %>% +#' transformTICnorm() +#' +#' ## Extract data from which to train a random forest model +#' training_data <- x %>% +#' keepClasses(cls = 'day', +#' classes = c('H','1')) +#' +#' ## Extract data for which response values will be predicted +#' test_data <- x %>% +#' keepClasses(cls = 'day', +#' classes = c('2','3')) +#' +#' rf <- randomForest(training_data, +#' cls = 'day', +#' returnModels = TRUE) +#' +#' predict(rf, +#' test_data) +#' @importFrom purrr map_depth +#' @export + +setGeneric("predict", function(model, + new_data, + idx = NULL, + type = c('response','prob','votes'), + ...) + standardGeneric("predict")) + +#' @rdname predict + +setMethod('predict',signature = c('RandomForest','AnalysisData'), + function(model, + new_data, + idx = NULL, + type = c('response','prob','votes'), + ...){ + + if (type(model) == 'unsupervised') { + stop("Can't predict unsupervised random forest.", + call. = FALSE) + } + + if(length(model@models) == 0){ + stop('No random forest models detected. Use argument `returnModels = TRUE` when running method `randomForest()`.', + call. = FALSE) + } + + if (!is.null(idx)){ + sample_idx <- new_data %>% + clsExtract(cls = idx) + + if (any(duplicated(sample_idx))){ + stop(str_c('Duplicated sample names found in sample information column `', + idx, + '`. The specified sample names should be unique to each sample.'), + call. 
= FALSE) + } + } else { + sample_idx <- seq_len(nSamples(new_data)) + } + + type <- match.arg(type, + c('response','prob','votes')) + + test_data <- dat(new_data) + + model_object_depth <- switch(type(model), + classification = 4, + regression = 3) + + model_predictions <- model@models %>% + map_depth(.depth = model_object_depth, + .f = ~ .x %>% + { + tibble( + Sample = sample_idx, + Prediction = stats::predict( + object = .x, + newdata = test_data, + type = type, + ...)) + }) %>% + map_depth(.depth = model_object_depth - 2, + .f = ~ .x$models) + + column_headers <- c('Response', + 'Comparison', + 'Rep') + type_column_headers <- switch( + type(model), + classification = column_headers, + regression = column_headers[c(1,3)] + ) + + for (i in rev(type_column_headers)) { + model_predictions <- map_depth(.x = model_predictions, + .depth = which(type_column_headers == i) - 1, + .f = bind_rows,.id = i) + } + + model_predictions <- model_predictions %>% + mutate(Rep = as.numeric(Rep)) + + return(model_predictions) + }) diff --git a/R/tune.R b/R/tune.R new file mode 100644 index 00000000..2a1cc86f --- /dev/null +++ b/R/tune.R @@ -0,0 +1,112 @@ +#' Tune random forest parameters +#' @rdname tune +#' @description Tune the `mtry` and `ntree` random forest parameters using a grid search approach. +#' @param x S4 object of class `AnalysisData` +#' @param cls sample information column to use +#' @param mtry_range numeric vector of `mtry` values to search +#' @param ntree_range numeric vector of `ntree` values to search +#' @param seed random number seed +#' @details +#' Parameter tuning is performed by grid search of all combinations of the `mtry_range` and `ntree_range` vectors provided. +#' The optimal parameter values are selected using the out-of-bag error estimates of the `margin` metric for classification and the `rmse` (root-mean-square error) metric for regression. +#' @return +#' A list containing the optimal `mtry` and `ntree` parameters. 
+#' This is suitable for use as the `rf` argument in method `randomForest()`. +#' @examples +#' library(metaboData) +#' +#' ## Prepare some data +#' x <- analysisData(abr1$neg[,200:300],abr1$fact) %>% +#' occupancyMaximum(cls = 'day') %>% +#' transformTICnorm() +#' +#' ## Tune the `mtry` parameter for the `day` response +#' tune(x,cls = 'day') +#' @export + +setGeneric("tune", function(x, + cls = 'class', + mtry_range = floor(seq(mtry(x,cls = cls) - mtry(x,cls = cls)/2, + mtry(x,cls = cls) + mtry(x,cls = cls)/2, + length.out = 4)), + ntree_range = 1000, + seed = 1234) + standardGeneric("tune")) + +#' @rdname tune +#' @importFrom tidyr expand_grid +#' @importFrom dplyr rename_with +#' @importFrom stringr str_remove +#' @importFrom furrr future_map2 + +setMethod('tune',signature = 'AnalysisData', + function(x, + cls = 'class', + mtry_range = floor(seq(mtry(x,cls = cls) - mtry(x,cls = cls)/2, + mtry(x,cls = cls) + mtry(x,cls = cls)/2, + length.out = 4)), + ntree_range = 1000, + seed = 1234){ + + if (is.null(cls)){ + stop("Can't tune unsupervised random forest.", + call. 
= FALSE) + } + + response <- clsExtract(x,cls = cls) + + rf_type <- ifelse(is.numeric(response), + 'regression', + 'classification') + + metric <- switch(rf_type, + regression = 'rmse', + classification = 'margin') + + combinations <- expand_grid(mtry_range, + ntree_range) %>% + rename_with(~ str_remove(.x, + '_range')) + + search_results <- combinations %>% + { + future_map2( + .$ntree, + .$mtry, + .f = ~{ + rf_res <- try(randomForest(x, + cls = cls, + rf = list(ntree = .x, + mtry = .y)), + silent = TRUE) + if (class(rf_res) == 'RandomForest'){ + rf_res %>% + metrics() %>% + select(-Response,-.estimator,-contains('Comparison')) %>% + spread(.metric,.estimate) %>% + mutate(ntree = .x, + mtry = .y) + } else { + NULL + } + + }, + .options = furrr_options(seed = seed)) + } %>% + bind_rows() + + if (nrow(search_results) > 0){ + search_results <- switch(metric, + rmse = search_results %>% + arrange(!!sym(metric)) , + margin = search_results %>% + arrange(desc(!!sym(metric)))) %>% + {list(mtry = .$mtry[1], + ntree = .$ntree[1])} + + return(search_results) + } else { + return(list()) + } + + }) diff --git a/_pkgdown.yml b/_pkgdown.yml index a475f749..62bdfe16 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,6 +1,6 @@ -destination: docs - url: https://jasenfinch.github.io/metabolyseR/ +template: + bootstrap: 5 navbar: components: @@ -55,8 +55,10 @@ reference: - ttest - linearRegression - binaryComparisons + - tune - mds - roc + - predict - title: Correlations contents: @@ -84,4 +86,3 @@ reference: - split - rsd - occupancy - \ No newline at end of file diff --git a/docs/404.html b/docs/404.html deleted file mode 100644 index 44d3a98f..00000000 --- a/docs/404.html +++ /dev/null @@ -1,121 +0,0 @@ - - - - - - - -Page not found (404) • metabolyseR - - - - - - - - - - - -
-
- - - - -
-
- - -Content not found. Please use links in the navbar. - -
- - - -
- - - - -
- - - - - - - - diff --git a/docs/articles/01_quick_start.html b/docs/articles/01_quick_start.html deleted file mode 100644 index beafb25d..00000000 --- a/docs/articles/01_quick_start.html +++ /dev/null @@ -1,219 +0,0 @@ - - - - - - - -Quick start example analysis • metabolyseR - - - - - - - - - - -
-
- - - - -
-
- - - - -

This example analysis will use the abr1 data set from the metaboData package. It is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The analysis will also include use of the pipe %>% from the magrittr package. First load the necessary packages.

- -

For this example we will use only the negative acquisition mode data (abr1$neg) and sample meta-information (abr1$fact). Create an AnalysisData class object using the following:

-
-d <- analysisData(abr1$neg,abr1$fact)
-

The data includes 120 samples and 2000 mass spectral features as shown below.

-
-d
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 2000 
-#> Info: 9
-

The clsAvailable() function can be used to identify the columns available in our meta-information table.

-
-clsAvailable(d)
-#> [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
-#> [8] "day"      "class"
-

For this analysis, we will be using the infection time course class information contained in the day column. This can be extracted and the class frequencies tabulated using the following:

-
-d %>%
-  clsExtract(cls = 'day') %>%
-  table()
-#> .
-#>  1  2  3  4  5  H 
-#> 20 20 20 20 20 20
-

As can be seen above, the experiment is made up of six infection time point classes that include a healthy control class (H) and five day infection time points (1-5), each with 20 replicates.

-

For data pre-treatment prior to statistical analysis, a two-thirds maximum class occupancy filter can be applied. Features where the maximum proportion of non-missing data per class is above two-thirds are retained. A total ion count normalisation will also be applied.

-
-d <- d %>%
-  occupancyMaximum(cls = 'day', occupancy = 2/3) %>%
-  transformTICnorm()
-
-d
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 1760 
-#> Info: 9
-

This has reduced the data set to 1760 relevant features.

-

The structure of the data can be visualised using both unsupervised and supervised methods. For instance, the first two principal components from a principal component analysis (PCA) of the data with the sample points coloured by infection class can be plotted using:

-
-plotPCA(d,cls = 'day',xAxis = 'PC1',yAxis = 'PC2')
-

-

Similarly, multidimensional scaling (MDS) of sample proximity values from a supervised random forest classification model can be plotted, along with receiver operating characteristic (ROC) curves, using:

-
-plotSupervisedRF(d,cls = 'day')
-

-

A progression can clearly be seen from the earliest to latest infected time points.

-

For feature selection, one-way analysis of variance (ANOVA) can be performed for each feature to identify features significantly explanatory for the infection time point.

-
-anova_results <- d %>%
-  anova(cls = 'day')
-

A table of the significantly explanatory features, with a Bonferroni-adjusted p value < 0.05, can be extracted using:

-
-explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05)
-
-explan_feat
-#> # A tibble: 379 × 10
-#>    Response Comparison  Feature term        df   sumsq  meansq statistic  p.value
-#>    <chr>    <chr>       <chr>   <chr>    <dbl>   <dbl>   <dbl>     <dbl>    <dbl>
-#>  1 day      1~2~3~4~5~H N341    response     5 3.88e-4 7.76e-5     137.  1.55e-46
-#>  2 day      1~2~3~4~5~H N133    response     5 7.00e-5 1.40e-5     126.  8.63e-45
-#>  3 day      1~2~3~4~5~H N163    response     5 6.01e-5 1.20e-5     117.  2.95e-43
-#>  4 day      1~2~3~4~5~H N1087   response     5 2.42e-6 4.84e-7      99.8 5.61e-40
-#>  5 day      1~2~3~4~5~H N171    response     5 2.25e-7 4.50e-8      95.7 3.84e-39
-#>  6 day      1~2~3~4~5~H N513    response     5 3.38e-6 6.76e-7      95.3 4.78e-39
-#>  7 day      1~2~3~4~5~H N1025   response     5 2.78e-6 5.56e-7      91.0 3.91e-38
-#>  8 day      1~2~3~4~5~H N342    response     5 3.71e-6 7.41e-7      90.3 5.32e-38
-#>  9 day      1~2~3~4~5~H N1083   response     5 5.11e-5 1.02e-5      89.0 1.06e-37
-#> 10 day      1~2~3~4~5~H N1085   response     5 1.10e-5 2.19e-6      83.4 1.92e-36
-#> # … with 369 more rows, and 1 more variable: adjusted.p.value <dbl>
-

The ANOVA has identified 379 features significantly explanatory over the infection time course. A heat map of the mean relative intensity for each class of these explanatory features can be plotted to visualise their trends between the infection time point classes.

-
-plotExplanatoryHeatmap(anova_results,
-                       threshold = 0.05,
-                       featureNames = FALSE)
-

-

Many of the explanatory features can be seen to be most highly abundant in the final infection time point 5.

-

Finally, box plots of the trends of individual features can be plotted, such as the N341 feature below.

-
-plotFeature(anova_results,feature = 'N341',cls = 'day')
-

-
- - - -
- - - - -
- - - - - - diff --git a/docs/articles/01_quick_start_files/figure-html/feature_plot-1.png b/docs/articles/01_quick_start_files/figure-html/feature_plot-1.png deleted file mode 100644 index 1fb45f3e..00000000 Binary files a/docs/articles/01_quick_start_files/figure-html/feature_plot-1.png and /dev/null differ diff --git a/docs/articles/01_quick_start_files/figure-html/pca-1.png b/docs/articles/01_quick_start_files/figure-html/pca-1.png deleted file mode 100644 index fdeb07d8..00000000 Binary files a/docs/articles/01_quick_start_files/figure-html/pca-1.png and /dev/null differ diff --git a/docs/articles/01_quick_start_files/figure-html/rf_heatmap-1.png b/docs/articles/01_quick_start_files/figure-html/rf_heatmap-1.png deleted file mode 100644 index 93f7754f..00000000 Binary files a/docs/articles/01_quick_start_files/figure-html/rf_heatmap-1.png and /dev/null differ diff --git a/docs/articles/01_quick_start_files/figure-html/supervised_RF-1.png b/docs/articles/01_quick_start_files/figure-html/supervised_RF-1.png deleted file mode 100644 index 61961757..00000000 Binary files a/docs/articles/01_quick_start_files/figure-html/supervised_RF-1.png and /dev/null differ diff --git a/docs/articles/01_quick_start_files/header-attrs-2.10/header-attrs.js b/docs/articles/01_quick_start_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/01_quick_start_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/02_introduction.html b/docs/articles/02_introduction.html deleted file mode 100644 index 801c7498..00000000 --- a/docs/articles/02_introduction.html +++ /dev/null @@ -1,856 +0,0 @@ - - - - - - - -Introduction • metabolyseR - - - - - - - - - - -
-
- - - - -
-
- - - - -
-

-Introduction

-

The metabolyseR package provides a suite of methods that encompass three elements of metabolomics data analysis:

-
    -
  • data pre-treatment
  • -
  • modelling / data mining
  • -
  • correlation analyses
  • -
-

The package also distinguishes between the flexibility and simplicity required for exploratory analyses compared to the convenience needed for more complex routine analyses. This is reflected in the underlying S4 object-oriented implementations and associated methods defined within the package. It should be noted that it is useful to understand the principles involved in using metabolyseR for exploratory analyses to aid in extracting and wrangling the results generated from routine analyses.

-

The following document will provide an introduction to the basic usage of the package and includes how to create and use the base classes that are the foundation of metabolyseR. This will be focused around the applications for both exploratory and routine analyses. For more detailed information on the individual analysis elements see their associated vignette using:

-
-browseVignettes('metabolyseR')
-

There is also an example quick start analysis vignette provided.

-
-vignette('quick_start','metabolyseR')
-

Any issues, bugs or errors encountered while using the package should be reported here.

-

The examples shown here will use the abr1 data set from the metaboData package (?metaboData::abr1). This is a nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data set from a plant-pathogen infection time course experiment. The examples will also include use of the pipe %>% from the magrittr package.

-

Firstly load the necessary packages:

- -
-
-

-Parallel processing

-

The package supports parallel processing using the future package.

-

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel back-end using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

-
-plan(future::multisession,workers = 2)
-

See the future package documentation for more information on the types of parallel implementations that are available.

-
-
-

-Exploratory analyses

-

For exploratory analyses, simple questions of the data need to be answered quickly, requiring few steps. Key requirements for any tool used by investigators are that it should be both simple and flexible.

-

In metabolyseR, the AnalysisData class is the base S4 class that provides these requirements. The following sections will give an overview of the basics in constructing and using these objects as the base for analysis.

-
-

-Analysis data

-

We can firstly construct an AnalysisData object which requires two data tables. The first is the metabolomic data where the columns are the metabolome features, the rows the sample observations and contains the abundance values. The second is the sample meta-information where the row order should match to that of the metabolome data table. Using the example data, this can be constructed and assigned to the variable d by:

-
-d <- analysisData(data = abr1$neg,
-                  info = abr1$fact)
-

Where abr1$neg is the negative ionisation mode data and abr1$fact is the corresponding sample information. By printing d we can view some basic information about our data.

-
-print(d)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 9
-

We can also return the numbers of samples and numbers of features respectively using the following:

- -
## [1] 120
- -
## [1] 2000
-

The data table can be extracted using the dat method:

-
-dat(d)
-
## # A tibble: 120 × 2,000
-##       N1    N2    N3    N4    N5    N6    N7    N8    N9   N10   N11   N12   N13
-##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-##  1     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  2     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  3     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  4     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  5     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  6     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  7     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  8     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  9     0     0     0     0     0     0     0     0     0     0     0     0     0
-## 10     0     0     0     0     0     0     0     0     0     0     0     0     0
-## # … with 110 more rows, and 1,987 more variables: N14 <dbl>, N15 <dbl>,
-## #   N16 <dbl>, N17 <dbl>, N18 <dbl>, N19 <dbl>, N20 <dbl>, N21 <dbl>,
-## #   N22 <dbl>, N23 <dbl>, N24 <dbl>, N25 <dbl>, N26 <dbl>, N27 <dbl>,
-## #   N28 <dbl>, N29 <dbl>, N30 <dbl>, N31 <dbl>, N32 <dbl>, N33 <dbl>,
-## #   N34 <dbl>, N35 <dbl>, N36 <dbl>, N37 <dbl>, N38 <dbl>, N39 <dbl>,
-## #   N40 <dbl>, N41 <dbl>, N42 <dbl>, N43 <dbl>, N44 <dbl>, N45 <dbl>,
-## #   N46 <dbl>, N47 <dbl>, N48 <dbl>, N49 <dbl>, N50 <dbl>, N51 <dbl>, …
-

Or alternatively, can be used to assign a new data table:

-
-dat(d) <- abr1$pos
-d
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 9
-

The sample information table can be extracted using the sinfo method:

-
-sinfo(d)
-
## # A tibble: 120 × 9
-##    injorder pathcdf              filecdf name.org remark name    rep day   class
-##       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
-##  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
-##  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
-##  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
-##  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
-##  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
-##  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
-##  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
-##  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
-##  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
-## 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
-## # … with 110 more rows
-

And similarly used to assign a new sample information table:

-
-sinfo(d) <- abr1$fact[,1:2]
-d
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 2
-
-
-

-Sample information

-

There are a number of methods that provide utility for querying and altering the sample information within an AnalysisData object. These methods are all named with the prefix cls and include:

-
    -
  • clsAdd
  • -
  • clsArrange
  • -
  • clsAvailable
  • -
  • clsExtract
  • -
  • clsRemove
  • -
  • clsRename
  • -
  • clsReplace
  • -
-

The names of the available sample information columns can be shown using clsAvailable().

- -
## [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
-## [8] "day"      "class"
-

A given column can be extracted using clsExtract(). Here, the day column is extracted.

-
-clsExtract(d,cls = 'day')
-
##   [1] 2 3 4 1 2 1 2 4 H H 4 5 1 2 H 5 3 3 2 H 4 3 5 4 H H 3 H H 1 1 1 5 5 3 4 H
-##  [38] 1 5 5 1 2 4 3 2 4 3 2 5 4 4 H 3 4 2 4 4 1 5 4 4 1 1 H 3 2 H 3 3 1 2 H H 2
-##  [75] 3 5 3 2 5 2 4 3 H 2 3 2 1 1 4 5 3 2 1 H 5 2 4 H 1 4 4 1 1 5 H 5 1 3 3 5 5
-## [112] 5 3 2 5 H 5 H 2 1
-## Levels: 1 2 3 4 5 H
-

Sample class frequencies could then be computed.

-
-clsExtract(d,cls = 'day') %>%
-  table()
-
## .
-##  1  2  3  4  5  H 
-## 20 20 20 20 20 20
-

It can be seen that there are 20 samples available in each class.

-

Another example is the addition of a new sample information column. In the following, a column called new_class will be added with all samples labelled 1.

-
-d <- clsAdd(d,cls = 'new_class',value = rep(1,nSamples(d)))
-clsAvailable(d)
-
##  [1] "injorder"  "pathcdf"   "filecdf"   "name.org"  "remark"    "name"     
-##  [7] "rep"       "day"       "class"     "new_class"
-
-
-

-Keeping / removing samples or features

-

Samples or features can easily be kept or removed from an AnalysisData object as is most convenient.

-

Below can be seen the first 6 sample indexes in the injorder column of the sample information.

-
-samples <- d %>%
-  clsExtract(cls = 'injorder') %>%
-  head()
-
-print(samples)
-
## [1] 1 2 3 4 5 6
-

Only these samples could be kept using:

-
-d %>%
-  keepSamples(idx = 'injorder',samples = samples)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 6 
-## Features: 2000 
-## Info: 10
-

Or removed using:

-
-d %>%
-  removeSamples(idx = 'injorder',samples = samples)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 114 
-## Features: 2000 
-## Info: 10
-

The process is very similar for keeping or removing specific metabolome features from the data table. Below can be seen the first 6 feature names in the data table.

-
-feat <- d %>%
-  features() %>%
-  head()
-
-print(feat)
-
## [1] "N1" "N2" "N3" "N4" "N5" "N6"
-

Only these features can be kept using:

-
-d %>%
-  keepFeatures(features = feat)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 6 
-## Info: 10
-

Or to remove these features:

-
-d %>%
-  removeFeatures(features = feat)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 1994 
-## Info: 10
-
-
-
-

-Routine analyses

-

Routine analyses are those that are often made up of numerous steps where parameters have likely already been previously established. The emphasis here is on convenience with as little code as possible required. In these analyses, the necessary analysis elements, order and parameters are first prepared and then the analysis routine subsequently performed in a single step. This section will introduce how this type of analysis can be performed using metabolyseR and will include four main topics:

-
    -
  • analysis parameter selection
  • -
  • performing an analysis
  • -
  • performing a re-analysis
  • -
  • extracting analysis results
  • -
-
-

-Analysis parameters

-

Parameter selection is the fundamental aspect for performing routine analyses using metabolyseR and will be the step requiring the most input from the user. The parameters for an analysis are stored in an S4 object of class AnalysisParameters containing the relevant parameters of the selected analysis elements.

-

The parameters have been named so that they denote the same functionality commonly across all analysis element methods. Discussion of the specific parameters can be found within the vignettes of the relevant analysis elements. These can be accessed using:

-
-browseVignettes('metabolyseR')
-

There are several ways to specify the parameters to use for analysis. The first is programmatically and the second is through the use of the YAML format.

-
-

-Programatic specification

-

The available analysis elements can be shown using:

- -
## [1] "pre-treatment" "modelling"     "correlations"
-

The analysisParameters() function can be used to create an AnalysisParameters object containing the default parameters. For example, the code below will return default parameters for all the metabolyseR analysis elements.

- -
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = class
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = class
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = class
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = class
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = class
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-## 
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
-

To retrieve parameters for a subset of analysis elements the following can be run, returning parameters for only the pre-treatment and modelling elements.

-
-p <- analysisParameters(c('pre-treatment','modelling'))
-p
-
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = class
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = class
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = class
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = class
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = class
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-

The changeParameter() function can be used to uniformly change these parameters across all of the selected methods. The example below changes the defaults of all the parameters named cls from the default class to day.

-
-p <- analysisParameters()
-changeParameter(p,'cls') <- 'day'
-p
-
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = day
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = day
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = day
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = day
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = day
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = day
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = day
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-## 
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
-

Alternatively, the parameters of specific analysis elements can be targeted using the elements argument. The following will only alter the cls parameter back to class for the pre-treatment element parameters:

-
-changeParameter(p,'cls',elements = 'pre-treatment') <- 'class'
-

Parameters can be extracted from the AnalysisParameters class using the parameters() function for a specified element.

-
-parameters(p,'correlations')
-
## $method
-## [1] "pearson"
-## 
-## $pAdjustMethod
-## [1] "bonferroni"
-## 
-## $corPvalue
-## [1] 0.05
-

Each analysis element has a function for returning default parameters for specific methods. These include preTreatmentParameters(), modellingParameters() and correlationParameters(). Each returns a list of the default parameters for the specified methods, as shown in the example for modellingParameters() below.

- -
## $anova
-## $anova$cls
-## [1] "class"
-## 
-## $anova$pAdjust
-## [1] "bonferroni"
-## 
-## $anova$comparisons
-## list()
-## 
-## $anova$returnModels
-## [1] FALSE
-

Refer to the documentation (?) of each function for specific usage details.

-

The parameters returned by these functions can be assigned to an AnalysisParameters object, again using parameters()

-
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
-    occupancyFilter = 'maximum',
-    transform = 'TICnorm'
-      )
-  )
-
-
-

-YAML specification

-

Due to the relatively complex structure of the parameters needed for analyses containing many components, it is also possible to specify analysis parameters using the YAML file format. YAML parameter files (.yaml) can be parsed using the parseParameters() function. The example below shows the YAML specification for the defaults returned by analysisParameters().

-
pre-treatment:
-  QC:
-    occupancyFilter:
-      cls: class
-      QCidx: QC
-      occupancy: 0.667
-    impute:
-      cls: class
-      QCidx: QC
-      occupancy: 0.667
-    RSDfilter:
-      cls: class
-      QCidx: QC
-      RSDthresh: 0.5
-    removeQC:
-      cls: class
-      QCidx: QC
-  occupancyFilter:
-    maximum:
-      cls: class
-      occupancy: 0.667
-  impute:
-    class:
-      cls: class
-      occupancy: 0.667
-      nCores: 4
-      clusterType: FORK
-  transform:
-    TICnorm: ~
-classification:
-  cls: class
-  method: randomForest
-  pars:
-    sampling: boot
-    niter: 10
-    nreps: 10
-    strat: yes
-  nCores: 4
-  clusterType: Fork
-featureSelection:
-  method: fs.rf
-  cls: class
-  pars:
-    fs.rf:
-      nreps: 100
-  nCores: 4
-  clusterType: FORK
-correlations:
-  method: pearson
-  pAdjustMethod: bonferroni
-  corPvalue: 0.05
-

This can be passed directly into an AnalysisParameters object using the following:

-
-paramFile <- system.file('defaultParameters.yaml',package = 'metabolyseR')
-p <- parseParameters(paramFile)
-

For more complex pre-treatment situations such as the following:

-
pre-treatment:
-  remove:
-    sample:
-      idx: fileOrder
-      samples: 1
-  remove1:
-    class:
-      cls: day
-      classes:
-      - H
-      - 1
-  occupancyFilter:
-    maximum:
-      cls: class
-      occupancy: 0.667
-  transform:
-    TICnorm: ~
-

Where multiple steps of the same method are needed (here, remove), these are numbered sequentially. Where multiple values also need to be provided to a particular argument (e.g. classes = c('H','1')), these should be supplied as a hyphenated list.

-

Existing AnalysisParameters objects can also be exported to YAML format as shown below:

-
-p <- analysisParameters()
-exportParameters(p,file = 'analysis_parameters.yaml')
-
-
-
-

-Performing an analysis

-

The analysis is performed in a single step using the metabolyse() function. This accepts the metabolomic data, the sample information and the analysis parameters.

-

The metabolomic data should be a table of abundance values where the columns are the metabolome features and the rows are the sample observations. Similarly, the sample meta-information table should consist of the observations as rows and the meta information as columns. The order of the observation rows of the sample information table should be concordant with the rows in the metabolomics data table.

-

We can run an example analysis using the abr1 data set by first generating the default parameters for pre-treatment and modelling (random forest) analysis elements.

-
-p <- analysisParameters(c('pre-treatment','modelling'))
-

Custom pre-treatment parameters can then be specified to only include occupancy filtering and total ion count normalisation.

-
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
-  occupancyFilter = 'maximum',
-  transform = 'TICnorm')
-)
-

Next the cls parameters can be changed to use the day sample information column throughout the analysis.

-
-changeParameter(p,'cls') <- 'day'
-

Finally, the analysis can be run in a single step. Here only the first 200 features of the negative ionisation mode data are specified to reduce the analysis time needed for this example.

-
-analysis <- metabolyse(abr1$neg[,1:200],abr1$fact,p) 
-
## 
-## metabolyseR  v0.14.3 Tue Sep 14 10:08:22 2021
-
## ________________________________________________________________________________
-
## Parameters:
-## pre-treatment
-##  occupancyFilter
-##      maximum
-##          cls = day
-##          occupancy = 2/3
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = day
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-
## ________________________________________________________________________________
-
## Pre-treatment …
-
-Pre-treatment   ✓ [0.9S]
-## Modelling …
-
-Modelling   ✓ [3.7S]
-## ________________________________________________________________________________
-## 
-## Complete! [4.6S]
-

Note: If a data pre-treatment step is not performed prior to modelling or correlation analysis, the raw data will automatically be used.

-

The analysis object containing the analysis results can be printed to provide some basic information about the results of the analysis.

-
-print(analysis)
-
## 
-## metabolyseR v0.14.3
-## Analysis:
-##  Tue Sep 14 10:08:22 2021
-## 
-##  Raw Data:
-##      No. samples = 120
-##      No. features = 200
-## 
-##  Pre-treated Data:
-##      Tue Sep 14 10:08:22 2021
-##      No. samples = 120
-##      No. features = 48
-## 
-##  Modelling:
-##      Tue Sep 14 10:08:26 2021
-##      Methods: randomForest
-
-
-

-Performing a re-analysis

-

There are likely to be occasions where an analysis will need to be re-analysed using a new set of parameters. This can be achieved using the reAnalyse() function.

-

In the example below we will run a correlation analysis in addition to the pre-treatment and modelling elements already performed.

-

Firstly, we can specify the correlation parameters:

-
-parameters <- analysisParameters('correlations')
-

Then perform the re-analysis on our previously analysed Analysis object, specifying the additional parameters.

-
-analysis <- reAnalyse(analysis,parameters)
-
## 
-## metabolyseR v0.14.3 Tue Sep 14 10:08:26 2021
-## ________________________________________________________________________________
-## Parameters:
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
-## ________________________________________________________________________________
-
## Correlations …
-
-Correlations    ✓ [0.1S]
-
## ________________________________________________________________________________
-## 
-## Complete! [0.1S]
-

An overview of the results of the analysis (now including correlations) can then be printed.

-
-print(analysis)
-
## 
-## metabolyseR v0.14.3
-## Analysis:
-##  Tue Sep 14 10:08:22 2021
-## 
-##  Raw Data:
-##      No. samples = 120
-##      No. features = 200
-## 
-##  Pre-treated Data:
-##      Tue Sep 14 10:08:22 2021
-##      No. samples = 120
-##      No. features = 48
-## 
-##  Modelling:
-##      Tue Sep 14 10:08:26 2021
-##      Methods: randomForest
-## 
-##  Correlations:
-##      Tue Sep 14 10:08:27 2021
-##      No. correlations = 140
-
-
-

-Extracting analysis results

-

An analysis performed by metabolyse() returns an S4 object of class Analysis. There are a number of ways of extracting analysis results from this object.

-

Similarly to the AnalysisData class, the dat() and sinfo() functions can be used to extract the metabolomics data or sample information tables directly for either the raw or pre-treated data.

-

For example, to extract the pre-treated metabolomics data from our object analysis:

-
-dat(analysis,type = 'pre-treated')
-
## # A tibble: 120 × 48
-##       N113    N115    N117    N118    N119    N127    N128    N129  N130    N131
-##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>
-##  1 0.00646 0       1.68e-4 0       1.60e-3 0.0323  2.65e-4 2.80e-4     0 0      
-##  2 0.0113  7.74e-4 1.02e-3 0       1.43e-3 0.00856 0       3.95e-4     0 0      
-##  3 0.00931 6.01e-4 2.70e-3 6.22e-5 5.58e-3 0       0       1.05e-4     0 6.51e-4
-##  4 0.00798 0       0       0       1.62e-4 0.00848 0       4.05e-4     0 1.28e-4
-##  5 0.0105  0       0       0       0       0.00658 0       1.97e-3     0 0      
-##  6 0.00454 0       2.48e-4 3.25e-4 5.31e-4 0.00207 0       1.98e-4     0 0      
-##  7 0.0117  0       1.14e-3 0       4.39e-4 0.00603 0       4.04e-4     0 0      
-##  8 0.00787 2.36e-3 1.43e-3 1.52e-4 4.22e-3 0.00290 2.78e-4 5.76e-5     0 0      
-##  9 0.00136 1.87e-4 8.17e-4 1.87e-4 0       0.0610  1.31e-4 5.23e-4     0 0      
-## 10 0.00899 4.26e-4 2.06e-3 0       8.36e-4 0.00106 7.72e-4 0           0 0      
-## # … with 110 more rows, and 38 more variables: N132 <dbl>, N133 <dbl>,
-## #   N134 <dbl>, N135 <dbl>, N136 <dbl>, N137 <dbl>, N139 <dbl>, N143 <dbl>,
-## #   N145 <dbl>, N146 <dbl>, N147 <dbl>, N149 <dbl>, N153 <dbl>, N155 <dbl>,
-## #   N157 <dbl>, N161 <dbl>, N163 <dbl>, N164 <dbl>, N165 <dbl>, N168 <dbl>,
-## #   N169 <dbl>, N170 <dbl>, N171 <dbl>, N173 <dbl>, N174 <dbl>, N175 <dbl>,
-## #   N179 <dbl>, N180 <dbl>, N181 <dbl>, N183 <dbl>, N187 <dbl>, N191 <dbl>,
-## #   N192 <dbl>, N193 <dbl>, N195 <dbl>, N196 <dbl>, N197 <dbl>, N198 <dbl>
-

Or to extract the raw sample information:

-
-sinfo(analysis,type = 'raw')
-
## # A tibble: 120 × 9
-##    injorder pathcdf              filecdf name.org remark name    rep day   class
-##       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
-##  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
-##  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
-##  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
-##  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
-##  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
-##  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
-##  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
-##  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
-##  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
-## 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
-## # … with 110 more rows
-

Alternatively the raw or preTreated functions can be used to extract the AnalysisData class objects containing both the metabolomics data and sample information for the raw and pre-treated data respectively.

-
-raw(analysis)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 200 
-## Info: 9
-
-preTreated(analysis)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 48 
-## Info: 9
-

Lastly the analysisResults function can be used to extract the results of any of the analysis elements. The following will extract the modelling results:

-
-analysisResults(analysis,element = 'modelling')
-
## $randomForest
-## 
-## Random forest classification 
-## 
-## Samples:  120 
-## Features:     48 
-## Response:     day 
-## # comparisons:    1
-
-
-
- - - -
- - - - -
- - - - - - diff --git a/docs/articles/02_introduction_files/header-attrs-2.10/header-attrs.js b/docs/articles/02_introduction_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/02_introduction_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/03_pre_treatment.html b/docs/articles/03_pre_treatment.html deleted file mode 100644 index 67fc0763..00000000 --- a/docs/articles/03_pre_treatment.html +++ /dev/null @@ -1,595 +0,0 @@ - - - - - - - -Metabolomics data pre-treatment • metabolyseR - - - - - - - - - - -
-
- - - - -
-
- - - - -
-

-Introduction

-

Metabolomics data from any analytical technique requires various data pre-treatment steps prior to subsequent data mining or other downstream analyses. This aids both the data quality and integrity. It is important that appropriate pre-treatment strategies are used not only for the analytical technique being applied but are also suitable for the statistical or machine learning analyses that are to be utilised. Careful consideration of the pre-treatment steps to be undertaken is required as they can have a substantial influence on the results and inferences taken from metabolomic analyses.

-

Data pre-treatment is the most faceted aspect of the analysis elements in metabolyseR. It is itself made up of a number of elements, which themselves are made up of methods. The following document will outline the application of each of these pre-treatment elements for use in exploratory analyses then outline how to apply them in routine analyses. For an introduction to the usage of metabolyseR for both exploratory and routine analyses, see the introduction vignette using:

-
-vignette('introduction','metabolyseR')
-

To further supplement this document, a quick start example analysis is also available as a vignette:

-
-vignette('quick_start','metabolyseR')
-

To begin, the package can be loaded using:

-
-library(metabolyseR)
-#> 
-#> Attaching package: 'metabolyseR'
-#> The following object is masked from 'package:stats':
-#> 
-#>     anova
-#> The following objects are masked from 'package:base':
-#> 
-#>     raw, split
-
-

-Example data

-

The examples used here will use the abr1 data set from the metaboData package. This is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The pipe %>% from the magrittr package will also be used. The example data can be loaded using:

- -

Only the negative acquisition mode data (abr1$neg) will be used along with the sample meta-information (abr1$fact). Create an AnalysisData class object, assigned to the variable d, using the following:

-
-d <- analysisData(abr1$neg,abr1$fact)
-
-print(d)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 2000 
-#> Info: 9
-

As can be seen above the data set contains a total of 120 samples and 2000 features.

-
-
-

-Parallel processing

-

The package supports parallel processing using the future package.

-

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel implementation using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

-
-plan(future::multisession,workers = 2)
-

See the future package documentation for more information on the types of parallel implementations that are available.

-
-
-
-

-Pre-treatment elements

-

The following sections will outline the numerous pre-treatment elements available within metabolyseR. There will be examples of their application during exploratory analyses along with useful visualisations. These can aid interpretation of when particular treatments should be applied as well as their effect once they have been used.

-
-

-Removal of samples, classes or features

-

In many situations, it will be necessary to exclude either individual samples, sample classes or certain features from further analysis.

-

Individual samples can be removed using removeSamples() as below, where the idx argument stipulates the sample information column containing the sample indexes and the samples argument a vector of sample indexes to remove.

-
-d %>%
-  removeSamples(idx = 'injorder',samples = 1)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 119 
-#> Features: 2000 
-#> Info: 9
-

The removeClasses function can be used similarly to remove whole classes from further analysis:

-
-d %>%
-  removeClasses(cls = 'day',classes = 'H')
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 100 
-#> Features: 2000 
-#> Info: 9
-

The following will enable the removal of specified features as a vector supplied to the features argument:

-
-d %>%
-  removeFeatures(features = c('N1','N2'))
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 1998 
-#> Info: 9
-

There could be occasions where the numbers of samples, classes or features to remove are greater than the numbers of samples, classes or features that are to be retained. In these situations it will be more convenient to directly specify the samples, classes or features to retain. Keeping samples, classes or features is outlined in the following section.

-
-
-

-Keeping samples, classes or features

-

Often it will be necessary to retain only particular samples, sample classes or certain features for further analysis.

-

Individual samples can be kept using keepSamples() as below, where the idx argument stipulates the sample information column containing the sample indexes and the samples argument a vector of sample indexes to keep.

-
-d %>%
-  keepSamples(idx = 'injorder',samples = 1)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 1 
-#> Features: 2000 
-#> Info: 9
-

The keepClasses() method can be used similarly to keep whole classes for further analysis:

-
-d %>%
-  keepClasses(cls = 'day',classes = 'H')
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 20 
-#> Features: 2000 
-#> Info: 9
-

The following will specify features to keep, with a vector of feature names supplied to the features argument:

-
-d %>%
-  keepFeatures(features = c('N1','N2'))
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 2 
-#> Info: 9
-

There are likely to be occasions where the numbers of samples, classes or features to keep are greater than the numbers of samples, classes or features that are to be excluded. In these situations it will be more convenient to directly specify the samples, classes or features to remove. Removing samples, classes or features is outlined in the previous section.

-
-
-

-Feature filtering based on occupancy

-

Occupancy provides a useful metric by which to filter poorly represented features (features containing a majority zero or missing values). An occupancy threshold provides a means of specifying this majority with variables below the threshold excluded from further analyses. However, this can be complicated by an underlying class structure present within the data where a variable may be well represented within one class but not in another.

-

The proportional occupancy for each feature within a data set for a given class structure can be calculated using the occupancy() method, specifying the sample information column using the cls argument.

-
-d %>%
-  occupancy(cls = 'day')
-#> # A tibble: 11,914 × 5
-#>    day   Feature     N `Class total` Occupancy
-#>    <fct> <chr>   <dbl>         <int>     <dbl>
-#>  1 1     N1          0            20         0
-#>  2 1     N10         0            20         0
-#>  3 1     N100        0            20         0
-#>  4 1     N1000      20            20         1
-#>  5 1     N1001      20            20         1
-#>  6 1     N1002      20            20         1
-#>  7 1     N1003      20            20         1
-#>  8 1     N1004      20            20         1
-#>  9 1     N1005      20            20         1
-#> 10 1     N1006      20            20         1
-#> # … with 11,904 more rows
-

Alternatively the occupancy distributions can be plotted providing a useful overview of the data set:

-
-d %>%
-  plotOccupancy(cls = 'day')
-

-

It can be seen that there are a number of unoccupied features across all the sample classes with a small rise in the density distribution near 0.

-

There are two strategies for thresholding occupancy. The first is a maximum threshold, where the maximum occupancy across all classes is above the threshold. Therefore, for a feature to be retained, only a single class needs to have an occupancy above the threshold. It is this strategy that will be appropriate for most applications. A two-thirds maximum occupancy filter can be applied to the day sample information column of our data using:

-
-maximum_occupancy_filtered <- d %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3)
-

It can be seen below that this removes 240 features.

-
-print(maximum_occupancy_filtered)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 1760 
-#> Info: 9
-

Plotting the occupancy distributions shows that all the low occupancy features have now been removed.

-
-maximum_occupancy_filtered %>%
-  plotOccupancy(cls = 'day')
-

-

The alternative strategy is by applying a minimum threshold; where the minimum occupancy across all classes is required to be above the threshold. Therefore, for a feature to be retained, all classes would need to have an occupancy above the threshold. A two-thirds minimum occupancy filter can be applied to the day sample information column of our data using:

-
-minimum_occupancy_filtered <- d %>%
-  occupancyMinimum(cls = 'day',occupancy = 2/3)
-

It can be seen below that this removes 344 features.

-
-print(minimum_occupancy_filtered)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 1656 
-#> Info: 9
-
-
-

-Data transformation

-

Prior to downstream analyses, metabolomics data often require transformation to fulfill the assumptions of a particular statistical/data mining technique.

-

There are a wide range of transformation methods available that are commonly used for the analysis of metabolomics data. These methods are all named with the prefix transform.

-

The effects of a transformation on a data set can be assessed using a supervised classification approach. The following performs a supervised random forest analysis of the example data and plots the results using both multidimensional scaling (MDS) and receiver operating characteristic (ROC) curves.

-
-d %>%
-  plotSupervisedRF(cls = 'day')
-

-

Alternatively a log10 transformation can be applied prior to analysis:

-
-d %>%
-  transformLog10() %>%
-  plotSupervisedRF(cls = 'day')
-

-

Or a total ion count (TIC) normalisation where each individual sample is corrected by its TIC. This is one method that can be used to account for small variability in sample concentration.

-
-d %>%
-  transformTICnorm() %>%
-  plotSupervisedRF(cls = 'day')
-

-

The margin value is a metric that can be used to assess model performance. Positive values indicate a model's ability, on average, to correctly predict the class labels of the analysed data.

-

As can be seen in the plots above, the transformations have little effect on the overall structure of the data set. However, there are small increases in the margins of the transformed data (model improvement). Note that here, a non-parametric machine learning approach has been applied to assess the effects of the transformations on the data. Using a different approach, such as the parametric analysis of variance (ANOVA), which has different underlying assumptions, will likely give different results to the assessment above.

-
-
-

-Sample aggregation

-

Sample aggregation allows the electronic pooling of samples based on a grouping variable. This is useful in situations such as the presence of technical replicates that can be aggregated to reduce the effects of pseudo replication. metabolyseR provides methods for mean, median and sum aggregation and each starts with the aggregate prefix.

-

Below shows a principal component analysis (PCA) plot of the example data coloured by the classes of the day sample information column. It is first maximum occupancy filtered to remove empty features.

-
-d %>%
-  occupancyMaximum(cls = 'day') %>%
-  plotPCA(cls = 'day')
-

-

The example below shows the mean aggregation of the data using the experimental classes within the day sample information column.

-
-day_mean <- d %>%
-  occupancyMaximum(cls = 'day') %>%
-  aggregateMean(cls = 'day')
-

The PCA plot below shows these class averages of the data.

-
-plotPCA(day_mean,cls = 'day',ellipses = FALSE)
-

-
-
-

-Batch/block correction

-

There can sometimes be artificial batch related variability introduced into metabolomics analyses as a result of analytical instrumentation or sample preparation. With appropriate sample randomisation (see section on feature filtering based on QC samples), batch related variability can be corrected for using an average centring correction method, applied to the individual features.

-

The plot below shows differences in the TIC distributions for each of the classes in the day sample information column.

-
-d %>%
-  plotTIC(by = 'day',colour = 'day')
-

-

The data can then be corrected by class average centring as shown below.

-
-corrected_data <- d %>%
-  correctionCenter(block = 'day',type = 'median')
-

The plot of the TICs below shows that the inter-class variability has been removed but the intra-class variability has been retained.

-
-plotTIC(corrected_data,
-        by = 'day',
-        colour = 'day')
-

-
-
-

-Imputation of missing data

-

Missing values can have an important influence on downstream analyses with zero values heavily influencing the outcomes of parametric tests. Where and how they are imputed are important considerations and this is highly related to variable occupancy. The methods provided here allow both these aspects to be taken into account and utilise Random Forest imputation using the missForest package.

-

Below shows a Linear Discriminant Analysis (LDA) plot of the example data. The eigenvalue (Tw) gives a comparable indication of the separation between the sample classes.

-
-d %>%
-  keepClasses(cls = 'day',classes = c('H','5')) %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
-  plotLDA(cls = 'day')
-

-

The following shows the same, except there is an application of imputation prior to the LDA. The imputed data is based on the data of all the samples present in the data set. It shows a very slight drop in the eigenvalue and therefore reduced separation between the sample classes.

-
-d %>%
-  keepClasses(cls = 'day',classes = c('H','5')) %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
-  imputeAll(parallel = 'variables') %>%
-  plotLDA(cls = 'day')
-

-

Imputation accuracy is likely to be reduced if data is sparse or there is underlying class structure where there is significant discrimination. Below shows the application of imputation prior to the LDA, except this time the imputation is class-wise. The imputed data is based only on the values of other samples within the class.

-
-d %>%
-  keepClasses(cls = 'day',classes = c('H','5')) %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
-  imputeClass(cls = 'day') %>%
-  plotLDA(cls = 'day')
-

-

This shows a slight increase in the eigenvalue with the classes showing greater separation. This is likely due to the increased accuracy of the imputed data relative to the class structure.

-
-
-

-Feature filtering based on quality control (QC) samples

-

A QC sample is an average pooled sample, equally representative in composition of all the samples present within an experimental set. Within an analytical run, the QC sample is analysed at equal intervals throughout the run. If there is class structure within the run, this should be randomised in a block fashion so that the classes are equally represented in each block throughout the run. A QC sample can then be injected and analysed between these randomised blocks. This provides a set of technical injections that allows the variability in instrument performance over the run to be accounted for and the robustness of the acquired variables to be assessed.

-

The technical reproducibility of an acquired variable can be assessed using its relative standard deviation (RSD) within the QC samples. The variable RSDs can then be filtered below a threshold value to remove metabolome features that are poorly reproducible across the analytical runs. This variable filtering strategy has an advantage over that of occupancy alone as it is not dependent on underlying class structure. Therefore, the variables and variable numbers will not alter if a new class structure is imposed upon the data.

-

The example data set does not include QC samples. For this example, the H class will be used.

-

Firstly, the RSD distribution will be assessed for only the H class. The following retains only the H class samples to aid visualisation.

-
-QC <- d %>%
-  keepClasses(cls = 'day',classes = 'H')
-

The table of RSD values for each of the features can be computed as below.

-
-QC %>%
-  rsd(cls = 'day')
-#> # A tibble: 2,000 × 5
-#>    day   Feature  Mean    SD   RSD
-#>    <fct> <chr>   <dbl> <dbl> <dbl>
-#>  1 H     N1        0     0   NaN  
-#>  2 H     N10       0     0   NaN  
-#>  3 H     N100      0     0   NaN  
-#>  4 H     N1000   114.   19.4  17.0
-#>  5 H     N1001    99.2  21.6  21.7
-#>  6 H     N1002    86.7  23.9  27.6
-#>  7 H     N1003    82.3  18.0  21.9
-#>  8 H     N1004    91.6  18.8  20.5
-#>  9 H     N1005    78.2  14.0  17.9
-#> 10 H     N1006    78.6  21.3  27.1
-#> # … with 1,990 more rows
-

The distributions of the feature RSD values can be plotted for the H class.

-
-QC %>%
-  plotRSD(cls = 'day')
-#> Warning: Removed 123 rows containing non-finite values (stat_density).
-#> Warning: Removed 1 row(s) containing missing values (geom_path).
-

-

This shows that there are a number of features with very high RSD values and therefore poor analytical robustness. Many of these are likely to be a result of poor occupancy and zero values. Applying an occupancy filter prior to plotting does indeed show a reduction in the upper range of RSD values retained.

-
-QC %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
-  plotRSD(cls = 'day')
-

-

metabolyseR contains a number of methods for applying pre-treatment routines specifically on QC samples, which are all prefixed with QC. These include methods for feature filtering of a data set based on the occupancy of the QC class, imputation of the QC class only, feature filtering based on the RSD values of the QC class and removal of only the QC class.

-

Below shows an example of applying some of these QC methods. This will first filter the features in the data set based on the occupancy of the QC class. Then the features are filtered based on the RSD values of the QC class using an RSD threshold of 50%. The class index of the QC samples is specified using the QCidx argument.

-
-QC_filtered <- d %>%
-  QCoccupancy(cls = 'day',QCidx = 'H',occupancy = 2/3) %>%
-  QCrsdFilter(cls = 'day',QCidx = 'H',RSDthresh = 50)
-

This removes a total of 637 features.

-
-print(QC_filtered)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 1363 
-#> Info: 9
-
-
-
-

-Routine analyses

-

For routine analyses, the available pre-treatment elements can be retrieved using:

-
-preTreatmentElements()
-#> [1] "aggregate"       "correction"      "impute"          "keep"           
-#> [5] "occupancyFilter" "QC"              "remove"          "transform"
-

The available methods for a specified pre-treatment element can be viewed using:

-
-preTreatmentMethods('remove')
-#> [1] "classes"  "features" "samples"
-

The default pre-treatment parameters can first be assigned to the variable p.

-
-p <- analysisParameters('pre-treatment')
-

The preTreatmentParameters() function allows the parameters for particular pre-treatment elements to be specified. The following specifies the pre-treatment elements that will be used for this data set. These will include the keeping of certain sample classes, the filtering of features based on class occupancy and the application of a TIC normalisation. These will be assigned to the p variable using the parameters() method.

-
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
-    keep = 'classes',
-    occupancyFilter = 'maximum',
-    transform = 'TICnorm' 
-  )
-)
-

Printing p shows these pre-treatment steps.

-
-print(p)
-#> Parameters:
-#> pre-treatment
-#>  keep
-#>      classes
-#>          cls = class
-#>          classes = c()
-#>  occupancyFilter
-#>      maximum
-#>          cls = class
-#>          occupancy = 2/3
-#>  transform
-#>      TICnorm
-

Next, the day sample information column can be specified, along with the classes to be kept which will be the H, the 1 and the 2 classes.

-
-changeParameter(p,'cls') <- 'day'
-changeParameter(p,'classes') <- c('H','1','2')
-

Printing p shows the final pre-treatment parameters that will be used for this analysis.

-
-print(p)
-#> Parameters:
-#> pre-treatment
-#>  keep
-#>      classes
-#>          cls = day
-#>          classes = c("H", "1", "2")
-#>  occupancyFilter
-#>      maximum
-#>          cls = day
-#>          occupancy = 2/3
-#>  transform
-#>      TICnorm
-

The pre-treatment routine can then be executed.

-
analysis <- metabolyse(abr1$neg,abr1$fact,p)
-#> 
-#> metabolyseR  v0.14.3 Tue Sep 14 10:12:19 2021
-#> ________________________________________________________________________________
-#> Parameters:
-#> pre-treatment
-#>  keep
-#>      classes
-#>          cls = day
-#>          classes = c("H", "1", "2")
-#>  occupancyFilter
-#>      maximum
-#>          cls = day
-#>          occupancy = 2/3
-#>  transform
-#>      TICnorm
-#> ________________________________________________________________________________
-#> Pre-treatment …
-
-Pre-treatment   ✓ [9.1S]
-#> ________________________________________________________________________________
-#> 
-#> Complete! [9.1S]
-

Printing the analysis object shows the resulting data from the pre-treatment routine.

-
-print(analysis)
-#> 
-#> metabolyseR v0.14.3
-#> Analysis:
-#>  Tue Sep 14 10:12:19 2021
-#> 
-#>  Raw Data:
-#>      No. samples = 120
-#>      No. features = 2000
-#> 
-#>  Pre-treated Data:
-#>      Tue Sep 14 10:12:28 2021
-#>      No. samples = 60
-#>      No. features = 1723
-

The pre-treated data can be extracted from the Analysis object using several methods.

-

Firstly the analysisResults() method.

-
-analysisResults(analysis,'pre-treatment')
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 60 
-#> Features: 1723 
-#> Info: 9
-

And secondly the preTreated() method.

-
-preTreated(analysis)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 60 
-#> Features: 1723 
-#> Info: 9
-

A supervised random forest analysis can be used to visualise the structure of the resulting pre-treated data.

-
-analysis %>%
-  plotSupervisedRF(cls = 'day',type = 'pre-treated')
-

-
-
- - - -
- - - - -
- - - - - - diff --git a/docs/articles/03_pre_treatment_files/figure-html/QC_occupancy_rsd-1.png b/docs/articles/03_pre_treatment_files/figure-html/QC_occupancy_rsd-1.png deleted file mode 100644 index 42bc0231..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/QC_occupancy_rsd-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/QC_rsd_plot-1.png b/docs/articles/03_pre_treatment_files/figure-html/QC_rsd_plot-1.png deleted file mode 100644 index 594f8a73..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/QC_rsd_plot-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/TICnorm_RF-1.png b/docs/articles/03_pre_treatment_files/figure-html/TICnorm_RF-1.png deleted file mode 100644 index e6b738f8..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/TICnorm_RF-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/corrected-TIC plot-1.png b/docs/articles/03_pre_treatment_files/figure-html/corrected-TIC plot-1.png deleted file mode 100644 index d17aa2e6..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/corrected-TIC plot-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/day_TICs-1.png b/docs/articles/03_pre_treatment_files/figure-html/day_TICs-1.png deleted file mode 100644 index 2f83f27c..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/day_TICs-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/day_mean_pca-1.png b/docs/articles/03_pre_treatment_files/figure-html/day_mean_pca-1.png deleted file mode 100644 index 278a5b43..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/day_mean_pca-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/impute_all_lda-1.png 
b/docs/articles/03_pre_treatment_files/figure-html/impute_all_lda-1.png deleted file mode 100644 index 7fcd2ace..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/impute_all_lda-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/imputed_class_lda-1.png b/docs/articles/03_pre_treatment_files/figure-html/imputed_class_lda-1.png deleted file mode 100644 index 7d0e904d..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/imputed_class_lda-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/lda-1.png b/docs/articles/03_pre_treatment_files/figure-html/lda-1.png deleted file mode 100644 index aac17e03..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/lda-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/log10_RF-1.png b/docs/articles/03_pre_treatment_files/figure-html/log10_RF-1.png deleted file mode 100644 index 07ccf5ea..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/log10_RF-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/pca-1.png b/docs/articles/03_pre_treatment_files/figure-html/pca-1.png deleted file mode 100644 index 6913734c..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/pca-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/plot_filtered_occupancy-1.png b/docs/articles/03_pre_treatment_files/figure-html/plot_filtered_occupancy-1.png deleted file mode 100644 index 53c13122..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/plot_filtered_occupancy-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/plot_occupancy-1.png b/docs/articles/03_pre_treatment_files/figure-html/plot_occupancy-1.png deleted file mode 100644 index e73f927c..00000000 Binary files 
a/docs/articles/03_pre_treatment_files/figure-html/plot_occupancy-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/supervised-rf-1.png b/docs/articles/03_pre_treatment_files/figure-html/supervised-rf-1.png deleted file mode 100644 index 2da0f7a1..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/supervised-rf-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/figure-html/transform_RF-1.png b/docs/articles/03_pre_treatment_files/figure-html/transform_RF-1.png deleted file mode 100644 index e9d84a63..00000000 Binary files a/docs/articles/03_pre_treatment_files/figure-html/transform_RF-1.png and /dev/null differ diff --git a/docs/articles/03_pre_treatment_files/header-attrs-2.10/header-attrs.js b/docs/articles/03_pre_treatment_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/03_pre_treatment_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/04_modelling.html b/docs/articles/04_modelling.html deleted file mode 100644 index 4254756c..00000000 --- a/docs/articles/04_modelling.html +++ /dev/null @@ -1,860 +0,0 @@ - - - - - - - -Modelling and feature selection • metabolyseR - - - - - - - - - - -
-
- - - - -
-
- - - - -
-

-Introduction

-

Modelling provides the essential data mining step for extracting biological information and explanatory metabolome features from a data set relating to the experimental conditions. metabolyseR provides a number of both univariate and multivariate methods for data mining.

-

For an introduction to the usage of metabolyseR for both exploratory and routine analyses, see the introduction vignette using:

-
-vignette('introduction','metabolyseR')
-

To further supplement this document, a quick start example analysis is also available as a vignette:

-
-vignette('quick_start','metabolyseR')
-

To begin, the package can be loaded using:

-
-library(metabolyseR)
-#> 
-#> Attaching package: 'metabolyseR'
-#> The following object is masked from 'package:stats':
-#> 
-#>     anova
-#> The following objects are masked from 'package:base':
-#> 
-#>     raw, split
-
-

-Example data

-

The examples used here will use the abr1 data set from the metaboData package. This is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The pipe %>% from the magrittr package will also be used. The example data can be loaded using:

- -

Only the negative acquisition mode data (abr1$neg) will be used along with the sample meta-information (abr1$fact). Create an AnalysisData class object, assigned to the variable d, using the following:

-
-d <- analysisData(abr1$neg[,1:500],abr1$fact)
-
-print(d)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 500 
-#> Info: 9
-

As can be seen above the data set contains a total of 120 samples and 500 features.

-
-
-

-Parallel processing

-

The package supports parallel processing using the future package.

-

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel implementation using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

-
-plan(future::multisession,workers = 2)
-

See the future package documentation for more information on the types of parallel implementations that are available.

-
-
-
-

-Random Forest

-

Random forest is a versatile ensemble machine learning approach based on forests of decision trees for multivariate data mining. This can include unsupervised analysis, classification of discrete response variables and regression of continuous responses.

-

Random forest can be performed in metabolyseR using the randomForest() method. For further details on the arguments for using this function, see ?randomForest. This implementation of random forest in metabolyseR utilises the randomForest package. See ?randomForest::randomForest for more information about that implementation.

-
-

-Unsupervised

-

The unsupervised random forest approach can be a useful starting point for analysis in any experimental context. It can be used to give a general overview of the structure of the data and to identify any possible problems. These could include situations such as the presence of outlier samples or splits in the data caused by the impact of analytical or sample preparation factors. Unsupervised random forest can have advantages in these assessments over other approaches such as Principal Component Analysis (PCA). It is less sensitive to the effect of a single feature that in fact could have little overall impact relative to the other hundreds that could be present in a data set.

-

The examples below will show the use of unsupervised random forest for assessing the general structure of the example data set and the presence of outlier samples.

-

Unsupervised random forest can be performed by setting the cls argument of randomForest() to NULL:

-
-unsupervised_rf <- d %>%
-  randomForest(cls = NULL)
-

The type of random forest that has been performed can be checked using the type method.

-
-type(unsupervised_rf)
-#> [1] "unsupervised"
-

Or by printing the results object.

-
-unsupervised_rf
-#> 
-#> Unsupervised random forest
-#> 
-#> Samples:  120 
-#> Features:     500
-

Firstly, the presence of outlier samples will be assessed. A multidimensional scaling (MDS) plot can be used to visualise the relative proximity of the observations, as shown in the following. The individual points are also labelled by their injection order to enable the identification of individual samples if necessary.

-
-plotMDS(unsupervised_rf,
-        cls = NULL,
-        label = 'injorder',
-        labelSize = 3,
-        title = 'Outlier detection')
-#> Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider
-#> increasing max.overlaps
-

-

From the plot above, it can be seen a single sample lies outside the 95% confidence ellipse. It is unlikely that this sample can be considered an outlier as its position is a result of the underlying class structure as opposed to differences specific to that individual sample.

-

The structure of these observations can be investigated further by colouring the points by a different experimental factor. This will be by the day class column which is the main experimental factor of interest in this experiment.

-
-plotMDS(unsupervised_rf,
-        cls = 'day')
-

-

This shows that it is indeed the experimental factor of interest that is having the greatest impact on the structure of the data. The progression of the experimental time points is obvious across Dimension 1.

-

The available feature importance metrics for a random forest analysis can be retrieved by:

-
-importanceMetrics(unsupervised_rf)
-#> [1] "1"                    "2"                    "FalsePositiveRate"   
-#> [4] "MeanDecreaseAccuracy" "MeanDecreaseGini"     "SelectionFrequency"
-

And the importance values of these metrics for each feature can be returned using:

-
-importance(unsupervised_rf)
-#> # A tibble: 3,000 × 3
-#>    Feature Metric                Value
-#>    <chr>   <chr>                 <dbl>
-#>  1 N1      1                    0     
-#>  2 N1      2                    0     
-#>  3 N1      FalsePositiveRate    0.0238
-#>  4 N1      MeanDecreaseAccuracy 0     
-#>  5 N1      MeanDecreaseGini     0     
-#>  6 N1      SelectionFrequency   0     
-#>  7 N10     1                    0     
-#>  8 N10     2                    0     
-#>  9 N10     FalsePositiveRate    0.0238
-#> 10 N10     MeanDecreaseAccuracy 0     
-#> # … with 2,990 more rows
-

The explanatory features for a given threshold can be extracted for any of the importance metrics. The following will extract the explanatory features below a threshold of 0.05 based on the false positive rate metric.

-
-unsupervised_rf %>%
-  explanatoryFeatures(metric = "FalsePositiveRate", 
-                      threshold = 0.05)
-#> # A tibble: 359 × 3
-#>    Feature Metric               Value
-#>    <chr>   <chr>                <dbl>
-#>  1 N342    FalsePositiveRate 1.31e-19
-#>  2 N161    FalsePositiveRate 2.34e-16
-#>  3 N341    FalsePositiveRate 6.50e-16
-#>  4 N315    FalsePositiveRate 1.79e-15
-#>  5 N367    FalsePositiveRate 3.47e-14
-#>  6 N173    FalsePositiveRate 9.09e-14
-#>  7 N385    FalsePositiveRate 9.09e-14
-#>  8 N133    FalsePositiveRate 1.52e-12
-#>  9 N439    FalsePositiveRate 1.52e-12
-#> 10 N379    FalsePositiveRate 3.78e-12
-#> # … with 349 more rows
-

In this example there are 359 explanatory features.

-

The trend of the most highly ranked explanatory feature against the day factor can be plotted using the plotFeature() method.

-
-unsupervised_rf %>%
-  plotFeature(feature = 'N425',
-              cls = 'day')
-

-
-
-

-Classification

-

Random forest classification can be used to assess the extent of discrimination (difference) between classes of a discrete response variable. This includes both multinomial (number of classes > 2) and binary (number of classes = 2) comparisons.

-

In multinomial situations, the suitability of a multinomial comparison versus multiple binary comparisons can depend on the experimental context. For instance, in a treatment/control experiment that includes multiple time points, a multinomial comparison using all available classes could be useful to visualise the general structure of the data. However, it could make any extracted explanatory features difficult to reason about as to how they relate to the individual experimental time point or treatment conditions. An investigator could instead identify the binary comparisons relevant to the biological question and focus the further classification comparisons to better select for explanatory features.

-
-

-Multinomial comparisons

-

In experiments with more than two classes, multinomial random forest classification can be used to assess the discrimination between the classes and give an overview of the relative structure between classes.

-

The example data set consists of a total of 6 classes for the day response variable.

-
-d %>% 
-  clsExtract(cls = 'day') %>% 
-  unique()
-#> [1] 2 3 4 1 H 5
-#> Levels: 1 2 3 4 5 H
-

Multinomial classification can be performed by:

-
-multinomial_rf <- d %>%
-  randomForest(cls = 'day')
-
-print(multinomial_rf)
-#> 
-#> Random forest classification 
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Response:     day 
-#> # comparisons:    1
-

The performance of this model can be assessed using metrics based on the success of the out of bag (OOB) predictions. The performance metrics can be extracted using:

-
-multinomial_rf %>%
-  metrics()
-#> # A tibble: 4 × 5
-#>   Response Comparison  .metric  .estimator .estimate
-#>   <chr>    <chr>       <chr>    <chr>          <dbl>
-#> 1 day      1~2~3~4~5~H accuracy multiclass     0.8  
-#> 2 day      1~2~3~4~5~H kap      multiclass     0.76 
-#> 3 day      1~2~3~4~5~H roc_auc  hand_till      0.964
-#> 4 day      1~2~3~4~5~H margin   <NA>           0.146
-

These metrics include accuracy, Cohen’s kappa (kap), area under the receiver operator characteristic curve (roc_auc, ROC-AUC) and margin. Each metric has both strengths and weaknesses that depend on the context of the classification such as the balance of observations between the classes. As shown below, the class frequencies for this example are balanced with 20 observations per class.

-
-d %>% 
-  clsExtract(cls = 'day') %>% 
-  table()
-#> .
-#>  1  2  3  4  5  H 
-#> 20 20 20 20 20 20
-

In this context, each of these metrics could be used to assess the predictive performance of the model. The margin metric is the difference between the proportion of votes for the correct class and the maximum proportion of votes for the other classes for a given observation which is then averaged across all the observations. A positive margin value indicates correct classification and values greater than 0.2 can be considered as the models having strong predictive power. The margin also allows the extent of discrimination to be discerned even in very distinct cases above where both the accuracy and ROC-AUC would be registering values of 1.

-

In this example, the values of all the metrics suggest that the model is showing good predictive performance. This can be investigated further by plotting the MDS of observation proximity values.

-
-multinomial_rf %>% 
-  plotMDS(cls = 'day')
-

-

This shows that the model is able to discriminate highly between classes such as 5 and H. It is less able to discriminate more similar classes such as H and 1 or 4 and 5 whose confidence ellipses show a high degree of overlap. This makes sense in the context of this experiment as these are adjacent time points that are more likely to be similar than time points at each end of the experiment.

-

The ROC curves can also be plotted as shown below.

-
-multinomial_rf %>% 
-  plotROC()
-

-

Classes with their line further from the central dashed line are those that were predicted with the greatest reliability by the model. This plot shows that both the H and 1 classes were least reliably predicted which is a result of their close proximity shown in the MDS plot previously.

-

Importance metrics can be used to identify the metabolome features that contribute most to the class discrimination in the model. The available importance metrics for this model are shown below.

-
-importanceMetrics(multinomial_rf)
-#>  [1] "1"                    "2"                    "3"                   
-#>  [4] "4"                    "5"                    "FalsePositiveRate"   
-#>  [7] "H"                    "MeanDecreaseAccuracy" "MeanDecreaseGini"    
-#> [10] "SelectionFrequency"
-

Here, we will use the false positive rate metric with a threshold of below 0.05 to identify explanatory features for the day response variable.

-
-multinomial_rf %>%
-  explanatoryFeatures(metric = 'FalsePositiveRate',
-                      threshold = 0.05)
-#> # A tibble: 121 × 5
-#>    Response Comparison  Feature Metric               Value
-#>    <chr>    <chr>       <chr>   <chr>                <dbl>
-#>  1 day      1~2~3~4~5~H N341    FalsePositiveRate 1.02e-93
-#>  2 day      1~2~3~4~5~H N133    FalsePositiveRate 7.38e-68
-#>  3 day      1~2~3~4~5~H N163    FalsePositiveRate 3.59e-61
-#>  4 day      1~2~3~4~5~H N439    FalsePositiveRate 1.07e-54
-#>  5 day      1~2~3~4~5~H N342    FalsePositiveRate 3.19e-49
-#>  6 day      1~2~3~4~5~H N377    FalsePositiveRate 3.19e-49
-#>  7 day      1~2~3~4~5~H N171    FalsePositiveRate 6.26e-44
-#>  8 day      1~2~3~4~5~H N497    FalsePositiveRate 6.11e-30
-#>  9 day      1~2~3~4~5~H N146    FalsePositiveRate 2.74e-29
-#> 10 day      1~2~3~4~5~H N195    FalsePositiveRate 7.16e-25
-#> # … with 111 more rows
-

As shown above there were a total of 121 explanatory features identified.

-

Within a multinomial experiment, it is also possible to specify the exact class comparisons to include, where it might not be suitable to compare all the classes at once using the comparisons argument. This should be specified as a named list, the names corresponding to the cls argument. Each named element should then consist of a vector of comparisons, the classes to compare separated using the ~.

-

The following specifies two comparisons (H~1~2,H~1~5) for the day response variable and displays the performance metrics.

-
-d %>%
-  randomForest(cls = 'day',
-               comparisons = list(day = c('H~1~2',
-                                          'H~1~5'))) %>%
-  metrics()
-#> # A tibble: 8 × 5
-#>   Response Comparison .metric  .estimator .estimate
-#>   <chr>    <chr>      <chr>    <chr>          <dbl>
-#> 1 day      H~1~2      accuracy multiclass     0.833
-#> 2 day      H~1~2      kap      multiclass     0.75 
-#> 3 day      H~1~5      accuracy multiclass     0.75 
-#> 4 day      H~1~5      kap      multiclass     0.625
-#> 5 day      H~1~2      roc_auc  hand_till      0.906
-#> 6 day      H~1~5      roc_auc  hand_till      0.909
-#> 7 day      H~1~2      margin   <NA>           0.172
-#> 8 day      H~1~5      margin   <NA>           0.320
-

The MDS and ROC curve plots can also be plotted simultaneously for the two comparisons.

-
-d %>%
-  randomForest(cls = 'day',
-               comparisons = list(day = c('H~1~2',
-                                          'H~1~5'))) %>%
-  {plotMDS(.,cls = 'day') +
-      plotROC(.) +
-      patchwork::plot_layout(ncol = 1)}
-

-

Similarly, it is also possible to model multiple response factors with a single random forest call by specifying a vector of response class information column names to the cls argument. In the following, both the name and day response factors will be analysed and the performance metrics returned in a single table.

-
-d %>%
-  randomForest(cls = c('name','day')) %>%
-  metrics()
-#> Warning: Classes with < 5 replicates removed: "11_3", "11_4", "11_5", "11_6",
-#> "11_H", "12_1", "12_3", "12_6", "12_H", "13_1", "13_2", "13_3", "13_5", "13_6",
-#> "13_H", "14_2", "14_3", "14_5", "14_6", "14_H", "15_1", "15_2", "15_4", "15_5",
-#> "15_6", "15_H"
-#> Unbalanced classes detected. Stratifying sample size to the smallest class size.
-#> # A tibble: 8 × 5
-#>   Response Comparison                    .metric  .estimator .estimate
-#>   <chr>    <chr>                         <chr>    <chr>          <dbl>
-#> 1 name     11_2~12_2~12_4~13_4~14_4~15_3 accuracy multiclass    0.35  
-#> 2 name     11_2~12_2~12_4~13_4~14_4~15_3 kap      multiclass    0.212 
-#> 3 name     11_2~12_2~12_4~13_4~14_4~15_3 roc_auc  hand_till     0.753 
-#> 4 name     11_2~12_2~12_4~13_4~14_4~15_3 margin   <NA>         -0.0485
-#> 5 day      1~2~3~4~5~H                   accuracy multiclass    0.8   
-#> 6 day      1~2~3~4~5~H                   kap      multiclass    0.76  
-#> 7 day      1~2~3~4~5~H                   roc_auc  hand_till     0.964 
-#> 8 day      1~2~3~4~5~H                   margin   <NA>          0.146
-

The MDS plots can also be returned for both models simultaneously.

-
-d %>%
-  randomForest(cls = c('name','day')) %>%
-  plotMDS()
-#> Warning: Classes with < 5 replicates removed: "11_3", "11_4", "11_5", "11_6",
-#> "11_H", "12_1", "12_3", "12_6", "12_H", "13_1", "13_2", "13_3", "13_5", "13_6",
-#> "13_H", "14_2", "14_3", "14_5", "14_6", "14_H", "15_1", "15_2", "15_4", "15_5",
-#> "15_6", "15_H"
-#> Unbalanced classes detected. Stratifying sample size to the smallest class size.
-

-
-
-

-Binary comparisons

-

It may in some cases be preferable to analyse class comparisons as multiple binary comparisons.

-

The possible binary comparisons for a given response variable can be displayed using the binaryComparisons() method. Below shows the 15 comparisons for the day response variable.

-
-binaryComparisons(d,cls = 'day')
-#>  [1] "1~2" "1~3" "1~4" "1~5" "1~H" "2~3" "2~4" "2~5" "2~H" "3~4" "3~5" "3~H"
-#> [13] "4~5" "4~H" "5~H"
-

For this example we will only use the binary comparisons containing the H class.

-
-binary_comparisons <- binaryComparisons(d,cls = 'day') %>% 
-  .[stringr::str_detect(.,'H')]
-

The binary comparisons can then be performed using the following.

-
-binary_rf <- d %>%
-  randomForest(cls = 'day',
-               comparisons = list(day = binary_comparisons))
-
-print(binary_rf)
-#> 
-#> Random forest classification 
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Response:     day 
-#> # comparisons:    5
-

To run all possible binary comparisons, the binary = TRUE argument could instead be used.

-

The MDS plots for each comparison can be visualised to inspect the comparisons.

-
-binary_rf %>% 
-  plotMDS(cls = 'day')
-

-

These plots show good separation in all the comparisons except H~1, which is also shown by the plot of the performance metrics below. With the exception of the H~1 comparison, each of the comparisons shows perfect performance for the accuracy, Cohen’s kappa and ROC-AUC metrics as well as very high margin values.

-
-binary_rf %>% 
-  plotMetrics()
-

-

The explanatory features for these comparisons can be extracted as below using the false positive rate metric and a cut-off threshold of 0.05. This gives a total of 251 explanatory features.

-
-binary_rf %>% 
-  explanatoryFeatures(metric = 'FalsePositiveRate',
-                      threshold = 0.05)
-#> # A tibble: 251 × 5
-#>    Response Comparison Feature Metric               Value
-#>    <chr>    <chr>      <chr>   <chr>                <dbl>
-#>  1 day      2~H        N341    FalsePositiveRate 7.34e-52
-#>  2 day      2~H        N439    FalsePositiveRate 1.80e-45
-#>  3 day      3~H        N342    FalsePositiveRate 2.71e-39
-#>  4 day      2~H        N327    FalsePositiveRate 1.06e-35
-#>  5 day      3~H        N439    FalsePositiveRate 1.06e-35
-#>  6 day      2~H        N477    FalsePositiveRate 1.60e-34
-#>  7 day      3~H        N377    FalsePositiveRate 1.60e-34
-#>  8 day      4~H        N477    FalsePositiveRate 7.40e-34
-#>  9 day      2~H        N447    FalsePositiveRate 6.48e-30
-#> 10 day      3~H        N163    FalsePositiveRate 6.48e-30
-#> # … with 241 more rows
-

A heatmap of these explanatory features can be plotted to show their mean relative intensities across the experiment time points. Here, the classes are also refactored to customise the order of the classes on the x-axis.

-
-refactor_cls <- clsExtract(binary_rf,
-                           cls = 'day') %>% 
-  factor(.,levels = c('H','1','2','3','4','5'))
-
-binary_rf <- clsReplace(binary_rf,
-                        value = refactor_cls,
-                        cls = 'day')
-binary_rf %>% 
-  plotExplanatoryHeatmap(metric = 'FalsePositiveRate',
-                      threshold = 0.05,
-                      featureNames = TRUE)
-

-
-
-
-

-Regression

-

Random forest regression can be used to assess the extent of association of the metabolomic data with continuous response variables.

-

In this example, the extent of association of injection order with the example data will be assessed.

-
-regression_rf <- d %>% 
-  randomForest(cls = 'injorder')
-
-print(regression_rf)
-#> 
-#> Random forest regression 
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Response:     injorder
-

The regression model performance metrics, based on the OOB prediction error, can be extracted using the following:

-
-regression_rf %>% 
-  metrics()
-#> # A tibble: 5 × 4
-#>   Response .metric .estimator .estimate
-#>   <chr>    <chr>   <chr>          <dbl>
-#> 1 injorder rsq     standard       0.476
-#> 2 injorder mae     standard      23.5  
-#> 3 injorder mape    standard     154.   
-#> 4 injorder rmse    standard      26.5  
-#> 5 injorder ccc     standard       0.508
-

These regression metrics include R2 (rsq), mean absolute error (mae), mean absolute percentage error (mape), root mean squared error (rmse) and the concordance correlation coefficient (ccc).

-

The R2 and concordance correlation coefficient metrics suggest that there is some association of features with the injection order, although this is weak. This is in agreement with the mean absolute error metric, which shows that, on average, the injection order could only be predicted to an accuracy of 23 injection order positions.

-

The MDS plot below shows the relative proximities of the samples based on this injection order regression model. This shows that for the most part, there is little correspondence of the sample positions with their injection order. However, there is a small grouping of samples towards the end of the run around samples ~99 to 120. It suggests that there could have been some analytical issues, for certain features, towards the end of the mass spectral analytical run.

-
-regression_rf %>% 
-  plotMDS(cls = NULL,
-          ellipses = FALSE,
-          label = 'injorder',
-          labelSize = 3)
-#> Warning: ggrepel: 40 unlabeled data points (too many overlaps). Consider
-#> increasing max.overlaps
-

-

The available feature importance metrics for this regression model can be listed.

-
-regression_rf %>% 
-  importanceMetrics()
-#> [1] "%IncMSE"       "IncNodePurity"
-

The feature importance metrics can be plotted to give an overview of their distribution. The following will plot the percentage increase in the mean squared error (%IncMSE) importance metric.

-
-regression_rf %>% 
-  plotImportance(metric = "%IncMSE", 
-                 rank = FALSE)
-

-

This shows that there are only a few features that are contributing to the association with injection order. These explanatory features can be extracted with the following, using a threshold of above 5.

-
-regression_rf %>% 
-  explanatoryFeatures(metric = '%IncMSE',
-                      threshold = 5)
-#> # A tibble: 7 × 4
-#>   Response Feature Metric  Value
-#>   <chr>    <chr>   <chr>   <dbl>
-#> 1 injorder N283    %IncMSE 19.9 
-#> 2 injorder N135    %IncMSE  8.71
-#> 3 injorder N451    %IncMSE  5.58
-#> 4 injorder N161    %IncMSE  5.51
-#> 5 injorder N306    %IncMSE  5.49
-#> 6 injorder N118    %IncMSE  5.22
-#> 7 injorder N297    %IncMSE  5.07
-

This returned a total of 7 explanatory features above this threshold. The top ranked feature N283 can be plotted to investigate its trend in relation to injection order.

-
-regression_rf %>% 
-  plotFeature(feature = 'N283',
-              cls = 'injorder')
-

-

This shows an increase in the intensity of that feature for samples above 100 in the injection order which corresponds with the cluster that was seen in the MDS plot above.

-
-
-
-

-Univariate analyses

-

Univariate methods select features, explanatory for response variables, with features tested on an individual basis. These methods offer simplicity and easy interpretation in their use, however they provide no information as to how features may interact.

-

The univariate methods currently available in metabolyseR include Welch’s t-test, analysis of variance (ANOVA) and linear regression. The following sections will provide brief examples of the use of each of these methods.

-
-

-Welch’s t-test

-

Welch’s t-test can be used to select explanatory metabolome features for binary comparisons of discrete variables. By default, all the possible binary comparisons for the categories of a response variable will be tested.

-

Below shows the possible binary comparisons for the day response variable for the example data set.

-
-binaryComparisons(d,
-                  cls = 'day')
-#>  [1] "1~2" "1~3" "1~4" "1~5" "1~H" "2~3" "2~4" "2~5" "2~H" "3~4" "3~5" "3~H"
-#> [13] "4~5" "4~H" "5~H"
-

For the following example, only a subset of comparisons will be tested. These will be selected by supplying a list to the comparisons argument.

-
-ttest_analysis <- ttest(d,
-                        cls = 'day',
-                        comparisons = list(day = c('H~1',
-                                                   'H~2',
-                                                   'H~5')))
-
-print(ttest_analysis)
-#> 
-#> Univariate t-test analysis
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Responses:    day 
-#> # comparisons:    3
-

The explanatory features that show a significant difference between the response categories can be extracted as shown below.

-
-explanatoryFeatures(ttest_analysis,
-                    threshold = 0.05)
-#> # A tibble: 73 × 14
-#>    Response Comparison Feature estimate estimate1 estimate2 statistic  p.value
-#>    <chr>    <chr>      <chr>      <dbl>     <dbl>     <dbl>     <dbl>    <dbl>
-#>  1 day      H~5        N163      -735.       19.5   755.       -13.8  1.43e-11
-#>  2 day      H~5        N341      2445.     2537.     92.6       13.6  2.88e-11
-#>  3 day      H~5        N133      1055.     1077.     21.9       13.0  5.44e-11
-#>  4 day      H~2        N341       200.      293.     92.6       10.6  1.38e-10
-#>  5 day      H~5        N171        62.6      64.7     2.15      11.9  2.62e-10
-#>  6 day      H~5        N119        17.2      17.9     0.763     11.0  8.54e-10
-#>  7 day      H~5        N342       243.      247.      4.13      10.8  1.42e- 9
-#>  8 day      H~5        N343        27.4      28.3     0.961      9.83 5.99e- 9
-#>  9 day      H~5        N377       152.      157.      5.05       9.81 6.75e- 9
-#> 10 day      H~5        N477       103.      129.     26.1        9.30 1.05e- 8
-#> # … with 63 more rows, and 6 more variables: parameter <dbl>, conf.low <dbl>,
-#> #   conf.high <dbl>, method <chr>, alternative <chr>, adjusted.p.value <dbl>
-

This will threshold the features based on their adjusted p-value, found in the adjusted.p.value column of the table. The results of all of the features can be returned using the importance() method.

-

A heat map of the explanatory features can be plotted to inspect the relative trends of the explanatory features in relation to the response variable.

-
-plotExplanatoryHeatmap(ttest_analysis)
-

-
-
-

-ANOVA

-

ANOVA can be used to select explanatory features for discrete response variables with 3 or more categories. The following example will compare all the categories in the day response variable. However, the comparisons argument can be used to select particular comparisons of interest.

-
-anova_analysis <- anova(d,
-                        cls = 'day')
-
-print(anova_analysis)
-#> 
-#> Univariate ANOVA analysis
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Responses:    day 
-#> # comparisons:    1
-

The explanatory features that are significantly different between the categories can then be extracted.

-
-explanatoryFeatures(anova_analysis,
-                    threshold = 0.05)
-#> # A tibble: 110 × 10
-#>    Response Comparison  Feature term        df   sumsq meansq statistic  p.value
-#>    <chr>    <chr>       <chr>   <chr>    <dbl>   <dbl>  <dbl>     <dbl>    <dbl>
-#>  1 day      1~2~3~4~5~H N341    response     5  1.09e8 2.17e7     124.  1.90e-44
-#>  2 day      1~2~3~4~5~H N163    response     5  1.25e7 2.51e6     113.  1.71e-42
-#>  3 day      1~2~3~4~5~H N133    response     5  1.96e7 3.92e6     108.  1.71e-41
-#>  4 day      1~2~3~4~5~H N171    response     5  6.29e4 1.26e4      88.8 1.16e-37
-#>  5 day      1~2~3~4~5~H N342    response     5  1.04e6 2.07e5      85.1 7.61e-37
-#>  6 day      1~2~3~4~5~H N343    response     5  1.19e4 2.38e3      66.1 4.43e-32
-#>  7 day      1~2~3~4~5~H N119    response     5  4.92e3 9.83e2      53.8 2.07e-28
-#>  8 day      1~2~3~4~5~H N497    response     5  1.10e5 2.20e4      49.6 4.83e-27
-#>  9 day      1~2~3~4~5~H N137    response     5  6.32e3 1.26e3      39.9 1.59e-23
-#> 10 day      1~2~3~4~5~H N277    response     5  6.31e4 1.26e4      39.1 3.14e-23
-#> # … with 100 more rows, and 1 more variable: adjusted.p.value <dbl>
-

The top ranked explanatory feature N341 can be plotted to inspect its trend relative to the day response variable.

-
-plotFeature(anova_analysis,
-            feature = 'N341',
-            cls = 'day')
-

-
-
-

-Linear regression

-

Univariate linear regression can be used to associate a continuous response variable with metabolome features. In the example below, the example data will be regressed against injection order to identify any linearly associated metabolome features.

-
-lr_analysis <- linearRegression(d,
-                                cls = 'injorder')
-
-print(lr_analysis)
-#> 
-#> Univariate linear regression analysis
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Responses:    injorder
-

The explanatory features can then be extracted.

-
-explanatoryFeatures(lr_analysis)
-#> # A tibble: 8 × 15
-#>   Response Feature r.squared adj.r.squared sigma statistic  p.value    df logLik
-#>   <chr>    <chr>       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl>
-#> 1 injorder N283        0.310         0.304  4.27      53.0 4.10e-11     1  -343.
-#> 2 injorder N135        0.165         0.157 78.7       23.2 4.31e- 6     1  -693.
-#> 3 injorder N221        0.140         0.133  5.87      19.3 2.50e- 5     1  -382.
-#> 4 injorder N473        0.135         0.127  7.24      18.3 3.78e- 5     1  -407.
-#> 5 injorder N335        0.132         0.124 20.1       17.9 4.59e- 5     1  -529.
-#> 6 injorder N452        0.120         0.112  4.00      16.0 1.10e- 4     1  -335.
-#> 7 injorder N255        0.119         0.111 11.1       15.9 1.17e- 4     1  -458.
-#> 8 injorder N267        0.118         0.111 26.4       15.8 1.22e- 4     1  -562.
-#> # … with 6 more variables: AIC <dbl>, BIC <dbl>, deviance <dbl>,
-#> #   df.residual <int>, nobs <int>, adjusted.p.value <dbl>
-

The top ranked explanatory feature N283 can be plotted to inspect its association with injection order.

-
-plotFeature(lr_analysis,
-            feature = 'N283',
-            cls = 'injorder')
-

-
-
-
-

-Routine analyses

-

For routine analyses, the initial analysis parameters for pre-treatment of the data and then the modelling can be selected.

-
-p <- analysisParameters(c('pre-treatment','modelling'))
-

More specific parameters for pre-treatment of the example data can be declared using the following.

-
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
-    keep = 'classes',
-    occupancyFilter = 'maximum',
-    transform = 'TICnorm' 
-  )
-)
-

The modellingMethods() function can be used to list the modelling methods that are currently available in metabolyseR.

-
-modellingMethods()
-#> [1] "anova"            "ttest"            "linearRegression" "randomForest"
-

The modellingParameters() function can be used to retrieve the default parameters for specific modelling methods. Below, the default modelling parameters for the randomForest and ttest methods are specified.

-
-parameters(p,'modelling') <- modellingParameters(c('randomForest','ttest'))
-

The class parameters can then be universally specified for both the pre-treatment and modelling elements. For this example, the day response variable will be used with just the H and 2 classes.

-
-changeParameter(p,'cls') <- 'day'
-changeParameter(p,'classes') <- c('H','2')
-

This gives the following parameters for the analysis.

-
-p
-#> Parameters:
-#> pre-treatment
-#>  keep
-#>      classes
-#>          cls = day
-#>          classes = c("H", "2")
-#>  occupancyFilter
-#>      maximum
-#>          cls = day
-#>          occupancy = 2/3
-#>  transform
-#>      TICnorm
-#> 
-#> modelling
-#>  randomForest
-#>      cls = day
-#>      rf = list()
-#>      reps = 1
-#>      binary = FALSE
-#>      comparisons = list()
-#>      perm = 0
-#>      returnModels = FALSE
-#>      seed = 1234
-#>  ttest
-#>      cls = day
-#>      pAdjust = bonferroni
-#>      comparisons = list()
-#>      returnModels = FALSE
-

The analysis can then be executed.

-
analysis <- metabolyse(abr1$neg,abr1$fact,p)
-#> 
-#> metabolyseR  v0.14.3 Tue Sep 14 10:13:39 2021
-#> ________________________________________________________________________________
-#> Parameters:
-#> pre-treatment
-#>  keep
-#>      classes
-#>          cls = day
-#>          classes = c("H", "2")
-#>  occupancyFilter
-#>      maximum
-#>          cls = day
-#>          occupancy = 2/3
-#>  transform
-#>      TICnorm
-#> 
-#> modelling
-#>  randomForest
-#>      cls = day
-#>      rf = list()
-#>      reps = 1
-#>      binary = FALSE
-#>      comparisons = list()
-#>      perm = 0
-#>      returnModels = FALSE
-#>      seed = 1234
-#>  ttest
-#>      cls = day
-#>      pAdjust = bonferroni
-#>      comparisons = list()
-#>      returnModels = FALSE
-#> ________________________________________________________________________________
-#> Pre-treatment …
-
-Pre-treatment   ✓ [6.4S]
-#> Modelling …
-
-Modelling   ✓ [4.2S]
-#> ________________________________________________________________________________
-#> 
-#> Complete! [10.6S]
-

The results for the modelling can be specifically extracted using the following.

-
-analysisResults(analysis,'modelling')
-#> $randomForest
-#> 
-#> Random forest classification 
-#> 
-#> Samples:  40 
-#> Features:     1713 
-#> Response:     day 
-#> # comparisons:    1 
-#> 
-#> 
-#> $ttest
-#> 
-#> Univariate t-test analysis
-#> 
-#> Samples:  40 
-#> Features:     1713 
-#> Responses:    day 
-#> # comparisons:    1
-

This returns the results as a list containing the modelling results objects for each specified method.

-

Alternatively, the modelling results can be accessed directly from the Analysis object. The following shows the extraction of the explanatory features, using default parameters for each method, with the results returned in a single table.

-
-explanatory_features <- analysis %>% 
-  explanatoryFeatures()
-
-print(explanatory_features)
-#> # A tibble: 100 × 17
-#>    Method       Response Comparison Feature Metric      Value estimate estimate1
-#>    <chr>        <chr>    <chr>      <chr>   <chr>       <dbl>    <dbl>     <dbl>
-#>  1 randomForest day      2~H        N341    FalsePo… 8.06e-28       NA        NA
-#>  2 randomForest day      2~H        N377    FalsePo… 5.70e-18       NA        NA
-#>  3 randomForest day      2~H        N447    FalsePo… 5.70e-18       NA        NA
-#>  4 randomForest day      2~H        N579    FalsePo… 5.70e-18       NA        NA
-#>  5 randomForest day      2~H        N1084   FalsePo… 1.19e-16       NA        NA
-#>  6 randomForest day      2~H        N327    FalsePo… 2.33e-15       NA        NA
-#>  7 randomForest day      2~H        N580    FalsePo… 4.32e-14       NA        NA
-#>  8 randomForest day      2~H        N1083   FalsePo… 7.49e-13       NA        NA
-#>  9 randomForest day      2~H        N1085   FalsePo… 7.49e-13       NA        NA
-#> 10 randomForest day      2~H        N503    FalsePo… 7.49e-13       NA        NA
-#> # … with 90 more rows, and 9 more variables: estimate2 <dbl>, statistic <dbl>,
-#> #   p.value <dbl>, parameter <dbl>, conf.low <dbl>, conf.high <dbl>,
-#> #   method <chr>, alternative <chr>, adjusted.p.value <dbl>
-

Heat maps of the explanatory features can also be plotted for both the modelling methods.

-
-plotExplanatoryHeatmap(analysis) %>% 
-  patchwork::wrap_plots()
-

-
-
- - - -
- - - - -
- - - - - - diff --git a/docs/articles/04_modelling_files/figure-html/anova-feature-1.png b/docs/articles/04_modelling_files/figure-html/anova-feature-1.png deleted file mode 100644 index 51a11b87..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/anova-feature-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/binary-heatmap-1.png b/docs/articles/04_modelling_files/figure-html/binary-heatmap-1.png deleted file mode 100644 index 3f80db1c..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/binary-heatmap-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/binary-mds-1.png b/docs/articles/04_modelling_files/figure-html/binary-mds-1.png deleted file mode 100644 index 00965f0c..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/binary-mds-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/binary-metrics-1.png b/docs/articles/04_modelling_files/figure-html/binary-metrics-1.png deleted file mode 100644 index f6993546..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/binary-metrics-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/classification-comparison-mds-roc-1.png b/docs/articles/04_modelling_files/figure-html/classification-comparison-mds-roc-1.png deleted file mode 100644 index 30d67fc6..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/classification-comparison-mds-roc-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/explanatory-heatmap-1.png b/docs/articles/04_modelling_files/figure-html/explanatory-heatmap-1.png deleted file mode 100644 index 45f38407..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/explanatory-heatmap-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/linear-regression-feature-1.png 
b/docs/articles/04_modelling_files/figure-html/linear-regression-feature-1.png deleted file mode 100644 index ea3497b1..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/linear-regression-feature-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/multinomial-mds-1.png b/docs/articles/04_modelling_files/figure-html/multinomial-mds-1.png deleted file mode 100644 index 61c3e786..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/multinomial-mds-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/multinomial-multiple-mds-1.png b/docs/articles/04_modelling_files/figure-html/multinomial-multiple-mds-1.png deleted file mode 100644 index b9582c81..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/multinomial-multiple-mds-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/multinomial-roc-1.png b/docs/articles/04_modelling_files/figure-html/multinomial-roc-1.png deleted file mode 100644 index 4ab456fd..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/multinomial-roc-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/outlier-detect-1.png b/docs/articles/04_modelling_files/figure-html/outlier-detect-1.png deleted file mode 100644 index 9ac7108d..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/outlier-detect-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/regression-feature-1.png b/docs/articles/04_modelling_files/figure-html/regression-feature-1.png deleted file mode 100644 index ea3497b1..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/regression-feature-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/regression-importance-plot-1.png b/docs/articles/04_modelling_files/figure-html/regression-importance-plot-1.png deleted file mode 100644 index 
3165d24a..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/regression-importance-plot-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/regression-mds-1.png b/docs/articles/04_modelling_files/figure-html/regression-mds-1.png deleted file mode 100644 index e9135d8a..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/regression-mds-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/t-test-heatmap-1.png b/docs/articles/04_modelling_files/figure-html/t-test-heatmap-1.png deleted file mode 100644 index fa203e82..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/t-test-heatmap-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/unsupervised-feature-1.png b/docs/articles/04_modelling_files/figure-html/unsupervised-feature-1.png deleted file mode 100644 index f7225ce7..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/unsupervised-feature-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/figure-html/unsupervised-rf-1.png b/docs/articles/04_modelling_files/figure-html/unsupervised-rf-1.png deleted file mode 100644 index c6e4fa39..00000000 Binary files a/docs/articles/04_modelling_files/figure-html/unsupervised-rf-1.png and /dev/null differ diff --git a/docs/articles/04_modelling_files/header-attrs-2.10/header-attrs.js b/docs/articles/04_modelling_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/04_modelling_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/index.html b/docs/articles/index.html deleted file mode 100644 index 6c4c2dcc..00000000 --- a/docs/articles/index.html +++ /dev/null @@ -1,98 +0,0 @@ - -Articles • metabolyseR - - -
-
- - - -
- -
- - -
- - - - - - - - diff --git a/docs/articles/introduction.html b/docs/articles/introduction.html deleted file mode 100644 index 55de758f..00000000 --- a/docs/articles/introduction.html +++ /dev/null @@ -1,850 +0,0 @@ - - - - - - - -Introduction • metabolyseR - - - - - - - - - - -
-
- - - - -
-
- - - - -
-

-Introduction

-

The metabolyseR package provides a suite of methods that encompass three elements of metabolomics data analysis:

-
    -
  • data pre-treatment
  • -
  • modelling / data mining
  • -
  • correlation analyses
  • -
-

The package also distinguishes between the flexibility and simplicity required for exploratory analyses compared to the convenience needed for more complex routine analyses. This is reflected in the underlying S4 object-oriented implementations and associated methods defined within the package. It should be noted that it is useful to understand the principles involved in using metabolyseR for exploratory analyses to aid in extracting and wrangling the results generated from routine analyses.

-

The following document will provide an introduction to the basic usage of the package and includes how to create and use the base classes that are the foundation of metabolyseR. This will be focused around the applications for both exploratory and routine analyses. For more detailed information on the individual analysis elements see their associated vignette using:

-
-browseVignettes('metabolyseR')
-

There is also an example quick start analysis vignette provided.

-
-vignette('quick_start','metabolyseR')
-

Any issues, bugs or errors encountered while using the package should be reported here.

-

The examples shown here will use the abr1 data set from the metaboData package (?metaboData::abr1). This is a nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data set from a plant-pathogen infection time course experiment. The examples will also include use of the pipe %>% from the magrittr package.

-

Firstly load the necessary packages:

- -
-
-

-Parallel processing

-

The package supports parallel processing using the future package.

-

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel back-end using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

-
-plan(future::multisession,workers = 2)
-

See the future package documentation for more information on the types of parallel implementations that are available.

-
-
-

-Exploratory analyses

-

For exploratory analyses, simple questions of the data need to be answered quickly, requiring few steps. Key requirements for any tool used by investigators are that it should be both simple and flexible.

-

In metabolyseR, the AnalysisData class is the base S4 class that provides these requirements. The following sections will give an overview of the basics in constructing and using these objects as the base for analysis.

-
-

-Analysis data

-

We can firstly construct an AnalysisData object which requires two data tables. The first is the metabolomic data, containing the abundance values, where the columns are the metabolome features and the rows are the sample observations. The second is the sample meta-information where the row order should match that of the metabolome data table. Using the example data, this can be constructed and assigned to the variable d by:

-
-d <- analysisData(data = abr1$neg,
-                  info = abr1$fact)
-

Where abr1$neg is the negative ionisation mode data and abr1$fact is the corresponding sample information. By printing d we can view some basic information about our data.

-
-print(d)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 9
-

We can also return the numbers of samples and numbers of features respectively using the following:

- -
## [1] 120
- -
## [1] 2000
-

The data table can be extracted using the dat method:

-
-dat(d)
-
## # A tibble: 120 × 2,000
-##       N1    N2    N3    N4    N5    N6    N7    N8    N9   N10   N11   N12   N13
-##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-##  1     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  2     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  3     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  4     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  5     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  6     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  7     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  8     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  9     0     0     0     0     0     0     0     0     0     0     0     0     0
-## 10     0     0     0     0     0     0     0     0     0     0     0     0     0
-## # … with 110 more rows, and 1,987 more variables: N14 <dbl>, N15 <dbl>,
-## #   N16 <dbl>, N17 <dbl>, N18 <dbl>, N19 <dbl>, N20 <dbl>, N21 <dbl>,
-## #   N22 <dbl>, N23 <dbl>, N24 <dbl>, N25 <dbl>, N26 <dbl>, N27 <dbl>,
-## #   N28 <dbl>, N29 <dbl>, N30 <dbl>, N31 <dbl>, N32 <dbl>, N33 <dbl>,
-## #   N34 <dbl>, N35 <dbl>, N36 <dbl>, N37 <dbl>, N38 <dbl>, N39 <dbl>,
-## #   N40 <dbl>, N41 <dbl>, N42 <dbl>, N43 <dbl>, N44 <dbl>, N45 <dbl>,
-## #   N46 <dbl>, N47 <dbl>, N48 <dbl>, N49 <dbl>, N50 <dbl>, N51 <dbl>, …
-

Or alternatively, can be used to assign a new data table:

-
-dat(d) <- abr1$pos
-d
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 9
-

The sample information table can be extracted using the sinfo method:

-
-sinfo(d)
-
## # A tibble: 120 × 9
-##    injorder pathcdf              filecdf name.org remark name    rep day   class
-##       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
-##  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
-##  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
-##  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
-##  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
-##  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
-##  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
-##  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
-##  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
-##  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
-## 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
-## # … with 110 more rows
-

And similarly used to assign a new sample information table:

-
-sinfo(d) <- abr1$fact[,1:2]
-d
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 2
-
-
-

-Sample information

-

There are a number of methods that provide utility for querying and altering the sample information within an AnalysisData object. These methods are all named with the prefix cls and include:

-
    -
  • clsAdd
  • -
  • clsArrange
  • -
  • clsAvailable
  • -
  • clsExtract
  • -
  • clsRemove
  • -
  • clsRename
  • -
  • clsReplace
  • -
-

The names of the available sample information columns can be shown using clsAvailable().

- -
## [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
-## [8] "day"      "class"
-

A given column can be extracted using clsExtract(). Here, the day column is extracted.

-
-clsExtract(d,cls = 'day')
-
##   [1] 2 3 4 1 2 1 2 4 H H 4 5 1 2 H 5 3 3 2 H 4 3 5 4 H H 3 H H 1 1 1 5 5 3 4 H
-##  [38] 1 5 5 1 2 4 3 2 4 3 2 5 4 4 H 3 4 2 4 4 1 5 4 4 1 1 H 3 2 H 3 3 1 2 H H 2
-##  [75] 3 5 3 2 5 2 4 3 H 2 3 2 1 1 4 5 3 2 1 H 5 2 4 H 1 4 4 1 1 5 H 5 1 3 3 5 5
-## [112] 5 3 2 5 H 5 H 2 1
-## Levels: 1 2 3 4 5 H
-

Sample class frequencies could then be computed.

-
-clsExtract(d,cls = 'day') %>%
-  table()
-
## .
-##  1  2  3  4  5  H 
-## 20 20 20 20 20 20
-

It can be seen that there are 20 samples available in each class.

-

Another example is the addition of a new sample information column. In the following, a column called new_class will be added with all samples labelled 1.

-
-d <- clsAdd(d,cls = 'new_class',value = rep(1,nSamples(d)))
-clsAvailable(d)
-
##  [1] "injorder"  "pathcdf"   "filecdf"   "name.org"  "remark"    "name"     
-##  [7] "rep"       "day"       "class"     "new_class"
-
-
-

-Keeping / removing samples or features

-

Samples or features can easily be kept or removed from an AnalysisData object as is most convenient.

-

Below can be seen the first 6 sample indexes in the injorder column of the sample information.

-
-samples <- d %>%
-  clsExtract(cls = 'injorder') %>%
-  head()
-
-print(samples)
-
## [1] 1 2 3 4 5 6
-

Only these samples could be kept using:

-
-d %>%
-  keepSamples(idx = 'injorder',samples = samples)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 6 
-## Features: 2000 
-## Info: 10
-

Or removed using:

-
-d %>%
-  removeSamples(idx = 'injorder',samples = samples)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 114 
-## Features: 2000 
-## Info: 10
-

The process is very similar for keeping or removing specific metabolome features from the data table. Below can be seen the first 6 feature names in the data table.

-
-feat <- d %>%
-  features() %>%
-  head()
-
-print(feat)
-
## [1] "N1" "N2" "N3" "N4" "N5" "N6"
-

Only these features can be kept using:

-
-d %>%
-  keepFeatures(features = feat)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 6 
-## Info: 10
-

Or to remove these features:

-
-d %>%
-  removeFeatures(features = feat)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 1994 
-## Info: 10
-
-
-
-

-Routine analyses

-

Routine analyses are those that are often made up of numerous steps where parameters have likely already been previously established. The emphasis here is on convenience with as little code as possible required. In these analyses, the necessary analysis elements, order and parameters are first prepared and then the analysis routine subsequently performed in a single step. This section will introduce how this type of analysis can be performed using metabolyseR and will include four main topics:

-
    -
  • analysis parameter selection
  • -
  • performing an analysis
  • -
  • performing a re-analysis
  • -
  • extracting analysis results
  • -
-
-

-Analysis parameters

-

Parameter selection is the fundamental aspect for performing routine analyses using metabolyseR and will be the step requiring the most input from the user. The parameters for an analysis are stored in an S4 object of class AnalysisParameters containing the relevant parameters of the selected analysis elements.

-

The parameters have been named so that they denote the same functionality commonly across all analysis element methods. Discussion of the specific parameters can be found within the vignettes of the relevant analysis elements. These can be accessed using:

-
-browseVignettes('metabolyseR')
-

There are several ways to specify the parameters to use for analysis. The first is programmatically and the second is through the use of the YAML format.

-
-

-Programmatic specification

-

The available analysis elements can be shown using:

- -
## [1] "pre-treatment" "modelling"     "correlations"
-

The analysisParameters() function can be used to create an AnalysisParameters object containing the default parameters. For example, the code below will return default parameters for all the metabolyseR analysis elements.

- -
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = class
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = class
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = class
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = class
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = class
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-## 
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
-

To retrieve parameters for a subset of analysis elements the following can be run, returning parameters for only the pre-treatment and modelling elements.

-
-p <- analysisParameters(c('pre-treatment','modelling'))
-p
-
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = class
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = class
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = class
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = class
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = class
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-

The changeParameter() function can be used to uniformly change these parameters across all of the selected methods. The example below changes the defaults of all the parameters named cls from the default class to day.

-
-p <- analysisParameters()
-changeParameter(p,'cls') <- 'day'
-p
-
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = day
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = day
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = day
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = day
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = day
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = day
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = day
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-## 
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
-

Alternatively, the parameters of specific analysis elements can be targeted using the elements argument. The following will only alter the cls parameter back to class for the pre-treatment element parameters:

-
-changeParameter(p,'cls',elements = 'pre-treatment') <- 'class'
-

Parameters can be extracted from the AnalysisParameters class using the parameters() function for a specified element.

-
-parameters(p,'correlations')
-
## $method
-## [1] "pearson"
-## 
-## $pAdjustMethod
-## [1] "bonferroni"
-## 
-## $corPvalue
-## [1] 0.05
-

Each analysis element has a function for returning default parameters for specific methods. These include preTreatmentParameters(), modellingParameters() and correlationParameters(). Each returns a list of the default parameters for the specified methods as shown in the example for modellingParameters() below.

- -
## $anova
-## $anova$cls
-## [1] "class"
-## 
-## $anova$pAdjust
-## [1] "bonferroni"
-## 
-## $anova$comparisons
-## list()
-## 
-## $anova$returnModels
-## [1] FALSE
-

Refer to the documentation (?) of each function for specific usage details.

-

The parameters returned by these functions can be assigned to an AnalysisParameters object, again using parameters()

-
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
-    occupancyFilter = 'maximum',
-    transform = 'TICnorm'
-      )
-  )
-
-
-

-YAML specification

-

Due to the relatively complex structure of the parameters needed for analyses containing many components, it is also possible to specify analysis parameters using the YAML file format. YAML parameter files (.yaml) can be parsed using the parseParameters() function. The example below shows the YAML specification for the defaults returned by analysisParameters().

- -

This can be passed directly into an AnalysisParameters object using the following:

-
-paramFile <- system.file('defaultParameters.yaml',package = 'metabolyseR')
-p <- parseParameters(paramFile)
-

For more complex pre-treatment situations such as the following:

- -

Where multiple steps of the same method are needed (here, remove), these are numbered sequentially. Where multiple values also need to be provided to a particular argument (e.g. classes = c('H','1')), these should be supplied as a hyphenated list.

-

Existing AnalysisParameters objects can also be exported to YAML format as shown below:

-
-p <- analysisParameters()
-exportParameters(p,file = 'analysis_parameters.yaml')
-
-
-
-

-Performing an analysis

-

The analysis is performed in a single step using the metabolyse() function. This accepts the metabolomic data, the sample information and the analysis parameters.

-

The metabolomic data should be a table of abundance values where the columns are the metabolome features and the rows are each sample observation. Similarly, the sample meta-information table should consist of the observations as rows and the meta information as columns. The order of the observation rows of the sample information table should be concordant with the rows in the metabolomics data table.

-

We can run an example analysis using the abr1 data set by first generating the default parameters for pre-treatment and modelling (random forest) analysis elements.

-
-p <- analysisParameters(c('pre-treatment','modelling'))
-

Custom pre-treatment parameters can then be specified to only include occupancy filtering and total ion count normalisation.

-
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
-  occupancyFilter = 'maximum',
-  transform = 'TICnorm')
-)
-

Next the cls parameters can be changed to use the day sample information column throughout the analysis.

-
-changeParameter(p,'cls') <- 'day'
-

Finally, the analysis can be run in a single step. Here only the first 200 features of the negative ionisation mode data are specified to reduce the analysis time needed for this example.

-
-analysis <- metabolyse(abr1$neg[,1:200],abr1$fact,p) 
-
## 
-## metabolyseR  v0.14.3 Tue Sep 14 11:36:36 2021
-
## ________________________________________________________________________________
-
## Parameters:
-## pre-treatment
-##  occupancyFilter
-##      maximum
-##          cls = day
-##          occupancy = 2/3
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = day
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-
## ________________________________________________________________________________
-
## Pre-treatment …
-
-Pre-treatment   ✓ [0.8S]
-## Modelling …
-
-Modelling   ✓ [3S]
-## ________________________________________________________________________________
-## 
-## Complete! [3.8S]
-

Note: If a data pre-treatment step is not performed prior to modelling or correlation analysis, the raw data will automatically be used.

-

The analysis object containing the analysis results can be printed to provide some basic information about the results of the analysis.

-
-print(analysis)
-
## 
-## metabolyseR v0.14.3
-## Analysis:
-##  Tue Sep 14 11:36:36 2021
-## 
-##  Raw Data:
-##      No. samples = 120
-##      No. features = 200
-## 
-##  Pre-treated Data:
-##      Tue Sep 14 11:36:37 2021
-##      No. samples = 120
-##      No. features = 48
-## 
-##  Modelling:
-##      Tue Sep 14 11:36:40 2021
-##      Methods: randomForest
-
-
-

-Performing a re-analysis

-

There are likely to be occasions where an analysis will need to be re-analysed using a new set of parameters. This can be achieved using the reAnalyse() function.

-

In the example below we will run a correlation analysis in addition to the pre-treatment and modelling elements already performed.

-

Firstly, we can specify the correlation parameters:

-
-parameters <- analysisParameters('correlations')
-

Then perform the re-analysis on our previously analysed Analysis object, specifying the additional parameters.

-
-analysis <- reAnalyse(analysis,parameters)
-
## 
-## metabolyseR v0.14.3 Tue Sep 14 11:36:40 2021
-## ________________________________________________________________________________
-## Parameters:
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
-## ________________________________________________________________________________
-
## Correlations …
-
-Correlations    ✓ [0.1S]
-
## ________________________________________________________________________________
-## 
-## Complete! [0.1S]
-

An overview of the results of the analysis (now including correlations) can then be printed.

-
-print(analysis)
-
## 
-## metabolyseR v0.14.3
-## Analysis:
-##  Tue Sep 14 11:36:36 2021
-## 
-##  Raw Data:
-##      No. samples = 120
-##      No. features = 200
-## 
-##  Pre-treated Data:
-##      Tue Sep 14 11:36:37 2021
-##      No. samples = 120
-##      No. features = 48
-## 
-##  Modelling:
-##      Tue Sep 14 11:36:40 2021
-##      Methods: randomForest
-## 
-##  Correlations:
-##      Tue Sep 14 11:36:40 2021
-##      No. correlations = 140
-
-
-

-Extracting analysis results

-

An analysis performed by metabolyse() returns an S4 object of class Analysis. There are a number of ways of extracting analysis results from this object.

-

Similarly to the AnalysisData class, the dat() and sinfo() functions can be used to extract the metabolomics data or sample information tables directly for either the raw or pre-treated data.

-

For example, to extract the pre-treated metabolomics data from our object analysis:

-
-dat(analysis,type = 'pre-treated')
-
## # A tibble: 120 × 48
-##       N113    N115    N117    N118    N119    N127    N128    N129  N130    N131
-##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>
-##  1 0.00646 0       1.68e-4 0       1.60e-3 0.0323  2.65e-4 2.80e-4     0 0      
-##  2 0.0113  7.74e-4 1.02e-3 0       1.43e-3 0.00856 0       3.95e-4     0 0      
-##  3 0.00931 6.01e-4 2.70e-3 6.22e-5 5.58e-3 0       0       1.05e-4     0 6.51e-4
-##  4 0.00798 0       0       0       1.62e-4 0.00848 0       4.05e-4     0 1.28e-4
-##  5 0.0105  0       0       0       0       0.00658 0       1.97e-3     0 0      
-##  6 0.00454 0       2.48e-4 3.25e-4 5.31e-4 0.00207 0       1.98e-4     0 0      
-##  7 0.0117  0       1.14e-3 0       4.39e-4 0.00603 0       4.04e-4     0 0      
-##  8 0.00787 2.36e-3 1.43e-3 1.52e-4 4.22e-3 0.00290 2.78e-4 5.76e-5     0 0      
-##  9 0.00136 1.87e-4 8.17e-4 1.87e-4 0       0.0610  1.31e-4 5.23e-4     0 0      
-## 10 0.00899 4.26e-4 2.06e-3 0       8.36e-4 0.00106 7.72e-4 0           0 0      
-## # … with 110 more rows, and 38 more variables: N132 <dbl>, N133 <dbl>,
-## #   N134 <dbl>, N135 <dbl>, N136 <dbl>, N137 <dbl>, N139 <dbl>, N143 <dbl>,
-## #   N145 <dbl>, N146 <dbl>, N147 <dbl>, N149 <dbl>, N153 <dbl>, N155 <dbl>,
-## #   N157 <dbl>, N161 <dbl>, N163 <dbl>, N164 <dbl>, N165 <dbl>, N168 <dbl>,
-## #   N169 <dbl>, N170 <dbl>, N171 <dbl>, N173 <dbl>, N174 <dbl>, N175 <dbl>,
-## #   N179 <dbl>, N180 <dbl>, N181 <dbl>, N183 <dbl>, N187 <dbl>, N191 <dbl>,
-## #   N192 <dbl>, N193 <dbl>, N195 <dbl>, N196 <dbl>, N197 <dbl>, N198 <dbl>
-

Or to extract the raw sample information:

-
-sinfo(analysis,type = 'raw')
-
## # A tibble: 120 × 9
-##    injorder pathcdf              filecdf name.org remark name    rep day   class
-##       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
-##  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
-##  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
-##  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
-##  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
-##  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
-##  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
-##  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
-##  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
-##  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
-## 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
-## # … with 110 more rows
-

Alternatively the raw or preTreated functions can be used to extract the AnalysisData class objects containing both the metabolomics data and sample information for the raw and pre-treated data respectively.

-
-raw(analysis)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 200 
-## Info: 9
-
-preTreated(analysis)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 48 
-## Info: 9
-

Lastly the analysisResults function can be used to extract the results of any of the analysis elements. The following will extract the modelling results:

-
-analysisResults(analysis,element = 'modelling')
-
## $randomForest
-## 
-## Random forest classification 
-## 
-## Samples:  120 
-## Features:     48 
-## Response:     day 
-## # comparisons:    1
-
-
-
- - - -
- - - - -
- - - - - - diff --git a/docs/articles/introduction_files/header-attrs-2.10/header-attrs.js b/docs/articles/introduction_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/introduction_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/introduction_files/header-attrs-2.7/header-attrs.js b/docs/articles/introduction_files/header-attrs-2.7/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/introduction_files/header-attrs-2.7/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/metabolyseR.html b/docs/articles/metabolyseR.html deleted file mode 100644 index e3440bd4..00000000 --- a/docs/articles/metabolyseR.html +++ /dev/null @@ -1,847 +0,0 @@ - - - - - - - -Introduction • metabolyseR - - - - - - - - - - - - -
-
- - - - -
-
- - - - -
-

Introduction -

-

The metabolyseR package provides a suite of methods that encompass three elements of metabolomics data analysis:

-
    -
  • data pre-treatment
  • -
  • modelling / data mining
  • -
  • correlation analyses
  • -
-

The package also distinguishes between the flexibility and simplicity required for exploratory analyses compared to the convenience needed for more complex routine analyses. This is reflected in the underlying S4 object-oriented implementations and associated methods defined within the package. It should be noted that it is useful to understand the principles involved in using metabolyseR for exploratory analyses to aid in extracting and wrangling the results generated from routine analyses.

-

The following document will provide an introduction to the basic usage of the package and includes how to create and use the base classes that are the foundation of metabolyseR. This will be focused around the applications for both exploratory and routine analyses. For more detailed information on the individual analysis elements see their associated vignette using:

-
-browseVignettes('metabolyseR')
-

There is also an example quick start analysis vignette provided.

-
-vignette('quick_start','metabolyseR')
-

Any issues, bugs or errors encountered while using the package should be reported here.

-

The examples shown here will use the abr1 data set from the metaboData package (?metaboData::abr1). This is a nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data set from a plant-pathogen infection time course experiment. The examples will also include use of the pipe %>% from the magrittr package.

-

Firstly load the necessary packages:

- -
-
-

Parallel processing -

-

The package supports parallel processing using the future package.

-

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel back-end using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

-
-plan(future::multisession,workers = 2)
-

See the future package documentation for more information on the types of parallel implementations that are available.

-
-
-

Exploratory analyses -

-

For exploratory analyses, simple questions of the data need to be answered quickly, requiring few steps. Key requirements for any tool used by investigators are that it should be both simple and flexible.

-

In metabolyseR, the AnalysisData class is the base S4 class that provides these requirements. The following sections will give an overview of the basics in constructing and using these objects as the base for analysis.

-
-

Analysis data -

-

We can firstly construct an AnalysisData object which requires two data tables. The first is the metabolomic data where the columns are the metabolome features, the rows the sample observations and contains the abundance values. The second is the sample meta-information where the row order should match that of the metabolome data table. Using the example data, this can be constructed and assigned to the variable d by:

-
-d <- analysisData(data = abr1$neg,
-                  info = abr1$fact)
-

Where abr1$neg is the negative ionisation mode data and abr1$fact is the corresponding sample information. By printing d we can view some basic information about our data.

-
-print(d)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 9
-

We can also return the numbers of samples and numbers of features respectively using the following:

- -
## [1] 120
- -
## [1] 2000
-

The data table can be extracted using the dat method:

-
-dat(d)
-
## # A tibble: 120 × 2,000
-##       N1    N2    N3    N4    N5    N6    N7    N8    N9   N10   N11   N12   N13
-##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-##  1     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  2     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  3     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  4     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  5     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  6     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  7     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  8     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  9     0     0     0     0     0     0     0     0     0     0     0     0     0
-## 10     0     0     0     0     0     0     0     0     0     0     0     0     0
-## # … with 110 more rows, and 1,987 more variables: N14 <dbl>, N15 <dbl>,
-## #   N16 <dbl>, N17 <dbl>, N18 <dbl>, N19 <dbl>, N20 <dbl>, N21 <dbl>,
-## #   N22 <dbl>, N23 <dbl>, N24 <dbl>, N25 <dbl>, N26 <dbl>, N27 <dbl>,
-## #   N28 <dbl>, N29 <dbl>, N30 <dbl>, N31 <dbl>, N32 <dbl>, N33 <dbl>,
-## #   N34 <dbl>, N35 <dbl>, N36 <dbl>, N37 <dbl>, N38 <dbl>, N39 <dbl>,
-## #   N40 <dbl>, N41 <dbl>, N42 <dbl>, N43 <dbl>, N44 <dbl>, N45 <dbl>,
-## #   N46 <dbl>, N47 <dbl>, N48 <dbl>, N49 <dbl>, N50 <dbl>, N51 <dbl>, …
-

Or alternatively, can be used to assign a new data table:

-
-dat(d) <- abr1$pos
-d
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 9
-

The sample information table can be extracted using the sinfo method:

-
-sinfo(d)
-
## # A tibble: 120 × 9
-##    injorder pathcdf              filecdf name.org remark name    rep day   class
-##       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
-##  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
-##  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
-##  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
-##  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
-##  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
-##  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
-##  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
-##  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
-##  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
-## 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
-## # … with 110 more rows
-

And similarly used to assign a new sample information table:

-
-sinfo(d) <- abr1$fact[,1:2]
-d
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 2
-
-
-

Sample information -

-

There are a number of methods that provide utility for querying and altering the sample information within an AnalysisData object. These methods are all named with the prefix cls and include:

-
    -
  • clsAdd
  • -
  • clsArrange
  • -
  • clsAvailable
  • -
  • clsExtract
  • -
  • clsRemove
  • -
  • clsRename
  • -
  • clsReplace
  • -
-

The names of the available sample information columns can be shown using clsAvailable().

- -
## [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
-## [8] "day"      "class"
-

A given column can be extracted using clsExtract(). Here, the day column is extracted.

-
-clsExtract(d,cls = 'day')
-
##   [1] 2 3 4 1 2 1 2 4 H H 4 5 1 2 H 5 3 3 2 H 4 3 5 4 H H 3 H H 1 1 1 5 5 3 4 H
-##  [38] 1 5 5 1 2 4 3 2 4 3 2 5 4 4 H 3 4 2 4 4 1 5 4 4 1 1 H 3 2 H 3 3 1 2 H H 2
-##  [75] 3 5 3 2 5 2 4 3 H 2 3 2 1 1 4 5 3 2 1 H 5 2 4 H 1 4 4 1 1 5 H 5 1 3 3 5 5
-## [112] 5 3 2 5 H 5 H 2 1
-## Levels: 1 2 3 4 5 H
-

Sample class frequencies could then be computed.

-
-clsExtract(d,cls = 'day') %>%
-  table()
-
## .
-##  1  2  3  4  5  H 
-## 20 20 20 20 20 20
-

It can be seen that there are 20 samples available in each class.

-

Another example is the addition of a new sample information column. In the following, a column called new_class will be added with all samples labelled 1.

-
-d <- clsAdd(d,cls = 'new_class',value = rep(1,nSamples(d)))
-clsAvailable(d)
-
##  [1] "injorder"  "pathcdf"   "filecdf"   "name.org"  "remark"    "name"     
-##  [7] "rep"       "day"       "class"     "new_class"
-
-
-

Keeping / removing samples or features -

-

Samples or features can easily be kept or removed from an AnalysisData object as is most convenient.

-

Below can be seen the first 6 sample indexes in the injorder column of the sample information.

-
-samples <- d %>%
-  clsExtract(cls = 'injorder') %>%
-  head()
-
-print(samples)
-
## [1] 1 2 3 4 5 6
-

Only these samples could be kept using:

-
-d %>%
-  keepSamples(idx = 'injorder',samples = samples)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 6 
-## Features: 2000 
-## Info: 10
-

Or removed using:

-
-d %>%
-  removeSamples(idx = 'injorder',samples = samples)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 114 
-## Features: 2000 
-## Info: 10
-

The process is very similar for keeping or removing specific metabolome features from the data table. Below can be seen the first 6 feature names in the data table.

-
-feat <- d %>%
-  features() %>%
-  head()
-
-print(feat)
-
## [1] "N1" "N2" "N3" "N4" "N5" "N6"
-

Only these features can be kept using:

-
-d %>%
-  keepFeatures(features = feat)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 6 
-## Info: 10
-

Or to remove these features:

-
-d %>%
-  removeFeatures(features = feat)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 1994 
-## Info: 10
-
-
-
-

Routine analyses -

-

Routine analyses are those that are often made up of numerous steps where parameters have likely already been previously established. The emphasis here is on convenience with as little code as possible required. In these analyses, the necessary analysis elements, order and parameters are first prepared and then the analysis routine subsequently performed in a single step. This section will introduce how this type of analysis can be performed using metabolyseR and will include four main topics:

-
    -
  • analysis parameter selection
  • -
  • performing an analysis
  • -
  • performing a re-analysis
  • -
  • extracting analysis results
  • -
-
-

Analysis parameters -

-

Parameter selection is the fundamental aspect for performing routine analyses using metabolyseR and will be the step requiring the most input from the user. The parameters for an analysis are stored in an S4 object of class AnalysisParameters containing the relevant parameters of the selected analysis elements.

-

The parameters have been named so that they denote the same functionality commonly across all analysis element methods. Discussion of the specific parameters can be found within the vignettes of the relevant analysis elements. These can be accessed using:

-
-browseVignettes('metabolyseR')
-

There are two ways to specify the parameters to use for analysis. The first is programmatically and the second is through the use of the YAML format.

-
-

Programmatic specification -

-

The available analysis elements can be shown using:

- -
## [1] "pre-treatment" "modelling"     "correlations"
-

The analysisParameters() function can be used to create an AnalysisParameters object containing the default parameters. For example, the code below will return default parameters for all the metabolyseR analysis elements.

- -
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = class
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = class
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = class
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = class
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = class
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-## 
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
-

To retrieve parameters for a subset of analysis elements the following can be run, returning parameters for only the pre-treatment and modelling elements.

-
-p <- analysisParameters(c('pre-treatment','modelling'))
-p
-
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = class
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = class
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = class
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = class
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = class
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-

The changeParameter() function can be used to uniformly change these parameters across all of the selected methods. The example below changes the defaults of all the parameters named cls from the default class to day.

-
-p <- analysisParameters()
-changeParameter(p,'cls') <- 'day'
-p
-
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = day
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = day
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = day
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = day
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = day
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = day
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = day
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-## 
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
-

Alternatively, the parameters of specific analysis elements can be targeted using the elements argument. The following will change the cls parameter back to class for only the pre-treatment element parameters:

-
-changeParameter(p,'cls',elements = 'pre-treatment') <- 'class'
-

Parameters can be extracted from the AnalysisParameters class using the parameters() function for a specified element.

-
-parameters(p,'correlations')
-
## $method
-## [1] "pearson"
-## 
-## $pAdjustMethod
-## [1] "bonferroni"
-## 
-## $corPvalue
-## [1] 0.05
-

Each analysis element has a function for returning default parameters for specific methods. These include preTreatmentParameters(), modellingParameters() and correlationParameters(). Each returns a list of the default parameters for the specified methods as shown in the example for modellingParameters() below.

- -
## $anova
-## $anova$cls
-## [1] "class"
-## 
-## $anova$pAdjust
-## [1] "bonferroni"
-## 
-## $anova$comparisons
-## list()
-## 
-## $anova$returnModels
-## [1] FALSE
-

Refer to the documentation (?) of each function for specific usage details.

-

The parameters returned by these functions can be assigned to an AnalysisParameters object, again using parameters()

-
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
-    occupancyFilter = 'maximum',
-    transform = 'TICnorm'
-      )
-  )
-
-
-

YAML specification -

-

Due to the relatively complex structure of the parameters needed for analyses containing many components, it is also possible to specify analysis parameters using the YAML file format. YAML parameter files (.yaml) can be parsed using the parseParameters() function. The example below shows the YAML specification for the defaults returned by analysisParameters().

-
pre-treatment:
-  QC:
-    occupancyFilter:
-      cls: class
-      QCidx: QC
-      occupancy: 0.667
-    impute:
-      cls: class
-      QCidx: QC
-      occupancy: 0.667
-    RSDfilter:
-      cls: class
-      QCidx: QC
-      RSDthresh: 0.5
-    removeQC:
-      cls: class
-      QCidx: QC
-  occupancyFilter:
-    maximum:
-      cls: class
-      occupancy: 0.667
-  impute:
-    class:
-      cls: class
-      occupancy: 0.667
-  transform:
-    TICnorm: ~
-classification:
-  cls: class
-  method: randomForest
-  pars:
-    sampling: boot
-    niter: 10
-    nreps: 10
-    strat: yes
-featureSelection:
-  method: fs.rf
-  cls: class
-  pars:
-    fs.rf:
-      nreps: 100
-correlations:
-  method: pearson
-  pAdjustMethod: bonferroni
-  corPvalue: 0.05
-

This can be passed directly into an AnalysisParameters object using the following:

-
-paramFile <- system.file('defaultParameters.yaml',package = 'metabolyseR')
-p <- parseParameters(paramFile)
-

For more complex pre-treatment situations such as the following:

-
pre-treatment:
-  remove:
-    sample:
-      idx: fileOrder
-      samples: 1
-  remove1:
-    class:
-      cls: day
-      classes:
-      - H
-      - 1
-  occupancyFilter:
-    maximum:
-      cls: class
-      occupancy: 0.667
-  transform:
-    TICnorm: ~
-

Where multiple steps of the same method are needed (here, remove), these are numbered sequentially. Where multiple values also need to be provided to a particular argument (e.g. classes = c('H','1')), these should be supplied as a hyphenated list.

-

Existing AnalysisParameters objects can also be exported to YAML format as shown below:

-
-p <- analysisParameters()
-exportParameters(p,file = 'analysis_parameters.yaml')
-
-
-
-

Performing an analysis -

-

The analysis is performed in a single step using the metabolyse() function. This accepts the metabolomic data, the sample information and the analysis parameters.

-

The metabolomic data should be a table of abundance values where the columns are the metabolome features and the rows are each sample observation. Similarly, the sample meta-information table should consist of the observations as rows and the meta information as columns. The order of the observation rows of the sample information table should be concordant with the rows in the metabolomics data table.

-

We can run an example analysis using the abr1 data set by first generating the default parameters for pre-treatment and modelling (random forest) analysis elements.

-
-p <- analysisParameters(c('pre-treatment','modelling'))
-

Custom pre-treatment parameters can then be specified to only include occupancy filtering and total ion count normalisation.

-
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
-  occupancyFilter = 'maximum',
-  transform = 'TICnorm')
-)
-

Next the cls parameters can be changed to use the day sample information column throughout the analysis.

-
-changeParameter(p,'cls') <- 'day'
-

Finally, the analysis can be run in a single step. Here only the first 200 features of the negative ionisation mode data are specified to reduce the analysis time needed for this example.

-
-analysis <- metabolyse(abr1$neg[,1:200],abr1$fact,p) 
-
## 
-## metabolyseR  v0.14.9 Thu Jan 27 11:59:17 2022
-
## ________________________________________________________________________________
-
## Parameters:
-## pre-treatment
-##  occupancyFilter
-##      maximum
-##          cls = day
-##          occupancy = 2/3
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = day
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-
## ________________________________________________________________________________
-
## 
[34mPre-treatment 
[39m…
-
-
[34mPre-treatment 
[39m    
[32m✓
[39m [0.7S]
-## 
[34mModelling 
[39m…
-
[34m
-Modelling 
[39m 
[32m✓
[39m [2.4S]
-## ________________________________________________________________________________
-## 
-## 
[32mComplete! 
[39m[3.1S]
-

Note: If a data pre-treatment step is not performed prior to modelling or correlation analysis, the raw data will automatically be used.

-

The analysis object containing the analysis results can be printed to provide some basic information about the results of the analysis.

-
-print(analysis)
-
## 
-## metabolyseR v0.14.9
-## Analysis:
-##     Thu Jan 27 11:59:17 2022
-## 
-##  Raw Data:
-##      No. samples = 120
-##      No. features = 200
-## 
-##  Pre-treated Data:
-##      Thu Jan 27 11:59:18 2022
-##      No. samples = 120
-##      No. features = 48
-## 
-##  Modelling:
-##      Thu Jan 27 11:59:20 2022
-##      Methods: randomForest
-
-
-

Performing a re-analysis -

-

There are likely to be occasions where an analysis will need to be re-analysed using a new set of parameters. This can be achieved using the reAnalyse() function.

-

In the example below we will run a correlation analysis in addition to the pre-treatment and modelling elements already performed.

-

Firstly, we can specify the correlation parameters:

-
-parameters <- analysisParameters('correlations')
-

Then perform the re-analysis on our previously analysed Analysis object, specifying the additional parameters.

-
-analysis <- reAnalyse(analysis,parameters)
-
## 
-## metabolyseR v0.14.9 Thu Jan 27 11:59:21 2022
-## ________________________________________________________________________________
-## Parameters:
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
-## ________________________________________________________________________________
-
## 
[34mCorrelations 
[39m…
-
[34m
-Correlations 
[39m  
[32m✓
[39m [0.1S]
-
## ________________________________________________________________________________
-## 
-## Complete! [0.1S]
-

An overview of the results of the analysis (now including correlations) can then be printed.

-
-print(analysis)
-
## 
-## metabolyseR v0.14.9
-## Analysis:
-##     Thu Jan 27 11:59:17 2022
-## 
-##  Raw Data:
-##      No. samples = 120
-##      No. features = 200
-## 
-##  Pre-treated Data:
-##      Thu Jan 27 11:59:18 2022
-##      No. samples = 120
-##      No. features = 48
-## 
-##  Modelling:
-##      Thu Jan 27 11:59:20 2022
-##      Methods: randomForest
-## 
-##  Correlations:
-##      Thu Jan 27 11:59:21 2022
-##      No. correlations = 140
-
-
-

Extracting analysis results -

-

An analysis performed by metabolyse() returns an S4 object of class Analysis. There are a number of ways of extracting analysis results from this object.

-

Similarly to the AnalysisData class, the dat() and sinfo() functions can be used to extract the metabolomics data or sample information tables directly for either the raw or pre-treated data.

-

For example, to extract the pre-treated metabolomics data from our object analysis:

-
-dat(analysis,type = 'pre-treated')
-
## # A tibble: 120 × 48
-##       N113    N115    N117    N118    N119    N127    N128    N129  N130    N131
-##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>
-##  1 0.00646 0       1.68e-4 0       1.60e-3 0.0323  2.65e-4 2.80e-4     0 0      
-##  2 0.0113  7.74e-4 1.02e-3 0       1.43e-3 0.00856 0       3.95e-4     0 0      
-##  3 0.00931 6.01e-4 2.70e-3 6.22e-5 5.58e-3 0       0       1.05e-4     0 6.51e-4
-##  4 0.00798 0       0       0       1.62e-4 0.00848 0       4.05e-4     0 1.28e-4
-##  5 0.0105  0       0       0       0       0.00658 0       1.97e-3     0 0      
-##  6 0.00454 0       2.48e-4 3.25e-4 5.31e-4 0.00207 0       1.98e-4     0 0      
-##  7 0.0117  0       1.14e-3 0       4.39e-4 0.00603 0       4.04e-4     0 0      
-##  8 0.00787 2.36e-3 1.43e-3 1.52e-4 4.22e-3 0.00290 2.78e-4 5.76e-5     0 0      
-##  9 0.00136 1.87e-4 8.17e-4 1.87e-4 0       0.0610  1.31e-4 5.23e-4     0 0      
-## 10 0.00899 4.26e-4 2.06e-3 0       8.36e-4 0.00106 7.72e-4 0           0 0      
-## # … with 110 more rows, and 38 more variables: N132 <dbl>, N133 <dbl>,
-## #   N134 <dbl>, N135 <dbl>, N136 <dbl>, N137 <dbl>, N139 <dbl>, N143 <dbl>,
-## #   N145 <dbl>, N146 <dbl>, N147 <dbl>, N149 <dbl>, N153 <dbl>, N155 <dbl>,
-## #   N157 <dbl>, N161 <dbl>, N163 <dbl>, N164 <dbl>, N165 <dbl>, N168 <dbl>,
-## #   N169 <dbl>, N170 <dbl>, N171 <dbl>, N173 <dbl>, N174 <dbl>, N175 <dbl>,
-## #   N179 <dbl>, N180 <dbl>, N181 <dbl>, N183 <dbl>, N187 <dbl>, N191 <dbl>,
-## #   N192 <dbl>, N193 <dbl>, N195 <dbl>, N196 <dbl>, N197 <dbl>, N198 <dbl>
-

Or to extract the raw sample information:

-
-sinfo(analysis,type = 'raw')
-
## # A tibble: 120 × 9
-##    injorder pathcdf              filecdf name.org remark name    rep day   class
-##       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
-##  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
-##  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
-##  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
-##  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
-##  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
-##  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
-##  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
-##  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
-##  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
-## 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
-## # … with 110 more rows
-

Alternatively the raw or preTreated functions can be used to extract the AnalysisData class objects containing both the metabolomics data and sample information for the raw and pre-treated data respectively.

-
-raw(analysis)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 200 
-## Info: 9
-
-preTreated(analysis)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 48 
-## Info: 9
-

Lastly the analysisResults function can be used to extract the results of any of the analysis elements. The following will extract the modelling results:

-
-analysisResults(analysis,element = 'modelling')
-
## $randomForest
-## 
-## Random forest classification 
-## 
-## Samples:  120 
-## Features:     48 
-## Response:     day 
-## # comparisons:    1
-
-
-
- - - -
- - - - -
- - - - - - - - diff --git a/docs/articles/metabolyseR_files/header-attrs-2.11/header-attrs.js b/docs/articles/metabolyseR_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/metabolyseR_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/modelling.html b/docs/articles/modelling.html deleted file mode 100644 index 71b72881..00000000 --- a/docs/articles/modelling.html +++ /dev/null @@ -1,857 +0,0 @@ - - - - - - - -Modelling and feature selection • metabolyseR - - - - - - - - - - - - -
-
- - - - -
-
- - - - -
-

Introduction -

-

Modelling provides the essential data mining step for extracting biological information and explanatory metabolome features from a data set relating to the experimental conditions. metabolyseR provides a number of both univariate and multivariate methods for data mining.

-

For an introduction to the usage of metabolyseR for both exploratory and routine analyses, see the introduction vignette using:

-
-vignette('introduction','metabolyseR')
-

To further supplement this document, a quick start example analysis is also available as a vignette:

-
-vignette('quick_start','metabolyseR')
-

To begin, the package can be loaded using:

-
-library(metabolyseR)
-#> 
-#> Attaching package: 'metabolyseR'
-#> The following object is masked from 'package:stats':
-#> 
-#>     anova
-#> The following objects are masked from 'package:base':
-#> 
-#>     raw, split
-
-

Example data -

-

The examples used here will use the abr1 data set from the metaboData package. This is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The pipe %>% from the magrittr package will also be used. The example data can be loaded using:

- -

Only the negative acquisition mode data (abr1$neg) will be used along with the sample meta-information (abr1$fact). Create an AnalysisData class object, assigned to the variable d, using the following:

-
-d <- analysisData(abr1$neg[,1:500],abr1$fact)
-
-print(d)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 500 
-#> Info: 9
-

As can be seen above the data set contains a total of 120 samples and 500 features.

-
-
-

Parallel processing -

-

The package supports parallel processing using the future package.

-

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel implementation using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

-
-plan(future::multisession,workers = 2)
-

See the future package documentation for more information on the types of parallel implementations that are available.

-
-
-
-

Random Forest -

-

Random forest is a versatile ensemble machine learning approach based on forests of decision trees for multivariate data mining. This can include unsupervised analysis, classification of discrete response variables and regression of continuous responses.

-

Random forest can be performed in metabolyseR using the randomForest() method. For further details on the arguments for using this function, see ?randomForest. This implementation of random forest in metabolyseR utilises the randomForest package. See ?randomForest::randomForest for more information about that implementation.

-
-

Unsupervised -

-

The unsupervised random forest approach can be a useful starting point for analysis in any experimental context. It can be used to give a general overview of the structure of the data and to identify any possible problems. These could include situations such as the presence of outlier samples or splits in the data caused by the impact of analytical or sample preparation factors. Unsupervised random forest can have advantages in these assessments over other approaches such as Principal Component Analysis (PCA). It is less sensitive to the effect of a single feature that in fact could have little overall impact relative to the other hundreds that could be present in a data set.

-

The examples below will show the use of unsupervised random forest for assessing the general structure of the example data set and the presence of outlier samples.

-

Unsupervised random forest can be performed by setting the cls argument of randomForest() to NULL:

-
-unsupervised_rf <- d %>%
-  randomForest(cls = NULL)
-

The type of random forest that has been performed can be checked using the type method.

-
-type(unsupervised_rf)
-#> [1] "unsupervised"
-

Or by printing the results object.

-
-unsupervised_rf
-#> 
-#> Unsupervised random forest
-#> 
-#> Samples:  120 
-#> Features:     500
-

Firstly, the presence of outlier samples will be assessed. A multidimensional scaling (MDS) plot can be used to visualise the relative proximity of the observations, as shown in the following. The individual points are also labelled by their injection order to enable the identification of individual samples if necessary.

-
-plotMDS(unsupervised_rf,
-        cls = NULL,
-        label = 'injorder',
-        labelSize = 3,
-        title = 'Outlier detection')
-#> Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider
-#> increasing max.overlaps
-

-

From the plot above, it can be seen that a single sample lies outside the 95% confidence ellipse. It is unlikely that this sample can be considered an outlier as its position is a result of the underlying class structure as opposed to differences specific to that individual sample.

-

The structure of these observations can be investigated further by colouring the points by a different experimental factor. This will be by the day class column which is the main experimental factor of interest in this experiment.

-
-plotMDS(unsupervised_rf,
-        cls = 'day')
-

-

This shows that it is indeed the experimental factor of interest that is having the greatest impact on the structure of the data. The progression of the experimental time points is obvious across Dimension 1.

-

The available feature importance metrics for a random forest analysis can be retrieved by:

-
-importanceMetrics(unsupervised_rf)
-#> [1] "1"                    "2"                    "FalsePositiveRate"   
-#> [4] "MeanDecreaseAccuracy" "MeanDecreaseGini"     "SelectionFrequency"
-

And the importance values of these metrics for each feature can be returned using:

-
-importance(unsupervised_rf)
-#> # A tibble: 3,000 × 3
-#>    Feature Metric                Value
-#>    <chr>   <chr>                 <dbl>
-#>  1 N1      1                    0     
-#>  2 N1      2                    0     
-#>  3 N1      FalsePositiveRate    0.0238
-#>  4 N1      MeanDecreaseAccuracy 0     
-#>  5 N1      MeanDecreaseGini     0     
-#>  6 N1      SelectionFrequency   0     
-#>  7 N10     1                    0     
-#>  8 N10     2                    0     
-#>  9 N10     FalsePositiveRate    0.0238
-#> 10 N10     MeanDecreaseAccuracy 0     
-#> # … with 2,990 more rows
-

The explanatory features for a given threshold can be extracted for any of the importance metrics. The following will extract the explanatory features below a threshold of 0.05 based on the false positive rate metric.

-
-unsupervised_rf %>%
-  explanatoryFeatures(metric = "FalsePositiveRate", 
-                      threshold = 0.05)
-#> # A tibble: 359 × 3
-#>    Feature Metric               Value
-#>    <chr>   <chr>                <dbl>
-#>  1 N342    FalsePositiveRate 1.31e-19
-#>  2 N161    FalsePositiveRate 2.34e-16
-#>  3 N341    FalsePositiveRate 6.50e-16
-#>  4 N315    FalsePositiveRate 1.79e-15
-#>  5 N367    FalsePositiveRate 3.47e-14
-#>  6 N173    FalsePositiveRate 9.09e-14
-#>  7 N385    FalsePositiveRate 9.09e-14
-#>  8 N133    FalsePositiveRate 1.52e-12
-#>  9 N439    FalsePositiveRate 1.52e-12
-#> 10 N379    FalsePositiveRate 3.78e-12
-#> # … with 349 more rows
-

In this example there are 359 explanatory features.

-

The trend of the most highly ranked explanatory feature against the day factor can be plotted using the plotFeature() method.

-
-unsupervised_rf %>%
-  plotFeature(feature = 'N425',
-              cls = 'day')
-

-
-
-

Classification -

-

Random forest classification can be used to assess the extent of discrimination (difference) between classes of a discrete response variable. This includes both multinomial (number of classes > 2) and binary (number of classes = 2) comparisons.

-

In multinomial situations, the suitability of a multinomial comparison versus multiple binary comparisons can depend on the experimental context. For instance, in a treatment/control experiment that includes multiple time points, a multinomial comparison using all available classes could be useful to visualise the general structure of the data. However, it could make any extracted explanatory features difficult to reason about as to how they relate to the individual experimental time point or treatment conditions. An investigator could instead identify the binary comparisons relevant to the biological question and focus the further classification comparisons to better select for explanatory features.

-
-

Multinomial comparisons -

-

In experiments with more than two classes, multinomial random forest classification can be used to assess the discrimination between the classes and give an overview of the relative structure between classes.

-

The example data set consists of a total of 6 classes for the day response variable.

-
-d %>% 
-  clsExtract(cls = 'day') %>% 
-  unique()
-#> [1] 2 3 4 1 H 5
-#> Levels: 1 2 3 4 5 H
-

Multinomial classification can be performed by:

-
-multinomial_rf <- d %>%
-  randomForest(cls = 'day')
-
-print(multinomial_rf)
-#> 
-#> Random forest classification 
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Response:     day 
-#> # comparisons:    1
-

The performance of this model can be assessed using metrics based on the success of the out of bag (OOB) predictions. The performance metrics can be extracted using:

-
-multinomial_rf %>%
-  metrics()
-#> # A tibble: 4 × 5
-#>   Response Comparison  .metric  .estimator .estimate
-#>   <chr>    <chr>       <chr>    <chr>          <dbl>
-#> 1 day      1~2~3~4~5~H accuracy multiclass     0.8  
-#> 2 day      1~2~3~4~5~H kap      multiclass     0.76 
-#> 3 day      1~2~3~4~5~H roc_auc  hand_till      0.964
-#> 4 day      1~2~3~4~5~H margin   NA             0.146
-

These metrics include accuracy, Cohen’s kappa (kap), area under the receiver operating characteristic curve (roc_auc, ROC-AUC) and margin. Each metric has both strengths and weaknesses that depend on the context of the classification such as the balance of observations between the classes. As shown below, the class frequencies for this example are balanced with 20 observations per class.

-
-d %>% 
-  clsExtract(cls = 'day') %>% 
-  table()
-#> .
-#>  1  2  3  4  5  H 
-#> 20 20 20 20 20 20
-

In this context, each of these metrics could be used to assess the predictive performance of the model. The margin metric is the difference between the proportion of votes for the correct class and the maximum proportion of votes for the other classes for a given observation, which is then averaged across all the observations. A positive margin value indicates correct classification and values greater than 0.2 can be considered to indicate that the model has strong predictive power. The margin also allows the extent of discrimination to be discerned even in very distinct cases where both the accuracy and ROC-AUC would be registering values of 1.

-

In this example, the values of all the metrics suggest that the model is showing good predictive performance. This can be investigated further by plotting the MDS of observation proximity values.

-
-multinomial_rf %>% 
-  plotMDS(cls = 'day')
-

-

This shows that the model is able to discriminate highly between classes such as 5 and H. It is less able to discriminate more similar classes such as H and 1 or 4 and 5 whose confidence ellipses show a high degree of overlap. This makes sense in the context of this experiment as these are adjacent time points that are more likely to be similar than time points at each end of the experiment.

-

The ROC curves can also be plotted as shown below.

-
-multinomial_rf %>% 
-  plotROC()
-

-

Classes with their line further from the central dashed line are those that were predicted with the greatest reliability by the model. This plot shows that both the H and 1 classes were least reliably predicted which is a result of their close proximity shown in the MDS plot previously.

-

Importance metrics can be used to identify the metabolome features that contribute most to the class discrimination in the model. The available importance metrics for this model are shown below.

-
-importanceMetrics(multinomial_rf)
-#>  [1] "1"                    "2"                    "3"                   
-#>  [4] "4"                    "5"                    "FalsePositiveRate"   
-#>  [7] "H"                    "MeanDecreaseAccuracy" "MeanDecreaseGini"    
-#> [10] "SelectionFrequency"
-

Here, we will use the false positive rate metric with a threshold of below 0.05 to identify explanatory features for the day response variable.

-
-multinomial_rf %>%
-  explanatoryFeatures(metric = 'FalsePositiveRate',
-                      threshold = 0.05)
-#> # A tibble: 121 × 5
-#>    Response Comparison  Feature Metric               Value
-#>    <chr>    <chr>       <chr>   <chr>                <dbl>
-#>  1 day      1~2~3~4~5~H N341    FalsePositiveRate 1.02e-93
-#>  2 day      1~2~3~4~5~H N133    FalsePositiveRate 7.38e-68
-#>  3 day      1~2~3~4~5~H N163    FalsePositiveRate 3.59e-61
-#>  4 day      1~2~3~4~5~H N439    FalsePositiveRate 1.07e-54
-#>  5 day      1~2~3~4~5~H N342    FalsePositiveRate 3.19e-49
-#>  6 day      1~2~3~4~5~H N377    FalsePositiveRate 3.19e-49
-#>  7 day      1~2~3~4~5~H N171    FalsePositiveRate 6.26e-44
-#>  8 day      1~2~3~4~5~H N497    FalsePositiveRate 6.11e-30
-#>  9 day      1~2~3~4~5~H N146    FalsePositiveRate 2.74e-29
-#> 10 day      1~2~3~4~5~H N195    FalsePositiveRate 7.16e-25
-#> # … with 111 more rows
-

As shown above there were a total of 121 explanatory features identified.

-

Within a multinomial experiment, where it might not be suitable to compare all the classes at once, it is also possible to specify the exact class comparisons to include using the comparisons argument. This should be specified as a named list, with the names corresponding to the cls argument. Each named element should then consist of a vector of comparisons, with the classes to compare separated using the ~.

-

The following specifies two comparisons (H~1~2,H~1~5) for the day response variable and displays the performance metrics.

-
-d %>%
-  randomForest(cls = 'day',
-               comparisons = list(day = c('H~1~2',
-                                          'H~1~5'))) %>%
-  metrics()
-#> # A tibble: 8 × 5
-#>   Response Comparison .metric  .estimator .estimate
-#>   <chr>    <chr>      <chr>    <chr>          <dbl>
-#> 1 day      H~1~2      accuracy multiclass     0.833
-#> 2 day      H~1~2      kap      multiclass     0.75 
-#> 3 day      H~1~5      accuracy multiclass     0.75 
-#> 4 day      H~1~5      kap      multiclass     0.625
-#> 5 day      H~1~2      roc_auc  hand_till      0.906
-#> 6 day      H~1~5      roc_auc  hand_till      0.909
-#> 7 day      H~1~2      margin   NA             0.172
-#> 8 day      H~1~5      margin   NA             0.320
-

The MDS and ROC curve plots can also be plotted simultaneously for the two comparisons.

-
-d %>%
-  randomForest(cls = 'day',
-               comparisons = list(day = c('H~1~2',
-                                          'H~1~5'))) %>%
-  {plotMDS(.,cls = 'day') +
-      plotROC(.) +
-      patchwork::plot_layout(ncol = 1)}
-

-

Similarly, it is also possible to model multiple response factors with a single random forest call by specifying a vector of response class information column names to the cls argument. In the following, both the name and day response factors will be analysed and the performance metrics returned in a single table.

-
-d %>%
-  randomForest(cls = c('name','day')) %>%
-  metrics()
-#> Warning: Classes with < 5 replicates removed: "11_3", "11_4", "11_5", "11_6",
-#> "11_H", "12_1", "12_3", "12_6", "12_H", "13_1", "13_2", "13_3", "13_5", "13_6",
-#> "13_H", "14_2", "14_3", "14_5", "14_6", "14_H", "15_1", "15_2", "15_4", "15_5",
-#> "15_6", "15_H"
-#> Unbalanced classes detected. Stratifying sample size to the smallest class size.
-#> # A tibble: 8 × 5
-#>   Response Comparison                    .metric  .estimator .estimate
-#>   <chr>    <chr>                         <chr>    <chr>          <dbl>
-#> 1 name     11_2~12_2~12_4~13_4~14_4~15_3 accuracy multiclass    0.35  
-#> 2 name     11_2~12_2~12_4~13_4~14_4~15_3 kap      multiclass    0.212 
-#> 3 name     11_2~12_2~12_4~13_4~14_4~15_3 roc_auc  hand_till     0.753 
-#> 4 name     11_2~12_2~12_4~13_4~14_4~15_3 margin   NA           -0.0485
-#> 5 day      1~2~3~4~5~H                   accuracy multiclass    0.8   
-#> 6 day      1~2~3~4~5~H                   kap      multiclass    0.76  
-#> 7 day      1~2~3~4~5~H                   roc_auc  hand_till     0.964 
-#> 8 day      1~2~3~4~5~H                   margin   NA            0.146
-

The MDS plots can also be returned for both models simultaneously.

-
-d %>%
-  randomForest(cls = c('name','day')) %>%
-  plotMDS()
-#> Warning: Classes with < 5 replicates removed: "11_3", "11_4", "11_5", "11_6",
-#> "11_H", "12_1", "12_3", "12_6", "12_H", "13_1", "13_2", "13_3", "13_5", "13_6",
-#> "13_H", "14_2", "14_3", "14_5", "14_6", "14_H", "15_1", "15_2", "15_4", "15_5",
-#> "15_6", "15_H"
-#> Unbalanced classes detected. Stratifying sample size to the smallest class size.
-

-
-
-

Binary comparisons -

-

It may in some cases be preferable to analyse class comparisons as multiple binary comparisons.

-

The possible binary comparisons for a given response variable can be displayed using the binaryComparisons() method. Below shows the 15 comparisons for the day response variable.

-
-binaryComparisons(d,cls = 'day')
-#>  [1] "1~2" "1~3" "1~4" "1~5" "1~H" "2~3" "2~4" "2~5" "2~H" "3~4" "3~5" "3~H"
-#> [13] "4~5" "4~H" "5~H"
-

For this example we will only use the binary comparisons containing the H class.

-
-binary_comparisons <- binaryComparisons(d,cls = 'day') %>% 
-  .[stringr::str_detect(.,'H')]
-

The binary comparisons can then be performed using the following.

-
-binary_rf <- d %>%
-  randomForest(cls = 'day',
-               comparisons = list(day = binary_comparisons))
-
-print(binary_rf)
-#> 
-#> Random forest classification 
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Response:     day 
-#> # comparisons:    5
-

To run all possible binary comparisons, the binary = TRUE argument could instead be used.

-

The MDS plots for each comparison can be visualised to inspect the comparisons.

-
-binary_rf %>% 
-  plotMDS(cls = 'day')
-

-

These plots show good separation in all the comparisons except H~1, which is also shown by the plot of the performance metrics below. Each of the comparisons is showing perfect performance for the accuracy, Cohen’s kappa and ROC-AUC metrics, as well as very high margin values, except for the H~1 comparison.

-
-binary_rf %>% 
-  plotMetrics()
-

-

The explanatory features for these comparisons can be extracted as below using the false positive rate metric and a cut-off threshold of 0.05. This gives a total of 251 explanatory features.

-
-binary_rf %>% 
-  explanatoryFeatures(metric = 'FalsePositiveRate',
-                      threshold = 0.05)
-#> # A tibble: 251 × 5
-#>    Response Comparison Feature Metric               Value
-#>    <chr>    <chr>      <chr>   <chr>                <dbl>
-#>  1 day      2~H        N341    FalsePositiveRate 7.34e-52
-#>  2 day      2~H        N439    FalsePositiveRate 1.80e-45
-#>  3 day      3~H        N342    FalsePositiveRate 2.71e-39
-#>  4 day      2~H        N327    FalsePositiveRate 1.06e-35
-#>  5 day      3~H        N439    FalsePositiveRate 1.06e-35
-#>  6 day      2~H        N477    FalsePositiveRate 1.60e-34
-#>  7 day      3~H        N377    FalsePositiveRate 1.60e-34
-#>  8 day      4~H        N477    FalsePositiveRate 7.40e-34
-#>  9 day      2~H        N447    FalsePositiveRate 6.48e-30
-#> 10 day      3~H        N163    FalsePositiveRate 6.48e-30
-#> # … with 241 more rows
-

A heatmap of these explanatory features can be plotted to show their mean relative intensities across the experiment time points. Here, the classes are also refactored to customise the order of the classes on the x-axis.

-
-refactor_cls <- clsExtract(binary_rf,
-                           cls = 'day') %>% 
-  factor(.,levels = c('H','1','2','3','4','5'))
-
-binary_rf <- clsReplace(binary_rf,
-                        value = refactor_cls,
-                        cls = 'day')
-binary_rf %>% 
-  plotExplanatoryHeatmap(metric = 'FalsePositiveRate',
-                      threshold = 0.05,
-                      featureNames = TRUE)
-

-
-
-
-

Regression -

-

Random forest regression can be used to assess the extent of association of the metabolomic data with continuous response variables.

-

In this example, the extent of association of injection order with the example data will be assessed.

-
-regression_rf <- d %>% 
-  randomForest(cls = 'injorder')
-
-print(regression_rf)
-#> 
-#> Random forest regression 
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Response:     injorder
-

The regression model performance metrics, based on the OOB prediction error, can be extracted using the following:

-
-regression_rf %>% 
-  metrics()
-#> # A tibble: 5 × 4
-#>   Response .metric .estimator .estimate
-#>   <chr>    <chr>   <chr>          <dbl>
-#> 1 injorder rsq     standard       0.476
-#> 2 injorder mae     standard      23.5  
-#> 3 injorder mape    standard     154.   
-#> 4 injorder rmse    standard      26.5  
-#> 5 injorder ccc     standard       0.508
-

These regression metrics include R² (rsq), mean absolute error (mae), mean absolute percentage error (mape), root mean squared error (rmse) and the concordance correlation coefficient (ccc).

-

The R² and concordance correlation coefficient metrics suggest that there is some association of features with the injection order, although this is weak. This is in agreement with the mean absolute error metric, which shows that, on average, the injection order could only be predicted to an accuracy of 23 injection order positions.

-

The MDS plot below shows the relative proximities of the samples based on this injection order regression model. This shows that, for the most part, there is little correspondence of the sample positions with their injection order. However, there is a small grouping of samples towards the end of the run, around samples ~99 to 120. This suggests that there could have been some analytical issues, for certain features, towards the end of the mass spectral analytical run.

-
-regression_rf %>% 
-  plotMDS(cls = NULL,
-          ellipses = FALSE,
-          label = 'injorder',
-          labelSize = 3)
-#> Warning: ggrepel: 40 unlabeled data points (too many overlaps). Consider
-#> increasing max.overlaps
-

-

The available feature importance metrics for this regression model can be listed.

-
-regression_rf %>% 
-  importanceMetrics()
-#> [1] "%IncMSE"       "IncNodePurity"
-

The feature importance metrics can be plotted to give an overview of their distribution. The following will plot the percentage increase in the mean squared error (%IncMSE) importance metric.

-
-regression_rf %>% 
-  plotImportance(metric = "%IncMSE", 
-                 rank = FALSE)
-

-

This shows that there are only a few features that are contributing to the association with injection order. These explanatory features can be extracted with the following, using a threshold of above 5.

-
-regression_rf %>% 
-  explanatoryFeatures(metric = '%IncMSE',
-                      threshold = 5)
-#> # A tibble: 7 × 4
-#>   Response Feature Metric  Value
-#>   <chr>    <chr>   <chr>   <dbl>
-#> 1 injorder N283    %IncMSE 19.9 
-#> 2 injorder N135    %IncMSE  8.71
-#> 3 injorder N451    %IncMSE  5.58
-#> 4 injorder N161    %IncMSE  5.51
-#> 5 injorder N306    %IncMSE  5.49
-#> 6 injorder N118    %IncMSE  5.22
-#> 7 injorder N297    %IncMSE  5.07
-

This returned a total of 7 explanatory features above this threshold. The top ranked feature N283 can be plotted to investigate its trend in relation to injection order.

-
-regression_rf %>% 
-  plotFeature(feature = 'N283',
-              cls = 'injorder')
-

-

This shows an increase in the intensity of that feature for samples above 100 in the injection order which corresponds with the cluster that was seen in the MDS plot above.

-
-
-
-

Univariate analyses -

-

Univariate methods select features, explanatory for response variables, with features tested on an individual basis. These methods offer simplicity and easy interpretation in their use, however they provide no information as to how features may interact.

-

The univariate methods currently available in metabolyseR include Welch’s t-test, analysis of variance (ANOVA) and linear regression. The following sections will provide brief examples of the use of each of these methods.

-
-

Welch’s t-test -

-

Welch’s t-test can be used to select explanatory metabolome features for binary comparisons of discrete variables. By default, all the possible binary comparisons for the categories of a response variable will be tested.

-

Below shows the possible binary comparisons for the day response variable for the example data set.

-
-binaryComparisons(d,
-                  cls = 'day')
-#>  [1] "1~2" "1~3" "1~4" "1~5" "1~H" "2~3" "2~4" "2~5" "2~H" "3~4" "3~5" "3~H"
-#> [13] "4~5" "4~H" "5~H"
-

For the following example, only a subset of comparisons will be tested. These will be selected by supplying a list to the comparisons argument.

-
-ttest_analysis <- ttest(d,
-                        cls = 'day',
-                        comparisons = list(day = c('H~1',
-                                                   'H~2',
-                                                   'H~5')))
-
-print(ttest_analysis)
-#> 
-#> Univariate t-test analysis
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Responses:    day 
-#> # comparisons:    3
-

The explanatory features that show a significant difference between the response categories can be extracted as shown below.

-
-explanatoryFeatures(ttest_analysis,
-                    threshold = 0.05)
-#> # A tibble: 73 × 14
-#>    Response Comparison Feature estimate estimate1 estimate2 statistic  p.value
-#>    <chr>    <chr>      <chr>      <dbl>     <dbl>     <dbl>     <dbl>    <dbl>
-#>  1 day      H~5        N163      -735.       19.5   755.       -13.8  1.43e-11
-#>  2 day      H~5        N341      2445.     2537.     92.6       13.6  2.88e-11
-#>  3 day      H~5        N133      1055.     1077.     21.9       13.0  5.44e-11
-#>  4 day      H~2        N341       200.      293.     92.6       10.6  1.38e-10
-#>  5 day      H~5        N171        62.6      64.7     2.15      11.9  2.62e-10
-#>  6 day      H~5        N119        17.2      17.9     0.763     11.0  8.54e-10
-#>  7 day      H~5        N342       243.      247.      4.13      10.8  1.42e- 9
-#>  8 day      H~5        N343        27.4      28.3     0.961      9.83 5.99e- 9
-#>  9 day      H~5        N377       152.      157.      5.05       9.81 6.75e- 9
-#> 10 day      H~5        N477       103.      129.     26.1        9.30 1.05e- 8
-#> # … with 63 more rows, and 6 more variables: parameter <dbl>, conf.low <dbl>,
-#> #   conf.high <dbl>, method <chr>, alternative <chr>, adjusted.p.value <dbl>
-

This will threshold the features based on their adjusted p-value, found in the adjusted.p.value column of the table. The results of all of the features can be returned using the importance() method.

-

A heat map of the explanatory features can be plotted to inspect the relative trends of the explanatory features in relation to the response variable.

-
-plotExplanatoryHeatmap(ttest_analysis)
-

-
-
-

ANOVA -

-

ANOVA can be used to select explanatory features for discrete response variables with 3 or more categories. The following example will compare all the categories in the day response variable. However, the comparisons argument can be used to select particular comparisons of interest.

-
-anova_analysis <- anova(d,
-                        cls = 'day')
-
-print(anova_analysis)
-#> 
-#> Univariate ANOVA analysis
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Responses:    day 
-#> # comparisons:    1
-

The explanatory features that are significantly different between the categories can then be extracted.

-
-explanatoryFeatures(anova_analysis,
-                    threshold = 0.05)
-#> # A tibble: 110 × 10
-#>    Response Comparison  Feature term        df   sumsq meansq statistic  p.value
-#>    <chr>    <chr>       <chr>   <chr>    <dbl>   <dbl>  <dbl>     <dbl>    <dbl>
-#>  1 day      1~2~3~4~5~H N341    response     5  1.09e8 2.17e7     124.  1.90e-44
-#>  2 day      1~2~3~4~5~H N163    response     5  1.25e7 2.51e6     113.  1.71e-42
-#>  3 day      1~2~3~4~5~H N133    response     5  1.96e7 3.92e6     108.  1.71e-41
-#>  4 day      1~2~3~4~5~H N171    response     5  6.29e4 1.26e4      88.8 1.16e-37
-#>  5 day      1~2~3~4~5~H N342    response     5  1.04e6 2.07e5      85.1 7.61e-37
-#>  6 day      1~2~3~4~5~H N343    response     5  1.19e4 2.38e3      66.1 4.43e-32
-#>  7 day      1~2~3~4~5~H N119    response     5  4.92e3 9.83e2      53.8 2.07e-28
-#>  8 day      1~2~3~4~5~H N497    response     5  1.10e5 2.20e4      49.6 4.83e-27
-#>  9 day      1~2~3~4~5~H N137    response     5  6.32e3 1.26e3      39.9 1.59e-23
-#> 10 day      1~2~3~4~5~H N277    response     5  6.31e4 1.26e4      39.1 3.14e-23
-#> # … with 100 more rows, and 1 more variable: adjusted.p.value <dbl>
-

The top ranked explanatory feature N341 can be plotted to inspect its trend relative to the day response variable.

-
-plotFeature(anova_analysis,
-            feature = 'N341',
-            cls = 'day')
-

-
-
-

Linear regression -

-

Univariate linear regression can be used to associate a continuous response variable with metabolome features. In the example below, the example data will be regressed against injection order to identify any linearly associated metabolome features.

-
-lr_analysis <- linearRegression(d,
-                                cls = 'injorder')
-
-print(lr_analysis)
-#> 
-#> Univariate linear regression analysis
-#> 
-#> Samples:  120 
-#> Features:     500 
-#> Responses:    injorder
-

The explanatory features can then be extracted.

-
-explanatoryFeatures(lr_analysis)
-#> # A tibble: 8 × 15
-#>   Response Feature r.squared adj.r.squared sigma statistic  p.value    df logLik
-#>   <chr>    <chr>       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl>
-#> 1 injorder N283        0.310         0.304  4.27      53.0 4.10e-11     1  -343.
-#> 2 injorder N135        0.165         0.157 78.7       23.2 4.31e- 6     1  -693.
-#> 3 injorder N221        0.140         0.133  5.87      19.3 2.50e- 5     1  -382.
-#> 4 injorder N473        0.135         0.127  7.24      18.3 3.78e- 5     1  -407.
-#> 5 injorder N335        0.132         0.124 20.1       17.9 4.59e- 5     1  -529.
-#> 6 injorder N452        0.120         0.112  4.00      16.0 1.10e- 4     1  -335.
-#> 7 injorder N255        0.119         0.111 11.1       15.9 1.17e- 4     1  -458.
-#> 8 injorder N267        0.118         0.111 26.4       15.8 1.22e- 4     1  -562.
-#> # … with 6 more variables: AIC <dbl>, BIC <dbl>, deviance <dbl>,
-#> #   df.residual <int>, nobs <int>, adjusted.p.value <dbl>
-

The top ranked explanatory feature N283 can be plotted to inspect its association with injection order.

-
-plotFeature(lr_analysis,
-            feature = 'N283',
-            cls = 'injorder')
-

-
-
-
-

Routine analyses -

-

For routine analyses, the initial analysis parameters for pre-treatment of the data and then the modelling can be selected.

-
-p <- analysisParameters(c('pre-treatment','modelling'))
-

More specific parameters for pre-treatment of the example data can be declared using the following.

-
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
-    keep = 'classes',
-    occupancyFilter = 'maximum',
-    transform = 'TICnorm' 
-  )
-)
-

The modellingMethods() function can be used to list the modelling methods that are currently available in metabolyseR.

-
-modellingMethods()
-#> [1] "anova"            "ttest"            "linearRegression" "randomForest"
-

The modellingParameters() function can be used to retrieve the default parameters for specific modelling methods. Below, the default modelling parameters for the randomForest and ttest methods are specified.

-
-parameters(p,'modelling') <- modellingParameters(c('randomForest','ttest'))
-

The class parameters can then be universally specified for both the pre-treatment and modelling elements. For this example, the day response variable will be used with just the H and 2 classes.

-
-changeParameter(p,'cls') <- 'day'
-changeParameter(p,'classes') <- c('H','2')
-

This gives the following parameters for the analysis.

-
-p
-#> Parameters:
-#> pre-treatment
-#>  keep
-#>      classes
-#>          cls = day
-#>          classes = c("H", "2")
-#>  occupancyFilter
-#>      maximum
-#>          cls = day
-#>          occupancy = 2/3
-#>  transform
-#>      TICnorm
-#> 
-#> modelling
-#>  randomForest
-#>      cls = day
-#>      rf = list()
-#>      reps = 1
-#>      binary = FALSE
-#>      comparisons = list()
-#>      perm = 0
-#>      returnModels = FALSE
-#>      seed = 1234
-#>  ttest
-#>      cls = day
-#>      pAdjust = bonferroni
-#>      comparisons = list()
-#>      returnModels = FALSE
-

The analysis can then be executed.

-
analysis <- metabolyse(abr1$neg,abr1$fact,p)
-#> 
[34m
-#> metabolyseR 
[39m 
[31mv0.14.9
[39m Thu Jan 27 12:00:15 2022
-#> ________________________________________________________________________________
-#> 
[33m
[33mParameters:
[33m
[39m
-#> pre-treatment
-#>  keep
-#>      classes
-#>          cls = day
-#>          classes = c("H", "2")
-#>  occupancyFilter
-#>      maximum
-#>          cls = day
-#>          occupancy = 2/3
-#>  transform
-#>      TICnorm
-#> 
-#> modelling
-#>  randomForest
-#>      cls = day
-#>      rf = list()
-#>      reps = 1
-#>      binary = FALSE
-#>      comparisons = list()
-#>      perm = 0
-#>      returnModels = FALSE
-#>      seed = 1234
-#>  ttest
-#>      cls = day
-#>      pAdjust = bonferroni
-#>      comparisons = list()
-#>      returnModels = FALSE
-#> ________________________________________________________________________________
-#> 
[34mPre-treatment 
[39m…
-
-
[34mPre-treatment 
[39m    
[32m✓
[39m [4.6S]
-#> 
[34mModelling 
[39m…
-
[34m
-Modelling 
[39m 
[32m✓
[39m [3.1S]
-#> ________________________________________________________________________________
-#> 
-#> 
[32mComplete! 
[39m[7.8S]
-

The results for the modelling can be specifically extracted using the following.

-
-analysisResults(analysis,'modelling')
-#> $randomForest
-#> 
-#> Random forest classification 
-#> 
-#> Samples:  40 
-#> Features:     1713 
-#> Response:     day 
-#> # comparisons:    1 
-#> 
-#> 
-#> $ttest
-#> 
-#> Univariate t-test analysis
-#> 
-#> Samples:  40 
-#> Features:     1713 
-#> Responses:    day 
-#> # comparisons:    1
-

This returns the results as a list containing the modelling results objects for each specified method.

-

Alternatively, the modelling results can be accessed directly from the Analysis object. The following shows the extraction of the explanatory features, using default parameters for each method, with the results returned in a single table.

-
-explanatory_features <- analysis %>% 
-  explanatoryFeatures()
-
-print(explanatory_features)
-#> # A tibble: 100 × 17
-#>    Method       Response Comparison Feature Metric      Value estimate estimate1
-#>    <chr>        <chr>    <chr>      <chr>   <chr>       <dbl>    <dbl>     <dbl>
-#>  1 randomForest day      2~H        N341    FalsePo… 8.06e-28       NA        NA
-#>  2 randomForest day      2~H        N377    FalsePo… 5.70e-18       NA        NA
-#>  3 randomForest day      2~H        N447    FalsePo… 5.70e-18       NA        NA
-#>  4 randomForest day      2~H        N579    FalsePo… 5.70e-18       NA        NA
-#>  5 randomForest day      2~H        N1084   FalsePo… 1.19e-16       NA        NA
-#>  6 randomForest day      2~H        N327    FalsePo… 2.33e-15       NA        NA
-#>  7 randomForest day      2~H        N580    FalsePo… 4.32e-14       NA        NA
-#>  8 randomForest day      2~H        N1083   FalsePo… 7.49e-13       NA        NA
-#>  9 randomForest day      2~H        N1085   FalsePo… 7.49e-13       NA        NA
-#> 10 randomForest day      2~H        N503    FalsePo… 7.49e-13       NA        NA
-#> # … with 90 more rows, and 9 more variables: estimate2 <dbl>, statistic <dbl>,
-#> #   p.value <dbl>, parameter <dbl>, conf.low <dbl>, conf.high <dbl>,
-#> #   method <chr>, alternative <chr>, adjusted.p.value <dbl>
-

Heat maps of the explanatory features can also be plotted for both the modelling methods.

-
-plotExplanatoryHeatmap(analysis) %>% 
-  patchwork::wrap_plots()
-

-
-
- - - -
- - - - -
- - - - - - - - diff --git a/docs/articles/modelling_files/figure-html/anova-feature-1.png b/docs/articles/modelling_files/figure-html/anova-feature-1.png deleted file mode 100644 index 8bf0c4bd..00000000 Binary files a/docs/articles/modelling_files/figure-html/anova-feature-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/binary-heatmap-1.png b/docs/articles/modelling_files/figure-html/binary-heatmap-1.png deleted file mode 100644 index 9418b01c..00000000 Binary files a/docs/articles/modelling_files/figure-html/binary-heatmap-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/binary-mds-1.png b/docs/articles/modelling_files/figure-html/binary-mds-1.png deleted file mode 100644 index 6e555e6d..00000000 Binary files a/docs/articles/modelling_files/figure-html/binary-mds-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/binary-metrics-1.png b/docs/articles/modelling_files/figure-html/binary-metrics-1.png deleted file mode 100644 index ff84b6e4..00000000 Binary files a/docs/articles/modelling_files/figure-html/binary-metrics-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/classification-comparison-mds-roc-1.png b/docs/articles/modelling_files/figure-html/classification-comparison-mds-roc-1.png deleted file mode 100644 index 2a6c30ab..00000000 Binary files a/docs/articles/modelling_files/figure-html/classification-comparison-mds-roc-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/explanatory-heatmap-1.png b/docs/articles/modelling_files/figure-html/explanatory-heatmap-1.png deleted file mode 100644 index 3ca7a260..00000000 Binary files a/docs/articles/modelling_files/figure-html/explanatory-heatmap-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/linear-regression-feature-1.png b/docs/articles/modelling_files/figure-html/linear-regression-feature-1.png deleted file mode 
100644 index 83b3551e..00000000 Binary files a/docs/articles/modelling_files/figure-html/linear-regression-feature-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/multinomial-mds-1.png b/docs/articles/modelling_files/figure-html/multinomial-mds-1.png deleted file mode 100644 index e8cc701c..00000000 Binary files a/docs/articles/modelling_files/figure-html/multinomial-mds-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/multinomial-multiple-mds-1.png b/docs/articles/modelling_files/figure-html/multinomial-multiple-mds-1.png deleted file mode 100644 index 96d2d1fa..00000000 Binary files a/docs/articles/modelling_files/figure-html/multinomial-multiple-mds-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/multinomial-roc-1.png b/docs/articles/modelling_files/figure-html/multinomial-roc-1.png deleted file mode 100644 index c5a36237..00000000 Binary files a/docs/articles/modelling_files/figure-html/multinomial-roc-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/outlier-detect-1.png b/docs/articles/modelling_files/figure-html/outlier-detect-1.png deleted file mode 100644 index 941476b3..00000000 Binary files a/docs/articles/modelling_files/figure-html/outlier-detect-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/regression-feature-1.png b/docs/articles/modelling_files/figure-html/regression-feature-1.png deleted file mode 100644 index 83b3551e..00000000 Binary files a/docs/articles/modelling_files/figure-html/regression-feature-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/regression-importance-plot-1.png b/docs/articles/modelling_files/figure-html/regression-importance-plot-1.png deleted file mode 100644 index 2d6f5863..00000000 Binary files a/docs/articles/modelling_files/figure-html/regression-importance-plot-1.png and /dev/null differ diff --git 
a/docs/articles/modelling_files/figure-html/regression-mds-1.png b/docs/articles/modelling_files/figure-html/regression-mds-1.png deleted file mode 100644 index 68791ea1..00000000 Binary files a/docs/articles/modelling_files/figure-html/regression-mds-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/t-test-heatmap-1.png b/docs/articles/modelling_files/figure-html/t-test-heatmap-1.png deleted file mode 100644 index acb21d94..00000000 Binary files a/docs/articles/modelling_files/figure-html/t-test-heatmap-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/unsupervised-feature-1.png b/docs/articles/modelling_files/figure-html/unsupervised-feature-1.png deleted file mode 100644 index be8b1882..00000000 Binary files a/docs/articles/modelling_files/figure-html/unsupervised-feature-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/figure-html/unsupervised-rf-1.png b/docs/articles/modelling_files/figure-html/unsupervised-rf-1.png deleted file mode 100644 index 7c66d01e..00000000 Binary files a/docs/articles/modelling_files/figure-html/unsupervised-rf-1.png and /dev/null differ diff --git a/docs/articles/modelling_files/header-attrs-2.10/header-attrs.js b/docs/articles/modelling_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/modelling_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/modelling_files/header-attrs-2.11/header-attrs.js b/docs/articles/modelling_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/modelling_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/modelling_files/header-attrs-2.7/header-attrs.js b/docs/articles/modelling_files/header-attrs-2.7/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/modelling_files/header-attrs-2.7/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/pre_treatment.html b/docs/articles/pre_treatment.html deleted file mode 100644 index 91eefe6f..00000000 --- a/docs/articles/pre_treatment.html +++ /dev/null @@ -1,592 +0,0 @@ - - - - - - - -Metabolomics data pre-treatment • metabolyseR - - - - - - - - - - - - -
-
- - - - -
-
- - - - -
-

Introduction -

-

Metabolomics data from any analytical technique requires various data pre-treatment steps prior to subsequent data mining or other downstream analyses. This aids both the data quality and integrity. It is important that appropriate pre-treatment strategies are used not only for the analytical technique being applied but are also suitable for the statistical or machine learning analyses that are to be utilised. Careful consideration of the pre-treatment steps to be undertaken is required as they can have a substantial influence on the results and inferences taken from metabolomic analyses.

-

Data pre-treatment is the most faceted aspect of the analysis elements in metabolyseR. It is itself made up of a number of elements, which themselves are made up of methods. The following document will outline the application of each of these pre-treatment elements for use in exploratory analyses then outline how to apply them in routine analyses. For an introduction to the usage of metabolyseR for both exploratory and routine analyses, see the introduction vignette using:

-
-vignette('introduction','metabolyseR')
-

To further supplement this document, a quick start example analysis is also available as a vignette:

-
-vignette('quick_start','metabolyseR')
-

To begin, the package can be loaded using:

-
-library(metabolyseR)
-#> 
-#> Attaching package: 'metabolyseR'
-#> The following object is masked from 'package:stats':
-#> 
-#>     anova
-#> The following objects are masked from 'package:base':
-#> 
-#>     raw, split
-
-

Example data -

-

The examples used here will use the abr1 data set from the metaboData package. This is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The pipe %>% from the magrittr package will also be used. The example data can be loaded using:

- -

Only the negative acquisition mode data (abr1$neg) will be used along with the sample meta-information (abr1$fact). Create an AnalysisData class object, assigned to the variable d, using the following:

-
-d <- analysisData(abr1$neg,abr1$fact)
-
-print(d)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 2000 
-#> Info: 9
-

As can be seen above the data set contains a total of 120 samples and 2000 features.

-
-
-

Parallel processing -

-

The package supports parallel processing using the future package.

-

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel implementation using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

-
-plan(future::multisession,workers = 2)
-

See the future package documentation for more information on the types of parallel implementations that are available.

-
-
-
-

Pre-treatment elements -

-

The following sections will outline the numerous pre-treatment elements available within metabolyseR. There will be examples of their application during exploratory analyses along with useful visualisations. These can aid interpretation of when particular treatments should be applied as well as their effect once they have been used.

-
-

Removal of samples, classes or features -

-

In many situations, it will be necessary to exclude either individual samples, sample classes or certain features from further analysis.

-

Individual samples can be removed using removeSamples() as below, where the idx argument stipulates the sample information column containing the sample indexes and the samples argument, a vector of sample indexes to remove.

-
-d %>%
-  removeSamples(idx = 'injorder',samples = 1)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 119 
-#> Features: 2000 
-#> Info: 9
-

The removeClasses function can be used similarly to remove whole classes from further analysis:

-
-d %>%
-  removeClasses(cls = 'day',classes = 'H')
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 100 
-#> Features: 2000 
-#> Info: 9
-

The following will enable the removal of specified features as a vector supplied to the features argument:

-
-d %>%
-  removeFeatures(features = c('N1','N2'))
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 1998 
-#> Info: 9
-

There could be occasions where the numbers of samples, classes or features to remove are greater than the numbers of samples, classes or features that are to be retained. In these situations it will be more convenient to directly specify the samples, classes or features to retain. Keeping samples, classes or features is outlined in the following section.

-
-
-

Keeping samples, classes or features -

-

Often it will be necessary to retain only particular samples, sample classes or certain features for further analysis.

-

Individual samples can be kept using keepSamples() as below, where the idx argument stipulates the sample information column containing the sample indexes and the samples argument, a vector of sample indexes to keep.

-
-d %>%
-  keepSamples(idx = 'injorder',samples = 1)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 1 
-#> Features: 2000 
-#> Info: 9
-

The keepClasses() method can be used similarly to keep whole classes for further analysis:

-
-d %>%
-  keepClasses(cls = 'day',classes = 'H')
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 20 
-#> Features: 2000 
-#> Info: 9
-

The following will specify features to keep, with a vector of feature names supplied to the features argument:

-
-d %>%
-  keepFeatures(features = c('N1','N2'))
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 2 
-#> Info: 9
-

There are likely to be occasions where the numbers of samples, classes or features to keep are greater than the numbers of samples, classes or features that are to be excluded. In these situations it will be more convenient to directly specify the samples, classes or features to remove. Removing samples, classes or features is outlined in the previous section.

-
-
-

Feature filtering based on occupancy -

-

Occupancy provides a useful metric by which to filter poorly represented features (features containing a majority zero or missing values). An occupancy threshold provides a means of specifying this majority with variables below the threshold excluded from further analyses. However, this can be complicated by an underlying class structure present within the data where a variable may be well represented within one class but not in another.

-

The proportional occupancy for each feature within a data set for a given class structure can be calculated using the occupancy() method, specifying the sample information column using the cls argument.

-
-d %>%
-  occupancy(cls = 'day')
-#> # A tibble: 11,914 × 5
-#>    day   Feature     N `Class total` Occupancy
-#>    <fct> <chr>   <dbl>         <int>     <dbl>
-#>  1 1     N1          0            20         0
-#>  2 1     N10         0            20         0
-#>  3 1     N100        0            20         0
-#>  4 1     N1000      20            20         1
-#>  5 1     N1001      20            20         1
-#>  6 1     N1002      20            20         1
-#>  7 1     N1003      20            20         1
-#>  8 1     N1004      20            20         1
-#>  9 1     N1005      20            20         1
-#> 10 1     N1006      20            20         1
-#> # … with 11,904 more rows
-

Alternatively the occupancy distributions can be plotted providing a useful overview of the data set:

-
-d %>%
-  plotOccupancy(cls = 'day')
-

-

It can be seen that there are a number of unoccupied features across all the sample classes with a small rise in the density distribution near 0.

-

There are two strategies for thresholding occupancy. The first is a maximum threshold, where the maximum occupancy across all classes is required to be above the threshold. Therefore, for a feature to be retained, only a single class needs to have an occupancy above the threshold. It is this strategy that will be appropriate for most applications. A two-thirds maximum occupancy filter can be applied to the day sample information column of our data using:

-
-maximum_occupancy_filtered <- d %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3)
-

It can be seen below that this removes 240 features.

-
-print(maximum_occupancy_filtered)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 1760 
-#> Info: 9
-

Plotting the occupancy distributions shows that all the low occupancy features have now been removed.

-
-maximum_occupancy_filtered %>%
-  plotOccupancy(cls = 'day')
-

-

The alternative strategy is by applying a minimum threshold; where the minimum occupancy across all classes is required to be above the threshold. Therefore, for a feature to be retained, all classes would need to have an occupancy above the threshold. A two-thirds minimum occupancy filter can be applied to the day sample information column of our data using:

-
-minimum_occupancy_filtered <- d %>%
-  occupancyMinimum(cls = 'day',occupancy = 2/3)
-

It can be seen below that this removes 344 features.

-
-print(minimum_occupancy_filtered)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 1656 
-#> Info: 9
-
-
-

Data transformation -

-

Prior to downstream analyses, metabolomics data often require transformation to fulfill the assumptions of a particular statistical/data mining technique.

-

There are a wide range of transformation methods available that are commonly used for the analysis of metabolomics data. These methods are all named with the prefix transform.

-

The effects of a transformation on a data set can be assessed using a supervised classification approach. The following performs a supervised random forest analysis of the example data and plots the results using both multidimensional scaling (MDS) and receiver operator characteristic (ROC) curves.

-
-d %>%
-  plotSupervisedRF(cls = 'day')
-

-

Alternatively a log10 transformation can be applied prior to analysis:

-
-d %>%
-  transformLog10() %>%
-  plotSupervisedRF(cls = 'day')
-

-

Or a total ion count (TIC) normalisation where each individual sample is corrected by its TIC. This is one method that can be used to account for small variability in sample concentration.

- -

-

The margin value is a metric that can be used to assess model performance. Positive values indicate a model's ability, on average, to correctly predict the class labels of the analysed data.

-

As can be seen in the plots above, the transformations have little effect on the overall structure of the data set. However, there are small increases in the margins of the transformed data (model improvement). Note that here, a non-parametric machine learning approach has been applied to assess the effects of the transformations on the data. Using a different approach, such as the parametric analysis of variance (ANOVA), which has different underlying assumptions, will likely give different results to the assessment above.

-
-
-

Sample aggregation -

-

Sample aggregation allows the electronic pooling of samples based on a grouping variable. This is useful in situations such as the presence of technical replicates that can be aggregated to reduce the effects of pseudo replication. metabolyseR provides methods for mean, median and sum aggregation and each starts with the aggregate prefix.

-

Below shows a principal component analysis (PCA) plot of the example data coloured by the classes of the day sample information column. It is first maximum occupancy filtered to remove empty features.

-
-d %>%
-  occupancyMaximum(cls = 'day') %>%
-  plotPCA(cls = 'day')
-

-

The example below shows the mean aggregation of the data using the experimental classes within the day sample information column.

-
-day_mean <- d %>%
-  occupancyMaximum(cls = 'day') %>%
-  aggregateMean(cls = 'day')
-

The PCA plot below shows these class averages of the data.

-
-plotPCA(day_mean,cls = 'day',ellipses = FALSE)
-

-
-
-

Batch/block correction -

-

There can sometimes be artificial batch related variability introduced into metabolomics analyses as a result of analytical instrumentation or sample preparation. With appropriate sample randomisation (see section on feature filtering based on QC samples), batch related variability can be corrected for using an average centring correction method, applied to the individual features.

-

The plot below shows differences in the TIC distributions for each of the classes in the day sample information column.

-
-d %>%
-  plotTIC(by = 'day',colour = 'day')
-

-

The data can then be corrected by class average centring as shown below.

-
-corrected_data <- d %>%
-  correctionCenter(block = 'day',type = 'median')
-

The plot of the TICs below shows that the inter-class variability has been removed but the intra-class variability has been retained.

-
-plotTIC(corrected_data,
-        by = 'day',
-        colour = 'day')
-

-
-
-

Imputation of missing data -

-

Missing values can have an important influence on downstream analyses with zero values heavily influencing the outcomes of parametric tests. Where and how they are imputed are important considerations and this is highly related to variable occupancy. The methods provided here allow both these aspects to be taken into account and utilise Random Forest imputation using the missForest package.

-

Below shows a Linear Discriminant Analysis (LDA) plot of the example data. The eigenvalue (Tw) gives a comparable indication of the separation between the sample classes.

-
-d %>%
-  keepClasses(cls = 'day',classes = c('H','5')) %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
-  plotLDA(cls = 'day')
-

-

The following shows the same, except there is an application of imputation prior to the LDA. The imputed data is based on the data of all the samples present in the data set. It shows a very slight drop in the eigenvalue and therefore reduced separation between the sample classes.

-
-d %>%
-  keepClasses(cls = 'day',classes = c('H','5')) %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
-  imputeAll(parallel = 'variables') %>%
-  plotLDA(cls = 'day')
-

-

Imputation accuracy is likely to be reduced if data is sparse or there is underlying class structure where there is significant discrimination. Below shows the application of imputation prior to the LDA, except this time the imputation is class-wise. The imputed data is based only on the values of other samples within the class.

-
-d %>%
-  keepClasses(cls = 'day',classes = c('H','5')) %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
-  imputeClass(cls = 'day') %>%
-  plotLDA(cls = 'day')
-

-

This shows a slight increase in the eigenvalue with the classes showing greater separation. This is likely due to the increased accuracy of the imputed data relative to the class structure.

-
-
-

Feature filtering based on quality control (QC) samples -

-

A QC sample is an average pooled sample, equally representative in composition of all the samples present within an experimental set. Within an analytical run, the QC sample is analysed at equal intervals throughout the run. If there is class structure within the run, this should be randomised within a block fashion so that the classes are equally represented in each block throughout the run. A QC sample can then be injected and analysed between these randomised blocks. This provides a set of technical injections that allows the variability in instrument performance over the run to be accounted for and the robustness of the acquired variables to be assessed.

-

The technical reproducibility of an acquired variable can be assessed using its relative standard deviation (RSD) within the QC samples. The variable RSDs can then be filtered below a threshold value to remove metabolome features that are poorly reproducible across the analytical runs. This variable filtering strategy has an advantage over that of occupancy alone as it is not dependent on underlying class structure. Therefore, the variables and variable numbers will not alter if a new class structure is imposed upon the data.

-

The example data set does not include QC samples. For this example, the H class will be used.

-

Firstly, the RSD distribution will be assessed for only the H class. The following retains only the H class samples to aid visualisation.

-
-QC <- d %>%
-  keepClasses(cls = 'day',classes = 'H')
-

The table of RSD values for each of the features can be computed as below.

-
-QC %>%
-  rsd(cls = 'day')
-#> # A tibble: 2,000 × 5
-#>    day   Feature  Mean    SD   RSD
-#>    <fct> <chr>   <dbl> <dbl> <dbl>
-#>  1 H     N1        0     0   NaN  
-#>  2 H     N10       0     0   NaN  
-#>  3 H     N100      0     0   NaN  
-#>  4 H     N1000   114.   19.4  17.0
-#>  5 H     N1001    99.2  21.6  21.7
-#>  6 H     N1002    86.7  23.9  27.6
-#>  7 H     N1003    82.3  18.0  21.9
-#>  8 H     N1004    91.6  18.8  20.5
-#>  9 H     N1005    78.2  14.0  17.9
-#> 10 H     N1006    78.6  21.3  27.1
-#> # … with 1,990 more rows
-

The distributions of the feature RSD values can be plotted for the H class.

-
-QC %>%
-  plotRSD(cls = 'day')
-#> Warning: Removed 123 rows containing non-finite values (stat_density).
-#> Warning: Removed 1 row(s) containing missing values (geom_path).
-

-

This shows that there are a number of features with very high RSD values and therefore poor analytical robustness. Many of these are likely to be as a result of poor occupancy and zero values. Applying an occupancy filter prior to plotting does indeed show a reduction in the upper range of RSD values retained.

-
-QC %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
-  plotRSD(cls = 'day')
-

-

metabolyseR contains a number of methods for applying pre-treatment routines specifically on QC samples, which are all prefixed with QC. These include methods for feature filtering of a data set based on the occupancy of the QC class, imputation of the QC class only, feature filtering based on the RSD values of the QC class and removal of only the QC class.

-

Below shows an example of applying some of these QC methods. This will first filter the features in the data set based on the occupancy of the QC class. Then the features are filtered based on the RSD values of the QC class using an RSD threshold of 50%. The class index of the QC samples is specified using the QCidx argument.

-
-QC_filtered <- d %>%
-  QCoccupancy(cls = 'day',QCidx = 'H',occupancy = 2/3) %>%
-  QCrsdFilter(cls = 'day',QCidx = 'H',RSDthresh = 50)
-

This removes a total of 637 features.

-
-print(QC_filtered)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 1363 
-#> Info: 9
-
-
-
-

Routine analyses -

-

For routine analyses, the available pre-treatment elements can be retrieved using:

-
-preTreatmentElements()
-#> [1] "aggregate"       "correction"      "impute"          "keep"           
-#> [5] "occupancyFilter" "QC"              "remove"          "transform"
-

The available methods for a specified pre-treatment element can be viewed using:

-
-preTreatmentMethods('remove')
-#> [1] "classes"  "features" "samples"
-

The default pre-treatment parameters can first be assigned to the variable p.

-
-p <- analysisParameters('pre-treatment')
-

The preTreatmentParameters() function allows the parameters for particular pre-treatment elements to be specified. The following specifies the pre-treatment elements that will be used for this data set. These will include the keeping of certain sample classes, the filtering of features based on class occupancy and the application of a TIC normalisation. These will be assigned to the p variable using the parameters() method.

-
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
-    keep = 'classes',
-    occupancyFilter = 'maximum',
-    transform = 'TICnorm' 
-  )
-)
-

Printing p shows these pre-treatment steps.

-
-print(p)
-#> Parameters:
-#> pre-treatment
-#>  keep
-#>      classes
-#>          cls = class
-#>          classes = c()
-#>  occupancyFilter
-#>      maximum
-#>          cls = class
-#>          occupancy = 2/3
-#>  transform
-#>      TICnorm
-

Next, the day sample information column can be specified, along with the classes to be kept which will be the H, the 1 and the 2 classes.

-
-changeParameter(p,'cls') <- 'day'
-changeParameter(p,'classes') <- c('H','1','2')
-

Printing p shows the final pre-treatment parameters that will be used for this analysis.

-
-print(p)
-#> Parameters:
-#> pre-treatment
-#>  keep
-#>      classes
-#>          cls = day
-#>          classes = c("H", "1", "2")
-#>  occupancyFilter
-#>      maximum
-#>          cls = day
-#>          occupancy = 2/3
-#>  transform
-#>      TICnorm
-

The pre-treatment routine can then be executed.

-
analysis <- metabolyse(abr1$neg,abr1$fact,p)
-#> 
[34m
-#> metabolyseR 
[39m 
[31mv0.14.9
[39m Thu Jan 27 12:03:18 2022
-#> ________________________________________________________________________________
-#> 
[33m
[33mParameters:
[33m
[39m
-#> pre-treatment
-#>  keep
-#>      classes
-#>          cls = day
-#>          classes = c("H", "1", "2")
-#>  occupancyFilter
-#>      maximum
-#>          cls = day
-#>          occupancy = 2/3
-#>  transform
-#>      TICnorm
-#> ________________________________________________________________________________
-#> 
[34mPre-treatment 
[39m…
-
-
[34mPre-treatment 
[39m    
[32m✓
[39m [6.5S]
-#> ________________________________________________________________________________
-#> 
-#> 
[32mComplete! 
[39m[6.5S]
-

Printing the analysis object shows the resulting data from the pre-treatment routine.

-
-print(analysis)
-#> 
-#> metabolyseR v0.14.9
-#> Analysis:
-#>     Thu Jan 27 12:03:18 2022
-#> 
-#>  Raw Data:
-#>      No. samples = 120
-#>      No. features = 2000
-#> 
-#>  Pre-treated Data:
-#>      Thu Jan 27 12:03:24 2022
-#>      No. samples = 60
-#>      No. features = 1723
-

The pre-treated data can be extracted from the Analysis object using several methods.

-

Firstly the analysisResults() method.

-
-analysisResults(analysis,'pre-treatment')
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 60 
-#> Features: 1723 
-#> Info: 9
-

And secondly the preTreated() method.

-
-preTreated(analysis)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 60 
-#> Features: 1723 
-#> Info: 9
-

A supervised random forest analysis can be used to visualise the structure of the resulting pre-treated data.

-
-analysis %>%
-  plotSupervisedRF(cls = 'day',type = 'pre-treated')
-

-
-
- - - -
- - - - -
- - - - - - - - diff --git a/docs/articles/pre_treatment_files/figure-html/QC_occupancy_rsd-1.png b/docs/articles/pre_treatment_files/figure-html/QC_occupancy_rsd-1.png deleted file mode 100644 index 25e57b3a..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/QC_occupancy_rsd-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/QC_rsd_plot-1.png b/docs/articles/pre_treatment_files/figure-html/QC_rsd_plot-1.png deleted file mode 100644 index ad181fd3..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/QC_rsd_plot-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/TICnorm_RF-1.png b/docs/articles/pre_treatment_files/figure-html/TICnorm_RF-1.png deleted file mode 100644 index ef10ac83..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/TICnorm_RF-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/corrected-TIC plot-1.png b/docs/articles/pre_treatment_files/figure-html/corrected-TIC plot-1.png deleted file mode 100644 index ab117ecb..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/corrected-TIC plot-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/day_TICs-1.png b/docs/articles/pre_treatment_files/figure-html/day_TICs-1.png deleted file mode 100644 index 3375cdfb..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/day_TICs-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/day_mean_pca-1.png b/docs/articles/pre_treatment_files/figure-html/day_mean_pca-1.png deleted file mode 100644 index d0dece15..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/day_mean_pca-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/impute_all_lda-1.png b/docs/articles/pre_treatment_files/figure-html/impute_all_lda-1.png deleted file mode 100644 index b5bf7242..00000000 
Binary files a/docs/articles/pre_treatment_files/figure-html/impute_all_lda-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/imputed_class_lda-1.png b/docs/articles/pre_treatment_files/figure-html/imputed_class_lda-1.png deleted file mode 100644 index 45fc8fd7..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/imputed_class_lda-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/lda-1.png b/docs/articles/pre_treatment_files/figure-html/lda-1.png deleted file mode 100644 index 47d29d86..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/lda-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/log10_RF-1.png b/docs/articles/pre_treatment_files/figure-html/log10_RF-1.png deleted file mode 100644 index 8337d02c..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/log10_RF-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/pca-1.png b/docs/articles/pre_treatment_files/figure-html/pca-1.png deleted file mode 100644 index 42e1aa81..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/pca-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/plot_filtered_occupancy-1.png b/docs/articles/pre_treatment_files/figure-html/plot_filtered_occupancy-1.png deleted file mode 100644 index 244cf3a3..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/plot_filtered_occupancy-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/plot_occupancy-1.png b/docs/articles/pre_treatment_files/figure-html/plot_occupancy-1.png deleted file mode 100644 index c9b8d217..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/plot_occupancy-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/supervised-rf-1.png 
b/docs/articles/pre_treatment_files/figure-html/supervised-rf-1.png deleted file mode 100644 index 33e3124a..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/supervised-rf-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/figure-html/transform_RF-1.png b/docs/articles/pre_treatment_files/figure-html/transform_RF-1.png deleted file mode 100644 index a9201b9c..00000000 Binary files a/docs/articles/pre_treatment_files/figure-html/transform_RF-1.png and /dev/null differ diff --git a/docs/articles/pre_treatment_files/header-attrs-2.10/header-attrs.js b/docs/articles/pre_treatment_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/pre_treatment_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/pre_treatment_files/header-attrs-2.11/header-attrs.js b/docs/articles/pre_treatment_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/pre_treatment_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/pre_treatment_files/header-attrs-2.7/header-attrs.js b/docs/articles/pre_treatment_files/header-attrs-2.7/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/pre_treatment_files/header-attrs-2.7/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/quick_start.html b/docs/articles/quick_start.html deleted file mode 100644 index 00d1ac5c..00000000 --- a/docs/articles/quick_start.html +++ /dev/null @@ -1,216 +0,0 @@ - - - - - - - -Quick start example analysis • metabolyseR - - - - - - - - - - - - -
-
- - - - -
-
- - - - -

This example analysis will use the abr1 data set from the metaboData package. It is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The analysis will also include use of the pipe %>% from the magrittr package. First load the necessary packages.

- -

For this example we will use only the negative acquisition mode data (abr1$neg) and sample meta-information (abr1$fact). Create an AnalysisData class object using the following:

-
-d <- analysisData(abr1$neg,abr1$fact)
-

The data includes 120 samples and 2000 mass spectral features as shown below.

-
-d
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 2000 
-#> Info: 9
-

The clsAvailable() function can be used to identify the columns available in our meta-information table.

-
-clsAvailable(d)
-#> [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
-#> [8] "day"      "class"
-

For this analysis, we will be using the infection time course class information contained in the day column. This can be extracted and the class frequencies tabulated using the following:

-
-d %>%
-  clsExtract(cls = 'day') %>%
-  table()
-#> .
-#>  1  2  3  4  5  H 
-#> 20 20 20 20 20 20
-

As can be seen above, the experiment is made up of six infection time point classes that includes a healthy control class (H) and five day infection time points (1-5), each with 20 replicates.

-

For data pre-treatment prior to statistical analysis, a two-thirds maximum class occupancy filter can be applied. Features where the maximum proportion of non-missing data per class is above two-thirds are retained. A total ion count normalisation will also be applied.

-
-d <- d %>%
-  occupancyMaximum(cls = 'day', occupancy = 2/3) %>%
-  transformTICnorm()
-
-d
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 1760 
-#> Info: 9
-

This has reduced the data set to 1760 relevant features.

-

The structure of the data can be visualised using both unsupervised and supervised methods. For instance, the first two principal components from a principal component analysis (PCA) of the data with the sample points coloured by infection class can be plotted using:

-
-plotPCA(d,cls = 'day',xAxis = 'PC1',yAxis = 'PC2')
-

-

Similarly, multidimensional scaling (MDS) of sample proximity values from a supervised random forest classification model can be plotted along with receiver operating characteristic (ROC) curves.

-
-plotSupervisedRF(d,cls = 'day')
-

-

A progression can clearly be seen from the earliest to latest infected time points.

-

For feature selection, one-way analysis of variance (ANOVA) can be performed for each feature to identify features significantly explanatory for the infection time point.

-
-anova_results <- d %>%
-  anova(cls = 'day')
-

A table of the significantly explanatory features, with a Bonferroni-adjusted p value < 0.05, can be extracted using:

-
-explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05)
-
-explan_feat
-#> # A tibble: 379 × 10
-#>    Response Comparison  Feature term       df   sumsq  meansq statistic  p.value
-#>    <chr>    <chr>       <chr>   <chr>   <dbl>   <dbl>   <dbl>     <dbl>    <dbl>
-#>  1 day      1~2~3~4~5~H N341    respon…     5 3.88e-4 7.76e-5     137.  1.55e-46
-#>  2 day      1~2~3~4~5~H N133    respon…     5 7.00e-5 1.40e-5     126.  8.63e-45
-#>  3 day      1~2~3~4~5~H N163    respon…     5 6.01e-5 1.20e-5     117.  2.95e-43
-#>  4 day      1~2~3~4~5~H N1087   respon…     5 2.42e-6 4.84e-7      99.8 5.61e-40
-#>  5 day      1~2~3~4~5~H N171    respon…     5 2.25e-7 4.50e-8      95.7 3.84e-39
-#>  6 day      1~2~3~4~5~H N513    respon…     5 3.38e-6 6.76e-7      95.3 4.78e-39
-#>  7 day      1~2~3~4~5~H N1025   respon…     5 2.78e-6 5.56e-7      91.0 3.91e-38
-#>  8 day      1~2~3~4~5~H N342    respon…     5 3.71e-6 7.41e-7      90.3 5.32e-38
-#>  9 day      1~2~3~4~5~H N1083   respon…     5 5.11e-5 1.02e-5      89.0 1.06e-37
-#> 10 day      1~2~3~4~5~H N1085   respon…     5 1.10e-5 2.19e-6      83.4 1.92e-36
-#> # … with 369 more rows, and 1 more variable: adjusted.p.value <dbl>
-

The ANOVA has identified 379 features significantly explanatory over the infection time course. A heat map of the mean relative intensity for each class of these explanatory features can be plotted to visualise their trends between the infection time point classes.

-
-plotExplanatoryHeatmap(anova_results,
-                       threshold = 0.05,
-                       featureNames = FALSE)
-

-

Many of the explanatory features can be seen to be most highly abundant in the final infection time point 5.

-

Finally, box plots of the trends of individual features can be plotted, such as the N341 feature below.

-
-plotFeature(anova_results,feature = 'N341',cls = 'day')
-

-
- - - -
- - - - -
- - - - - - - - diff --git a/docs/articles/quick_start_files/figure-html/feature_plot-1.png b/docs/articles/quick_start_files/figure-html/feature_plot-1.png deleted file mode 100644 index f9b45610..00000000 Binary files a/docs/articles/quick_start_files/figure-html/feature_plot-1.png and /dev/null differ diff --git a/docs/articles/quick_start_files/figure-html/pca-1.png b/docs/articles/quick_start_files/figure-html/pca-1.png deleted file mode 100644 index d34e0c67..00000000 Binary files a/docs/articles/quick_start_files/figure-html/pca-1.png and /dev/null differ diff --git a/docs/articles/quick_start_files/figure-html/rf_heatmap-1.png b/docs/articles/quick_start_files/figure-html/rf_heatmap-1.png deleted file mode 100644 index fb5dd9be..00000000 Binary files a/docs/articles/quick_start_files/figure-html/rf_heatmap-1.png and /dev/null differ diff --git a/docs/articles/quick_start_files/figure-html/supervised_RF-1.png b/docs/articles/quick_start_files/figure-html/supervised_RF-1.png deleted file mode 100644 index 397c5b51..00000000 Binary files a/docs/articles/quick_start_files/figure-html/supervised_RF-1.png and /dev/null differ diff --git a/docs/articles/quick_start_files/header-attrs-2.10/header-attrs.js b/docs/articles/quick_start_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/quick_start_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/quick_start_files/header-attrs-2.11/header-attrs.js b/docs/articles/quick_start_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/quick_start_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/quick_start_files/header-attrs-2.7/header-attrs.js b/docs/articles/quick_start_files/header-attrs-2.7/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/docs/articles/quick_start_files/header-attrs-2.7/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/authors.html b/docs/authors.html deleted file mode 100644 index c5036eb5..00000000 --- a/docs/authors.html +++ /dev/null @@ -1,114 +0,0 @@ - -Authors and Citation • metabolyseR - - -
-
- - - -
-
-
- - - -
  • -

    Jasen Finch. Author, maintainer. -

    -
  • -
-
-
-

Citation

- Source: DESCRIPTION -
-
- - -

Finch J (2022). -metabolyseR: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data. -R package version 0.14.9, https://jasenfinch.github.io/metabolyseR. -

-
@Manual{,
-  title = {metabolyseR: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data},
-  author = {Jasen Finch},
-  year = {2022},
-  note = {R package version 0.14.9},
-  url = {https://jasenfinch.github.io/metabolyseR},
-}
- -
- -
- - - -
- - - - - - - - diff --git a/docs/bootstrap-toc.css b/docs/bootstrap-toc.css deleted file mode 100644 index 5a859415..00000000 --- a/docs/bootstrap-toc.css +++ /dev/null @@ -1,60 +0,0 @@ -/*! - * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) - * Copyright 2015 Aidan Feldman - * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ - -/* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ - -/* All levels of nav */ -nav[data-toggle='toc'] .nav > li > a { - display: block; - padding: 4px 20px; - font-size: 13px; - font-weight: 500; - color: #767676; -} -nav[data-toggle='toc'] .nav > li > a:hover, -nav[data-toggle='toc'] .nav > li > a:focus { - padding-left: 19px; - color: #563d7c; - text-decoration: none; - background-color: transparent; - border-left: 1px solid #563d7c; -} -nav[data-toggle='toc'] .nav > .active > a, -nav[data-toggle='toc'] .nav > .active:hover > a, -nav[data-toggle='toc'] .nav > .active:focus > a { - padding-left: 18px; - font-weight: bold; - color: #563d7c; - background-color: transparent; - border-left: 2px solid #563d7c; -} - -/* Nav: second level (shown on .active) */ -nav[data-toggle='toc'] .nav .nav { - display: none; /* Hide by default, but at >768px, show it */ - padding-bottom: 10px; -} -nav[data-toggle='toc'] .nav .nav > li > a { - padding-top: 1px; - padding-bottom: 1px; - padding-left: 30px; - font-size: 12px; - font-weight: normal; -} -nav[data-toggle='toc'] .nav .nav > li > a:hover, -nav[data-toggle='toc'] .nav .nav > li > a:focus { - padding-left: 29px; -} -nav[data-toggle='toc'] .nav .nav > .active > a, -nav[data-toggle='toc'] .nav .nav > .active:hover > a, -nav[data-toggle='toc'] .nav .nav > .active:focus > a { - padding-left: 28px; - font-weight: 500; -} - -/* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 
-nav[data-toggle='toc'] .nav > .active > ul { - display: block; -} diff --git a/docs/bootstrap-toc.js b/docs/bootstrap-toc.js deleted file mode 100644 index 1cdd573b..00000000 --- a/docs/bootstrap-toc.js +++ /dev/null @@ -1,159 +0,0 @@ -/*! - * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) - * Copyright 2015 Aidan Feldman - * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ -(function() { - 'use strict'; - - window.Toc = { - helpers: { - // return all matching elements in the set, or their descendants - findOrFilter: function($el, selector) { - // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ - // http://stackoverflow.com/a/12731439/358804 - var $descendants = $el.find(selector); - return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); - }, - - generateUniqueIdBase: function(el) { - var text = $(el).text(); - var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); - return anchor || el.tagName.toLowerCase(); - }, - - generateUniqueId: function(el) { - var anchorBase = this.generateUniqueIdBase(el); - for (var i = 0; ; i++) { - var anchor = anchorBase; - if (i > 0) { - // add suffix - anchor += '-' + i; - } - // check if ID already exists - if (!document.getElementById(anchor)) { - return anchor; - } - } - }, - - generateAnchor: function(el) { - if (el.id) { - return el.id; - } else { - var anchor = this.generateUniqueId(el); - el.id = anchor; - return anchor; - } - }, - - createNavList: function() { - return $(''); - }, - - createChildNavList: function($parent) { - var $childList = this.createNavList(); - $parent.append($childList); - return $childList; - }, - - generateNavEl: function(anchor, text) { - var $a = $(''); - $a.attr('href', '#' + anchor); - $a.text(text); - var $li = $('
  • '); - $li.append($a); - return $li; - }, - - generateNavItem: function(headingEl) { - var anchor = this.generateAnchor(headingEl); - var $heading = $(headingEl); - var text = $heading.data('toc-text') || $heading.text(); - return this.generateNavEl(anchor, text); - }, - - // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). - getTopLevel: function($scope) { - for (var i = 1; i <= 6; i++) { - var $headings = this.findOrFilter($scope, 'h' + i); - if ($headings.length > 1) { - return i; - } - } - - return 1; - }, - - // returns the elements for the top level, and the next below it - getHeadings: function($scope, topLevel) { - var topSelector = 'h' + topLevel; - - var secondaryLevel = topLevel + 1; - var secondarySelector = 'h' + secondaryLevel; - - return this.findOrFilter($scope, topSelector + ',' + secondarySelector); - }, - - getNavLevel: function(el) { - return parseInt(el.tagName.charAt(1), 10); - }, - - populateNav: function($topContext, topLevel, $headings) { - var $context = $topContext; - var $prevNav; - - var helpers = this; - $headings.each(function(i, el) { - var $newNav = helpers.generateNavItem(el); - var navLevel = helpers.getNavLevel(el); - - // determine the proper $context - if (navLevel === topLevel) { - // use top level - $context = $topContext; - } else if ($prevNav && $context === $topContext) { - // create a new level of the tree and switch to it - $context = helpers.createChildNavList($prevNav); - } // else use the current $context - - $context.append($newNav); - - $prevNav = $newNav; - }); - }, - - parseOps: function(arg) { - var opts; - if (arg.jquery) { - opts = { - $nav: arg - }; - } else { - opts = arg; - } - opts.$scope = opts.$scope || $(document.body); - return opts; - } - }, - - // accepts a jQuery object, or an options object - init: function(opts) { - opts = this.helpers.parseOps(opts); - - // ensure that the data attribute is in place for styling - opts.$nav.attr('data-toggle', 'toc'); - - var $topContext = this.helpers.createChildNavList(opts.$nav); - var topLevel = this.helpers.getTopLevel(opts.$scope); - var $headings = this.helpers.getHeadings(opts.$scope, topLevel); - this.helpers.populateNav($topContext, topLevel, $headings); - } - }; - - $(function() { - $('nav[data-toggle="toc"]').each(function(i, el) { - var $nav = $(el); - 
Toc.init($nav); - }); - }); -})(); diff --git a/docs/docsearch.css b/docs/docsearch.css deleted file mode 100644 index e5f1fe1d..00000000 --- a/docs/docsearch.css +++ /dev/null @@ -1,148 +0,0 @@ -/* Docsearch -------------------------------------------------------------- */ -/* - Source: https://github.com/algolia/docsearch/ - License: MIT -*/ - -.algolia-autocomplete { - display: block; - -webkit-box-flex: 1; - -ms-flex: 1; - flex: 1 -} - -.algolia-autocomplete .ds-dropdown-menu { - width: 100%; - min-width: none; - max-width: none; - padding: .75rem 0; - background-color: #fff; - background-clip: padding-box; - border: 1px solid rgba(0, 0, 0, .1); - box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); -} - -@media (min-width:768px) { - .algolia-autocomplete .ds-dropdown-menu { - width: 175% - } -} - -.algolia-autocomplete .ds-dropdown-menu::before { - display: none -} - -.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { - padding: 0; - background-color: rgb(255,255,255); - border: 0; - max-height: 80vh; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { - margin-top: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion { - padding: 0; - overflow: visible -} - -.algolia-autocomplete .algolia-docsearch-suggestion--category-header { - padding: .125rem 1rem; - margin-top: 0; - font-size: 1.3em; - font-weight: 500; - color: #00008B; - border-bottom: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { - float: none; - padding-top: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { - float: none; - width: auto; - padding: 0; - text-align: left -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content { - float: none; - width: auto; - padding: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content::before { - display: none -} - -.algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { - padding-top: .75rem; - margin-top: 
.75rem; - border-top: 1px solid rgba(0, 0, 0, .1) -} - -.algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { - display: block; - padding: .1rem 1rem; - margin-bottom: 0.1; - font-size: 1.0em; - font-weight: 400 - /* display: none */ -} - -.algolia-autocomplete .algolia-docsearch-suggestion--title { - display: block; - padding: .25rem 1rem; - margin-bottom: 0; - font-size: 0.9em; - font-weight: 400 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--text { - padding: 0 1rem .5rem; - margin-top: -.25rem; - font-size: 0.8em; - font-weight: 400; - line-height: 1.25 -} - -.algolia-autocomplete .algolia-docsearch-footer { - width: 110px; - height: 20px; - z-index: 3; - margin-top: 10.66667px; - float: right; - font-size: 0; - line-height: 0; -} - -.algolia-autocomplete .algolia-docsearch-footer--logo { - background-image: url("data:image/svg+xml;utf8,"); - background-repeat: no-repeat; - background-position: 50%; - background-size: 100%; - overflow: hidden; - text-indent: -9000px; - width: 100%; - height: 100%; - display: block; - transform: translate(-8px); -} - -.algolia-autocomplete .algolia-docsearch-suggestion--highlight { - color: #FF8C00; - background: rgba(232, 189, 54, 0.1) -} - - -.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { - box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) -} - -.algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { - background-color: rgba(192, 192, 192, .15) -} diff --git a/docs/docsearch.js b/docs/docsearch.js deleted file mode 100644 index b35504cd..00000000 --- a/docs/docsearch.js +++ /dev/null @@ -1,85 +0,0 @@ -$(function() { - - // register a handler to move the focus to the search bar - // upon pressing shift + "/" (i.e. 
"?") - $(document).on('keydown', function(e) { - if (e.shiftKey && e.keyCode == 191) { - e.preventDefault(); - $("#search-input").focus(); - } - }); - - $(document).ready(function() { - // do keyword highlighting - /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ - var mark = function() { - - var referrer = document.URL ; - var paramKey = "q" ; - - if (referrer.indexOf("?") !== -1) { - var qs = referrer.substr(referrer.indexOf('?') + 1); - var qs_noanchor = qs.split('#')[0]; - var qsa = qs_noanchor.split('&'); - var keyword = ""; - - for (var i = 0; i < qsa.length; i++) { - var currentParam = qsa[i].split('='); - - if (currentParam.length !== 2) { - continue; - } - - if (currentParam[0] == paramKey) { - keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); - } - } - - if (keyword !== "") { - $(".contents").unmark({ - done: function() { - $(".contents").mark(keyword); - } - }); - } - } - }; - - mark(); - }); -}); - -/* Search term highlighting ------------------------------*/ - -function matchedWords(hit) { - var words = []; - - var hierarchy = hit._highlightResult.hierarchy; - // loop to fetch from lvl0, lvl1, etc. 
- for (var idx in hierarchy) { - words = words.concat(hierarchy[idx].matchedWords); - } - - var content = hit._highlightResult.content; - if (content) { - words = words.concat(content.matchedWords); - } - - // return unique words - var words_uniq = [...new Set(words)]; - return words_uniq; -} - -function updateHitURL(hit) { - - var words = matchedWords(hit); - var url = ""; - - if (hit.anchor) { - url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; - } else { - url = hit.url + '?q=' + escape(words.join(" ")); - } - - return url; -} diff --git a/docs/index.html b/docs/index.html deleted file mode 100644 index 9acbeb07..00000000 --- a/docs/index.html +++ /dev/null @@ -1,278 +0,0 @@ - - - - - - - -Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data • metabolyseR - - - - - - - - - - - - -
    -
    - - - - -
    -
    -
    - - - -
    -

    A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data.

    -
    -
    -

    Overview -

    -

    This package provides a tool kit of methods for metabolomics analyses that includes:

    -
      -
    • data pre-treatment
    • -
    • multivariate and univariate modelling/data mining techniques
    • -
    • correlation analysis
    • -
    -
    -
    -

    Installation -

    -

    The metabolyseR package can be installed from GitHub using the following:

    -
    -devtools::install_github('jasenfinch/metabolyseR',build_vignettes = TRUE)
    -
    -
    -

    Learn more -

    -

    The package documentation can be browsed online at https://jasenfinch.github.io/metabolyseR/.

    -

    If this is your first time using metabolyseR see the Introduction vignette or the quick start analysis below for information on how to get started.

    -

    If you believe you’ve found a bug in metabolyseR, please file a bug (and, if possible, a reproducible example) at https://github.com/jasenfinch/metabolyseR/issues.

    -
    -
    -

    Quick start example analysis -

    -

    This example analysis will use the abr1 data set from the metaboData package. It is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The analysis will also include use of the pipe %>% from the magrittr package. First load the necessary packages.

    - -

    For this example we will use only the negative acquisition mode data (abr1$neg) and sample meta-information (abr1$fact). Create an AnalysisData class object using the following:

    -
    -d <- analysisData(abr1$neg,abr1$fact)
    -

    The data includes 120 samples and 2000 mass spectral features as shown below.

    -
    -d
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 120 
    -#> Features: 2000 
    -#> Info: 9
    -

    The clsAvailable() function can be used to identify the columns available in our meta-information table.

    -
    -clsAvailable(d)
    -#> [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
    -#> [8] "day"      "class"
    -

    For this analysis, we will be using the infection time course class information contained in the day column. This can be extracted and the class frequencies tabulated using the following:

    -
    -d %>%
    -  clsExtract(cls = 'day') %>%
    -  table()
    -#> .
    -#>  1  2  3  4  5  H 
    -#> 20 20 20 20 20 20
    -

    As can be seen above, the experiment is made up of six infection time point classes that include a healthy control class (H) and five day infection time points (1-5), each with 20 replicates.

    -

    For data pre-treatment prior to statistical analysis, a two-thirds maximum class occupancy filter can be applied. Features where the maximum proportion of non-missing data per class is above two-thirds are retained. A total ion count normalisation will also be applied.

    -
    -d <- d %>%
    -  occupancyMaximum(cls = 'day', occupancy = 2/3) %>%
    -  transformTICnorm()
    -
    -d
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 120 
    -#> Features: 1760 
    -#> Info: 9
    -

    This has reduced the data set to 1760 relevant features.

    -

    The structure of the data can be visualised using both unsupervised and supervised methods. For instance, the first two principal components from a principal component analysis (PCA) of the data with the sample points coloured by infection class can be plotted using:

    -
    -plotPCA(d,cls = 'day',xAxis = 'PC1',yAxis = 'PC2')
    -

    -

    Similarly, multidimensional scaling (MDS) of sample proximity values from a supervised random forest classification model can be plotted along with receiver operating characteristic (ROC) curves.

    -
    -plotSupervisedRF(d,cls = 'day')
    -

    -

    A progression can clearly be seen from the earliest to latest infected time points.

    -

    For feature selection, one-way analysis of variance (ANOVA) can be performed for each feature to identify features significantly explanatory for the infection time point.

    -
    -anova_results <- d %>%
    -  anova(cls = 'day')
    -

    A table of the significantly explanatory features, with a Bonferroni-adjusted p value < 0.05, can be extracted using:

    -
    -explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05)
    -
    -explan_feat
    -#> # A tibble: 379 × 10
    -#>    Response Comparison  Feature term      df    sumsq  meansq statistic  p.value
    -#>    <chr>    <chr>       <chr>   <chr>  <dbl>    <dbl>   <dbl>     <dbl>    <dbl>
    -#>  1 day      1~2~3~4~5~H N341    respo…     5  3.88e-4 7.76e-5     137.  1.55e-46
    -#>  2 day      1~2~3~4~5~H N133    respo…     5  7.00e-5 1.40e-5     126.  8.63e-45
    -#>  3 day      1~2~3~4~5~H N163    respo…     5  6.01e-5 1.20e-5     117.  2.95e-43
    -#>  4 day      1~2~3~4~5~H N1087   respo…     5  2.42e-6 4.84e-7      99.8 5.61e-40
    -#>  5 day      1~2~3~4~5~H N171    respo…     5  2.25e-7 4.50e-8      95.7 3.84e-39
    -#>  6 day      1~2~3~4~5~H N513    respo…     5  3.38e-6 6.76e-7      95.3 4.78e-39
    -#>  7 day      1~2~3~4~5~H N1025   respo…     5  2.78e-6 5.56e-7      91.0 3.91e-38
    -#>  8 day      1~2~3~4~5~H N342    respo…     5  3.71e-6 7.41e-7      90.3 5.32e-38
    -#>  9 day      1~2~3~4~5~H N1083   respo…     5  5.11e-5 1.02e-5      89.0 1.06e-37
    -#> 10 day      1~2~3~4~5~H N1085   respo…     5  1.10e-5 2.19e-6      83.4 1.92e-36
    -#> # … with 369 more rows, and 1 more variable: adjusted.p.value <dbl>
    -

    The ANOVA has identified 379 features significantly explanatory over the infection time course. A heat map of the mean relative intensity for each class of these explanatory features can be plotted to visualise their trends between the infection time point classes.

    -
    -plotExplanatoryHeatmap(anova_results,
    -                       threshold = 0.05,
    -                       featureNames = FALSE)
    -

    -

    Many of the explanatory features can be seen to be most highly abundant in the final infection time point 5.

    -

    Finally, box plots of the trends of individual features can be plotted, such as the N341 feature below.

    -
    -plotFeature(anova_results,feature = 'N341',cls = 'day')
    -

    -
    -
    -
    - - -
    - - -
    - -
    -

    -

    Site built with pkgdown 2.0.2.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/link.svg b/docs/link.svg deleted file mode 100644 index 88ad8276..00000000 --- a/docs/link.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - diff --git a/docs/news/index.html b/docs/news/index.html deleted file mode 100644 index 987df62e..00000000 --- a/docs/news/index.html +++ /dev/null @@ -1,171 +0,0 @@ - -Changelog • metabolyseR - - -
    -
    - - - -
    -
    - - -
    - -
    • Suppressed name repair console message encountered during random forest permutation testing.

    • -
    • Added the proximity() method for extracting sample proximities from the RandomForest S4 class.

    • -
    • Added the mds() method to perform multidimensional scaling on sample proximities from the RandomForest S4 class.

    • -
    • Added the roc() method to calculate receiver operating characteristic (ROC) curves from the RandomForest S4 class.

    • -
    -
    - -
    • An error is now thrown during random forest classification when less than two classes are specified.

    • -
    • plotSupervisedRF() now skips plotting if errors are encountered during random forest training.

    • -
    -
    - -
    • Single replicate classes are now automatically removed by plotLDA().
    • -
    -
    - -
    -
    - -
    • Correlation analysis results now include an absolute correlation coefficient column by which the results are also arranged in descending order.
    • -
    -
    - -
    -
    - -
    -
    - -
    • Package version, creation date and verbose argument added to prototype of Analysis class.

    • -
    • All generics are now defined as standard generics.

    • -
    • Added metrics method for Analysis class.

    • -
    • metrics method for lists now ignores list elements that are not of class RandomForest.

    • -
    -
    - -
    • Changed the RSDthresh argument default to 50% instead of 0.5% in QCrsdFilter generic.
    • -
    -
    - -
    • Added a NEWS.md file to track changes to the package.

    • -
    • pkgdown site now available at https://jasenfinch.github.io/metabolyseR/.

    • -
    • Bug reports and issues URL at https://github.com/jasenfinch/metabolyseR/issues added to package DESCRIPTION.

    • -
    • Dedicated vignettes now available for a quick start example analysis, data pre-treatment and data modelling.

    • -
    • Function examples added to all documentation pages.

    • -
    • Unit test coverage increased to > 95%.

    • -
    • Parallel processing is now implemented using the future package.

    • -
    • plan() from the future package is re-exported.

    • -
    • RandomForest and Univariate classes now inherit from the AnalysisData class.

    • -
    • Improvements to plot theme aesthetics.

    • -
    • type argument added to plotPCA(), plotLDA(), plotUnsupervisedRF() and plotSupervisedRF() methods for the Analysis class.

    • -
    • "pre-treated" is now used instead of "preTreated" when specifying the type argument in Analysis class methods.

    • -
    • Added clsRename() method for renaming class information columns.

    • -
    • plotMeasures() method renamed to plotMetrics().

    • -
    • Added plotMDS(), plotImportance() and plotMetrics() methods for lists of RandomForest class objects.

    • -
    • Added plotExplanatoryHeatmap() method for lists of RandomForest or Univariate class objects.

    • -
    • Renamed keepVariables() and removeVariables() methods to keepFeatures() and removeFeatures().

    • -
    • Added the helper functions preTreatmentElements(), preTreatmentMethods() and preTreatParameters() for declaring pre-treatment parameters for the AnalysisParameters class.

    • -
    • Added the helper functions modellingMethods() and modellingParameters() for declaring modelling parameters for the AnalysisParameters class.

    • -
    • Added helper function correlationsParameters() for declaring correlations parameters for the AnalysisParameters class.

    • -
    • Added binaryComparisons() method for retrieving all possible binary class comparisons from an AnalysisData class object.

    • -
    • changeParameter() now assigns parameter values through direct assignment.

    • -
    • Added analysisResults() method for extracting analysis element results from the Analysis class.

    • -
    • Added exportParameters() method for exporting analysis parameters to YAML file format.

    • -
    • Added dat() and sinfo() accessor methods for the Analysis class.

    • -
    • Relative standard deviation (RSD) values are now specified and returned as percentages.

    • -
    -
    - - - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/pkgdown.css b/docs/pkgdown.css deleted file mode 100644 index 80ea5b83..00000000 --- a/docs/pkgdown.css +++ /dev/null @@ -1,384 +0,0 @@ -/* Sticky footer */ - -/** - * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ - * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css - * - * .Site -> body > .container - * .Site-content -> body > .container .row - * .footer -> footer - * - * Key idea seems to be to ensure that .container and __all its parents__ - * have height set to 100% - * - */ - -html, body { - height: 100%; -} - -body { - position: relative; -} - -body > .container { - display: flex; - height: 100%; - flex-direction: column; -} - -body > .container .row { - flex: 1 0 auto; -} - -footer { - margin-top: 45px; - padding: 35px 0 36px; - border-top: 1px solid #e5e5e5; - color: #666; - display: flex; - flex-shrink: 0; -} -footer p { - margin-bottom: 0; -} -footer div { - flex: 1; -} -footer .pkgdown { - text-align: right; -} -footer p { - margin-bottom: 0; -} - -img.icon { - float: right; -} - -/* Ensure in-page images don't run outside their container */ -.contents img { - max-width: 100%; - height: auto; -} - -/* Fix bug in bootstrap (only seen in firefox) */ -summary { - display: list-item; -} - -/* Typographic tweaking ---------------------------------*/ - -.contents .page-header { - margin-top: calc(-60px + 1em); -} - -dd { - margin-left: 3em; -} - -/* Section anchors ---------------------------------*/ - -a.anchor { - display: none; - margin-left: 5px; - width: 20px; - height: 20px; - - background-image: url(./link.svg); - background-repeat: no-repeat; - background-size: 20px 20px; - background-position: center center; -} - -h1:hover .anchor, -h2:hover .anchor, -h3:hover .anchor, -h4:hover .anchor, -h5:hover .anchor, -h6:hover .anchor { - display: inline-block; -} - -/* Fixes for fixed navbar --------------------------*/ - -.contents h1, 
.contents h2, .contents h3, .contents h4 { - padding-top: 60px; - margin-top: -40px; -} - -/* Navbar submenu --------------------------*/ - -.dropdown-submenu { - position: relative; -} - -.dropdown-submenu>.dropdown-menu { - top: 0; - left: 100%; - margin-top: -6px; - margin-left: -1px; - border-radius: 0 6px 6px 6px; -} - -.dropdown-submenu:hover>.dropdown-menu { - display: block; -} - -.dropdown-submenu>a:after { - display: block; - content: " "; - float: right; - width: 0; - height: 0; - border-color: transparent; - border-style: solid; - border-width: 5px 0 5px 5px; - border-left-color: #cccccc; - margin-top: 5px; - margin-right: -10px; -} - -.dropdown-submenu:hover>a:after { - border-left-color: #ffffff; -} - -.dropdown-submenu.pull-left { - float: none; -} - -.dropdown-submenu.pull-left>.dropdown-menu { - left: -100%; - margin-left: 10px; - border-radius: 6px 0 6px 6px; -} - -/* Sidebar --------------------------*/ - -#pkgdown-sidebar { - margin-top: 30px; - position: -webkit-sticky; - position: sticky; - top: 70px; -} - -#pkgdown-sidebar h2 { - font-size: 1.5em; - margin-top: 1em; -} - -#pkgdown-sidebar h2:first-child { - margin-top: 0; -} - -#pkgdown-sidebar .list-unstyled li { - margin-bottom: 0.5em; -} - -/* bootstrap-toc tweaks ------------------------------------------------------*/ - -/* All levels of nav */ - -nav[data-toggle='toc'] .nav > li > a { - padding: 4px 20px 4px 6px; - font-size: 1.5rem; - font-weight: 400; - color: inherit; -} - -nav[data-toggle='toc'] .nav > li > a:hover, -nav[data-toggle='toc'] .nav > li > a:focus { - padding-left: 5px; - color: inherit; - border-left: 1px solid #878787; -} - -nav[data-toggle='toc'] .nav > .active > a, -nav[data-toggle='toc'] .nav > .active:hover > a, -nav[data-toggle='toc'] .nav > .active:focus > a { - padding-left: 5px; - font-size: 1.5rem; - font-weight: 400; - color: inherit; - border-left: 2px solid #878787; -} - -/* Nav: second level (shown on .active) */ - -nav[data-toggle='toc'] .nav .nav { - 
display: none; /* Hide by default, but at >768px, show it */ - padding-bottom: 10px; -} - -nav[data-toggle='toc'] .nav .nav > li > a { - padding-left: 16px; - font-size: 1.35rem; -} - -nav[data-toggle='toc'] .nav .nav > li > a:hover, -nav[data-toggle='toc'] .nav .nav > li > a:focus { - padding-left: 15px; -} - -nav[data-toggle='toc'] .nav .nav > .active > a, -nav[data-toggle='toc'] .nav .nav > .active:hover > a, -nav[data-toggle='toc'] .nav .nav > .active:focus > a { - padding-left: 15px; - font-weight: 500; - font-size: 1.35rem; -} - -/* orcid ------------------------------------------------------------------- */ - -.orcid { - font-size: 16px; - color: #A6CE39; - /* margins are required by official ORCID trademark and display guidelines */ - margin-left:4px; - margin-right:4px; - vertical-align: middle; -} - -/* Reference index & topics ----------------------------------------------- */ - -.ref-index th {font-weight: normal;} - -.ref-index td {vertical-align: top; min-width: 100px} -.ref-index .icon {width: 40px;} -.ref-index .alias {width: 40%;} -.ref-index-icons .alias {width: calc(40% - 40px);} -.ref-index .title {width: 60%;} - -.ref-arguments th {text-align: right; padding-right: 10px;} -.ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} -.ref-arguments .name {width: 20%;} -.ref-arguments .desc {width: 80%;} - -/* Nice scrolling for wide elements --------------------------------------- */ - -table { - display: block; - overflow: auto; -} - -/* Syntax highlighting ---------------------------------------------------- */ - -pre, code, pre code { - background-color: #f8f8f8; - color: #333; -} -pre, pre code { - white-space: pre-wrap; - word-break: break-all; - overflow-wrap: break-word; -} - -pre { - border: 1px solid #eee; -} - -pre .img, pre .r-plt { - margin: 5px 0; -} - -pre .img img, pre .r-plt img { - background-color: #fff; -} - -code a, pre a { - color: #375f84; -} - -a.sourceLine:hover { - text-decoration: none; -} - -.fl 
{color: #1514b5;} -.fu {color: #000000;} /* function */ -.ch,.st {color: #036a07;} /* string */ -.kw {color: #264D66;} /* keyword */ -.co {color: #888888;} /* comment */ - -.error {font-weight: bolder;} -.warning {font-weight: bolder;} - -/* Clipboard --------------------------*/ - -.hasCopyButton { - position: relative; -} - -.btn-copy-ex { - position: absolute; - right: 0; - top: 0; - visibility: hidden; -} - -.hasCopyButton:hover button.btn-copy-ex { - visibility: visible; -} - -/* headroom.js ------------------------ */ - -.headroom { - will-change: transform; - transition: transform 200ms linear; -} -.headroom--pinned { - transform: translateY(0%); -} -.headroom--unpinned { - transform: translateY(-100%); -} - -/* mark.js ----------------------------*/ - -mark { - background-color: rgba(255, 255, 51, 0.5); - border-bottom: 2px solid rgba(255, 153, 51, 0.3); - padding: 1px; -} - -/* vertical spacing after htmlwidgets */ -.html-widget { - margin-bottom: 10px; -} - -/* fontawesome ------------------------ */ - -.fab { - font-family: "Font Awesome 5 Brands" !important; -} - -/* don't display links in code chunks when printing */ -/* source: https://stackoverflow.com/a/10781533 */ -@media print { - code a:link:after, code a:visited:after { - content: ""; - } -} - -/* Section anchors --------------------------------- - Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 -*/ - -div.csl-bib-body { } -div.csl-entry { - clear: both; -} -.hanging-indent div.csl-entry { - margin-left:2em; - text-indent:-2em; -} -div.csl-left-margin { - min-width:2em; - float:left; -} -div.csl-right-inline { - margin-left:2em; - padding-left:1em; -} -div.csl-indent { - margin-left: 2em; -} diff --git a/docs/pkgdown.js b/docs/pkgdown.js deleted file mode 100644 index 6f0eee40..00000000 --- a/docs/pkgdown.js +++ /dev/null @@ -1,108 +0,0 @@ -/* http://gregfranko.com/blog/jquery-best-practices/ */ -(function($) { - $(function() { - - $('.navbar-fixed-top').headroom(); 
- - $('body').css('padding-top', $('.navbar').height() + 10); - $(window).resize(function(){ - $('body').css('padding-top', $('.navbar').height() + 10); - }); - - $('[data-toggle="tooltip"]').tooltip(); - - var cur_path = paths(location.pathname); - var links = $("#navbar ul li a"); - var max_length = -1; - var pos = -1; - for (var i = 0; i < links.length; i++) { - if (links[i].getAttribute("href") === "#") - continue; - // Ignore external links - if (links[i].host !== location.host) - continue; - - var nav_path = paths(links[i].pathname); - - var length = prefix_length(nav_path, cur_path); - if (length > max_length) { - max_length = length; - pos = i; - } - } - - // Add class to parent
  • , and enclosing
  • if in dropdown - if (pos >= 0) { - var menu_anchor = $(links[pos]); - menu_anchor.parent().addClass("active"); - menu_anchor.closest("li.dropdown").addClass("active"); - } - }); - - function paths(pathname) { - var pieces = pathname.split("/"); - pieces.shift(); // always starts with / - - var end = pieces[pieces.length - 1]; - if (end === "index.html" || end === "") - pieces.pop(); - return(pieces); - } - - // Returns -1 if not found - function prefix_length(needle, haystack) { - if (needle.length > haystack.length) - return(-1); - - // Special case for length-0 haystack, since for loop won't run - if (haystack.length === 0) { - return(needle.length === 0 ? 0 : -1); - } - - for (var i = 0; i < haystack.length; i++) { - if (needle[i] != haystack[i]) - return(i); - } - - return(haystack.length); - } - - /* Clipboard --------------------------*/ - - function changeTooltipMessage(element, msg) { - var tooltipOriginalTitle=element.getAttribute('data-original-title'); - element.setAttribute('data-original-title', msg); - $(element).tooltip('show'); - element.setAttribute('data-original-title', tooltipOriginalTitle); - } - - if(ClipboardJS.isSupported()) { - $(document).ready(function() { - var copyButton = ""; - - $("div.sourceCode").addClass("hasCopyButton"); - - // Insert copy buttons: - $(copyButton).prependTo(".hasCopyButton"); - - // Initialize tooltips: - $('.btn-copy-ex').tooltip({container: 'body'}); - - // Initialize clipboard: - var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { - text: function(trigger) { - return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); - } - }); - - clipboardBtnCopies.on('success', function(e) { - changeTooltipMessage(e.trigger, 'Copied!'); - e.clearSelection(); - }); - - clipboardBtnCopies.on('error', function() { - changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); - }); - }); - } -})(window.jQuery || window.$) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml deleted file mode 
100644 index d33f1e92..00000000 --- a/docs/pkgdown.yml +++ /dev/null @@ -1,13 +0,0 @@ -pandoc: 2.14.0.3 -pkgdown: 2.0.2 -pkgdown_sha: ~ -articles: - metabolyseR: metabolyseR.html - modelling: modelling.html - pre_treatment: pre_treatment.html - quick_start: quick_start.html -last_built: 2022-01-27T11:58Z -urls: - reference: https://jasenfinch.github.io/metabolyseR/reference - article: https://jasenfinch.github.io/metabolyseR/articles - diff --git a/docs/reference/Analysis-class.html b/docs/reference/Analysis-class.html deleted file mode 100644 index 5c9a4c3d..00000000 --- a/docs/reference/Analysis-class.html +++ /dev/null @@ -1,125 +0,0 @@ - -Analysis S4 class — Analysis-class • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    An S4 class to store analysis results.

    -
    - - -
    -

    Slots

    - - -
    log
    -

    list containing analysis dates and time

    - - -
    parameters
    -

    class AnalysisParameters containing the analysis parameters

    - - -
    raw
    -

    list containing info and raw data

    - - -
    pre-treated
    -

    list containing preTreated info and raw data

    - - -
    modelling
    -

    list containing modelling results

    - - -
    correlations
    -

    tibble containing weighted edgelist of correlations

    - - -
    - -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/AnalysisData-class.html b/docs/reference/AnalysisData-class.html deleted file mode 100644 index ccad2458..00000000 --- a/docs/reference/AnalysisData-class.html +++ /dev/null @@ -1,109 +0,0 @@ - -AnalysisData S4 class — AnalysisData-class • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    An S4 class for metabolomic data and sample meta information.

    -
    - - -
    -

    Slots

    - - -
    data
    -

    sample metabolomic data

    - - -
    info
    -

    sample meta information

    - - -
    - -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/AnalysisParameters-class.html b/docs/reference/AnalysisParameters-class.html deleted file mode 100644 index 46af2a52..00000000 --- a/docs/reference/AnalysisParameters-class.html +++ /dev/null @@ -1,113 +0,0 @@ - -AnalysisParameters S4 class — AnalysisParameters-class • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    An S4 class to store analysis parameters.

    -
    - - -
    -

    Slots

    - - -
    pre-treatment
    -

    list containing parameters for data pre-treatment

    - - -
    modelling
    -

    list containing parameters for modelling

    - - -
    correlations
    -

    list containing parameters for correlations

    - - -
    - -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/QC-1.png b/docs/reference/QC-1.png deleted file mode 100644 index e8005cb7..00000000 Binary files a/docs/reference/QC-1.png and /dev/null differ diff --git a/docs/reference/QC-2.png b/docs/reference/QC-2.png deleted file mode 100644 index 5af9a1ab..00000000 Binary files a/docs/reference/QC-2.png and /dev/null differ diff --git a/docs/reference/QC.html b/docs/reference/QC.html deleted file mode 100644 index da1cd34e..00000000 --- a/docs/reference/QC.html +++ /dev/null @@ -1,209 +0,0 @@ - -Quality control (QC) sample treatments — QCimpute • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Quality control (QC) sample pre-treatment methods.

    -
    - -
    -
    QCimpute(
    -  d,
    -  cls = "class",
    -  QCidx = "QC",
    -  occupancy = 2/3,
    -  parallel = "variables",
    -  seed = 1234
    -)
    -
    -# S4 method for AnalysisData
    -QCimpute(
    -  d,
    -  cls = "class",
    -  QCidx = "QC",
    -  occupancy = 2/3,
    -  parallel = "variables",
    -  seed = 1234
    -)
    -
    -QCoccupancy(d, cls = "class", QCidx = "QC", occupancy = 2/3)
    -
    -# S4 method for AnalysisData
    -QCoccupancy(d, cls = "class", QCidx = "QC", occupancy = 2/3)
    -
    -QCremove(d, cls = "class", QCidx = "QC")
    -
    -# S4 method for AnalysisData
    -QCremove(d, cls = "class", QCidx = "QC")
    -
    -QCrsdFilter(d, cls = "class", QCidx = "QC", RSDthresh = 50)
    -
    -# S4 method for AnalysisData
    -QCrsdFilter(d, cls = "class", QCidx = "QC", RSDthresh = 50)
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class AnalysisData

    -
    cls
    -

    info column to use for class labels

    -
    QCidx
    -

    QC sample label

    -
    occupancy
    -

    occupancy threshold for filtering

    -
    parallel
    -

    parallel type to use. See ?missForest for details

    -
    seed
    -

    random number seed

    -
    RSDthresh
    -

    RSD (%) threshold for filtering

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisData containing QC treated data.

    -
    -
    -

    Details

    -

    A QC sample is an average pooled sample, equally representative in composition of all the samples present within an experimental set. -Within an analytical run, the QC sample is analysed at equal intervals throughout the run. -If there is class structure within the run, this should be randomised within a block fashion so that the classes are equally represented in each block throughout the run. -A QC sample can then be injected and analysed between these randomised blocks. -This provides a set of technical injections that allows the variability in instrument performance over the run to be accounted for and the robustness of the acquired variables to be assessed.

    -

    The technical reproducibility of an acquired variable can be assessed using it's relative standard deviation (RSD) within the QC samples. -The variable RSDs can then be filtered below a threshold value to remove metabolome features that are poorly reproducible across the analytical runs. -This variable filtering strategy has an advantage over that of occupancy alone as it is not dependent on underlying class structure. -Therefore, the variables and variable numbers will not alter if a new class structure is imposed upon the data.

    -
    -
    -

    Methods

    - - -
    • QCimpute: Missing value imputation of QC samples.

    • -
    • QCoccupancy: Feature maximum occupancy filtering based on QC samples.

    • -
    • QCremove: Remove QC samples.

    • -
    • QCrsdFilter: Feature filtering based RSD of QC sample features.

    • -
    - -
    -

    Examples

    -
    
    -## Initial example data preparation
    -library(metaboData)
    -d <- analysisData(abr1$neg[,1:1000],abr1$fact)
    -
    -## Plot the feature RSD distributions of the H class only
    -d %>% 
    - keepClasses(cls = 'day',classes = 'H') %>% 
    - plotRSD(cls = 'day')
    -#> Warning: Removed 119 rows containing non-finite values (stat_density).
    -#> Warning: Removed 1 row(s) containing missing values (geom_path).
    -
    -
    -## Apply QC feature occupancy filtering and QC feature RSD filtering
    -QC_treated <- d %>% 
    - QCoccupancy(cls = 'day',QCidx = 'H',occupancy = 2/3) %>%
    - QCrsdFilter(cls = 'day',QCidx = 'H',RSDthresh = 50)
    -
    -print(QC_treated)
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 120 
    -#> Features: 404 
    -#> Info: 9 
    -#> 
    -
    -## Plot the feature RSD distributions of the H class after QC treatments
    -QC_treated %>% 
    - keepClasses(cls = 'day',classes = 'H') %>% 
    - plotRSD(cls = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/QCimpute.html b/docs/reference/QCimpute.html deleted file mode 100644 index 1be5d19c..00000000 --- a/docs/reference/QCimpute.html +++ /dev/null @@ -1,217 +0,0 @@ - - - - - - - - -QCimpute — QCimpute • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    QC imputation.

    -
    - -
    QCimpute(
    -  d,
    -  cls = "class",
    -  QCidx = "QC",
    -  occupancy = 2/3,
    -  parallel = "variables",
    -  seed = 1234
    -)
    -
    -# S4 method for AnalysisData
    -QCimpute(
    -  d,
    -  cls = "class",
    -  QCidx = "QC",
    -  occupancy = 2/3,
    -  parallel = "variables",
    -  seed = 1234
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    cls

    info column to use for class labels

    QCidx

    QC sample label

    occupancy

    occupancy threshold for imputation

    parallel

    parallel type to use. See ?missForest for details

    seed

    random number seed

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/QCoccupancy.html b/docs/reference/QCoccupancy.html deleted file mode 100644 index ca08e049..00000000 --- a/docs/reference/QCoccupancy.html +++ /dev/null @@ -1,195 +0,0 @@ - - - - - - - - -QCoccupancy — QCoccupancy • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    QC maximum occupancy filter.

    -
    - -
    QCoccupancy(d, cls = "class", QCidx = "QC", occupancy = 2/3)
    -
    -# S4 method for AnalysisData
    -QCoccupancy(d, cls = "class", QCidx = "QC", occupancy = 2/3)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    cls

    info column to use for class labels

    QCidx

    QC sample label

    occupancy

    occupancy threshold for filtering

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/QCremove.html b/docs/reference/QCremove.html deleted file mode 100644 index 60251952..00000000 --- a/docs/reference/QCremove.html +++ /dev/null @@ -1,191 +0,0 @@ - - - - - - - - -QCremove — QCremove • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Remove QC samples.

    -
    - -
    QCremove(d, cls = "class", QCidx = "QC")
    -
    -# S4 method for AnalysisData
    -QCremove(d, cls = "class", QCidx = "QC")
    - -

    Arguments

    - - - - - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    cls

    info column to use for class labels

    QCidx

    QC sample label

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/QCrsdFilter.html b/docs/reference/QCrsdFilter.html deleted file mode 100644 index e374e705..00000000 --- a/docs/reference/QCrsdFilter.html +++ /dev/null @@ -1,195 +0,0 @@ - - - - - - - - -QCrsdFilter — QCrsdFilter • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    QC relative standard deviation (RSD) filtering..

    -
    - -
    QCrsdFilter(d, cls = "class", QCidx = "QC", RSDthresh = 0.5)
    -
    -# S4 method for AnalysisData
    -QCrsdFilter(d, cls = "class", QCidx = "QC", RSDthresh = 50)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    cls

    info column to use for class labels

    QCidx

    QC sample label

    RSDthresh

    RSD (%) threshold for filtering

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/RandomForest-class.html b/docs/reference/RandomForest-class.html deleted file mode 100644 index da9017e7..00000000 --- a/docs/reference/RandomForest-class.html +++ /dev/null @@ -1,133 +0,0 @@ - -RandomForest S4 class — RandomForest-class • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    An S4 class for random forest results and models.

    -
    - - -
    -

    Slots

    - - -
    type
    -

    random forest type

    - - -
    response
    -

    response variable name

    - - -
    results
    -

    list of measure and importance results tables

    - - -
    predictions
    -

    tibble of model observation predictions

    - - -
    permutations
    -

    list of permutations measure and importance results tables

    - - -
    importances
    -

    tibble of model feature importances

    - - -
    proximities
    -

    tibble of model observation proximities

    - - -
    models
    -

    list of random forest models

    - - -
    - -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/Rplot001.png b/docs/reference/Rplot001.png deleted file mode 100644 index 17a35806..00000000 Binary files a/docs/reference/Rplot001.png and /dev/null differ diff --git a/docs/reference/Rplot002.png b/docs/reference/Rplot002.png deleted file mode 100644 index 85434edd..00000000 Binary files a/docs/reference/Rplot002.png and /dev/null differ diff --git a/docs/reference/Rplot003.png b/docs/reference/Rplot003.png deleted file mode 100644 index 69ddbc48..00000000 Binary files a/docs/reference/Rplot003.png and /dev/null differ diff --git a/docs/reference/Rplot004.png b/docs/reference/Rplot004.png deleted file mode 100644 index d37c1332..00000000 Binary files a/docs/reference/Rplot004.png and /dev/null differ diff --git a/docs/reference/Rplot005.png b/docs/reference/Rplot005.png deleted file mode 100644 index 936d3a89..00000000 Binary files a/docs/reference/Rplot005.png and /dev/null differ diff --git a/docs/reference/Rplot006.png b/docs/reference/Rplot006.png deleted file mode 100644 index 86a79a53..00000000 Binary files a/docs/reference/Rplot006.png and /dev/null differ diff --git a/docs/reference/Rplot007.png b/docs/reference/Rplot007.png deleted file mode 100644 index 10a8138c..00000000 Binary files a/docs/reference/Rplot007.png and /dev/null differ diff --git a/docs/reference/Rplot008.png b/docs/reference/Rplot008.png deleted file mode 100644 index e56132c9..00000000 Binary files a/docs/reference/Rplot008.png and /dev/null differ diff --git a/docs/reference/Rplot009.png b/docs/reference/Rplot009.png deleted file mode 100644 index dcb68042..00000000 Binary files a/docs/reference/Rplot009.png and /dev/null differ diff --git a/docs/reference/Rplot010.png b/docs/reference/Rplot010.png deleted file mode 100644 index fe1a30bf..00000000 Binary files a/docs/reference/Rplot010.png and /dev/null differ diff --git a/docs/reference/Rplot011.png b/docs/reference/Rplot011.png deleted file mode 100644 index 4fa05438..00000000 Binary 
files a/docs/reference/Rplot011.png and /dev/null differ diff --git a/docs/reference/Rplot012.png b/docs/reference/Rplot012.png deleted file mode 100644 index 92c3f71e..00000000 Binary files a/docs/reference/Rplot012.png and /dev/null differ diff --git a/docs/reference/Univariate-class.html b/docs/reference/Univariate-class.html deleted file mode 100644 index aa95c5f8..00000000 --- a/docs/reference/Univariate-class.html +++ /dev/null @@ -1,113 +0,0 @@ - -Univariate S4 class — Univariate-class • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    An S4 class for univariate test models and results.

    -
    - - -
    -

    Slots

    - - -
    type
    -

    univariate test type

    - - -
    models
    -

    list of model objects

    - - -
    results
    -

    tibble containing test results

    - - -
    - -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/aggregate-1.png b/docs/reference/aggregate-1.png deleted file mode 100644 index 41ade7d0..00000000 Binary files a/docs/reference/aggregate-1.png and /dev/null differ diff --git a/docs/reference/aggregate-2.png b/docs/reference/aggregate-2.png deleted file mode 100644 index 37774e44..00000000 Binary files a/docs/reference/aggregate-2.png and /dev/null differ diff --git a/docs/reference/aggregate-3.png b/docs/reference/aggregate-3.png deleted file mode 100644 index d43c8db3..00000000 Binary files a/docs/reference/aggregate-3.png and /dev/null differ diff --git a/docs/reference/aggregate-4.png b/docs/reference/aggregate-4.png deleted file mode 100644 index 37774e44..00000000 Binary files a/docs/reference/aggregate-4.png and /dev/null differ diff --git a/docs/reference/aggregate.html b/docs/reference/aggregate.html deleted file mode 100644 index 3d13f65a..00000000 --- a/docs/reference/aggregate.html +++ /dev/null @@ -1,170 +0,0 @@ - -Sample aggregation — aggregateMean • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Aggregation of sample features based on a grouping variable.

    -
    - -
    -
    aggregateMean(d, cls = "class")
    -
    -# S4 method for AnalysisData
    -aggregateMean(d, cls = "class")
    -
    -aggregateMedian(d, cls = "class")
    -
    -# S4 method for AnalysisData
    -aggregateMedian(d, cls = "class")
    -
    -aggregateSum(d, cls = "class")
    -
    -# S4 method for AnalysisData
    -aggregateSum(d, cls = "class")
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class AnalysisData

    -
    cls
    -

    info column to use for class data

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisData containing the aggregated data.

    -
    -
    -

    Details

    -

    Sample aggregation allows the electronic pooling of sample features based on a grouping variable. -This is useful in situations such as the presence of technical replicates that can be aggregated to reduce the effects of pseudo replication.

    -
    -
    -

    Methods

    - - -
    • aggregateMean: Aggregate sample features to the group mean.

    • -
    • aggregateMedian: Aggregate sample features to the group median.

    • -
    • aggregateSum: Aggregate sample features to the group total.

    • -
    - -
    -

    Examples

    -
    ## Each of the following examples shows the application of the aggregation method and then 
    -## a Principle Component Analysis is plotted to show it's effect on the data structure.
    -
    -## Initial example data preparation
    -library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
    - occupancyMaximum(occupancy = 2/3)
    -
    -d %>% 
    - plotPCA(cls = 'day')
    -
    - 
    -## Mean aggregation
    -d %>% 
    - aggregateMean(cls = 'day') %>% 
    - plotPCA(cls = 'day',ellipses = FALSE)
    -
    - 
    -## Median aggregation
    -d %>% 
    - aggregateMedian(cls = 'day') %>% 
    - plotPCA(cls = 'day',ellipses = FALSE)
    -
    - 
    -## Sum aggregation
    -d %>% 
    - aggregateSum(cls = 'day') %>% 
    - plotPCA(cls = 'day',ellipses = FALSE)
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/aggregateMean.html b/docs/reference/aggregateMean.html deleted file mode 100644 index ce16c1b3..00000000 --- a/docs/reference/aggregateMean.html +++ /dev/null @@ -1,187 +0,0 @@ - - - - - - - - -aggregateMean — aggregateMean • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Mean aggregation of sample data.

    -
    - -
    aggregateMean(d, cls = "class")
    -
    -# S4 method for AnalysisData
    -aggregateMean(d, cls = "class")
    - -

    Arguments

    - - - - - - - - - - -
    d

    S4 object of class Data

    cls

    info column to use for class data

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/aggregateMedian.html b/docs/reference/aggregateMedian.html deleted file mode 100644 index c79bd2bd..00000000 --- a/docs/reference/aggregateMedian.html +++ /dev/null @@ -1,187 +0,0 @@ - - - - - - - - -aggregateMedian — aggregateMedian • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Median aggregation of sample data.

    -
    - -
    aggregateMedian(d, cls = "class")
    -
    -# S4 method for AnalysisData
    -aggregateMedian(d, cls = "class")
    - -

    Arguments

    - - - - - - - - - - -
    d

    S4 object of class Data

    cls

    info column to use for class data

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/aggregateSum.html b/docs/reference/aggregateSum.html deleted file mode 100644 index eac7c48d..00000000 --- a/docs/reference/aggregateSum.html +++ /dev/null @@ -1,187 +0,0 @@ - - - - - - - - -aggregateSum — aggregateSum • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Sum aggregation of sample data.

    -
    - -
    aggregateSum(d, cls = "class")
    -
    -# S4 method for AnalysisData
    -aggregateSum(d, cls = "class")
    - -

    Arguments

    - - - - - - - - - - -
    d

    S4 object of class Data

    cls

    info column to use for class data

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/analysis-accessors.html b/docs/reference/analysis-accessors.html deleted file mode 100644 index 3de471ab..00000000 --- a/docs/reference/analysis-accessors.html +++ /dev/null @@ -1,283 +0,0 @@ - -AnalysisData and Analysis class accessors — dat • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Accessor methods for the AnalysisData and Analysis S4 classes.

    -
    - -
    -
    dat(x, ...)
    -
    -# S4 method for AnalysisData
    -dat(x)
    -
    -# S4 method for Analysis
    -dat(x, type = c("raw", "pre-treated"))
    -
    -dat(x, ...) <- value
    -
    -# S4 method for AnalysisData
    -dat(x) <- value
    -
    -# S4 method for Analysis
    -dat(x, type = c("raw", "pre-treated")) <- value
    -
    -sinfo(x, ...)
    -
    -# S4 method for AnalysisData
    -sinfo(x)
    -
    -# S4 method for Analysis
    -sinfo(x, type = c("raw", "pre-treated"), value)
    -
    -sinfo(x, ...) <- value
    -
    -# S4 method for AnalysisData
    -sinfo(x) <- value
    -
    -# S4 method for Analysis
    -sinfo(x, type = c("raw", "pre-treated")) <- value
    -
    -raw(x)
    -
    -# S4 method for Analysis
    -raw(x)
    -
    -raw(x) <- value
    -
    -# S4 method for Analysis
    -raw(x) <- value
    -
    -preTreated(x)
    -
    -# S4 method for Analysis
    -preTreated(x)
    -
    -preTreated(x) <- value
    -
    -# S4 method for Analysis
    -preTreated(x) <- value
    -
    -features(x, ...)
    -
    -# S4 method for AnalysisData
    -features(x)
    -
    -# S4 method for Analysis
    -features(x, type = c("raw", "pre-treated"))
    -
    -nSamples(x, ...)
    -
    -# S4 method for AnalysisData
    -nSamples(x)
    -
    -# S4 method for Analysis
    -nSamples(x, type = c("raw", "pre-treated"))
    -
    -nFeatures(x, ...)
    -
    -# S4 method for AnalysisData
    -nFeatures(x)
    -
    -# S4 method for Analysis
    -nFeatures(x, type = c("raw", "pre-treated"))
    -
    -analysisResults(x, element)
    -
    -# S4 method for Analysis
    -analysisResults(x, element)
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class AnalysisData or Analysis

    -
    ...
    -

    arguments to pass to the appropriate method

    -
    type
    -

    get or set raw or pre-treated data

    -
    value
    -

    value to set

    -
    element
    -

    analysis element results to return

    -
    -
    -

    Methods

    - - -
    • dat: Return a metabolomic data table.

    • -
    • dat<-: Set a metabolomic data table.

    • -
    • sinfo: Return a sample information data table.

    • -
    • sinfo<-: Set a sample information data table.

    • -
    • raw: Return the AnalysisData object containing unprocessed metabolomic data from an Analysis object.

    • -
    • raw<-: Set an AnalysisData object to the raw slot of an Analysis class object.

    • -
    • preTreated: Return the AnalysisData object containing pre-treated metabolomic data from an Analysis object.

    • -
    • preTreated<-: Set an AnalysisData object to the pre-treated slot of an Analysis class object.

    • -
    • features: Return the feature names.

    • -
    • nSamples: Return the number of samples.

    • -
    • nFeatures: Return the number of features.

    • -
    • analysisResults: Return results from an Analysis object of an analysis element.

    • -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    -
    -## Return the metabolomic data
    -dat(d)
    -#> # A tibble: 120 × 101
    -#>     N200  N201  N202  N203  N204  N205   N206  N207  N208  N209  N210   N211
    -#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
    -#>  1     0  0    0     2.98  0     0     0      0.468 0     1.43  0     0.170 
    -#>  2     0  0    0     1.30  0     1.15  0      0     0     0.492 0     0     
    -#>  3     0  0    0     6.08  0.214 2.53  0      1.85  0     1.06  0.184 0.0827
    -#>  4     0  4.24 0     1.48  0     0     0.147  0     0     0.929 0     0.286 
    -#>  5     0  0    0     0.530 0     0.233 0.376  1.41  0     0.274 0     0.139 
    -#>  6     0  0    0     0     0     0.438 0      0     0.219 0.325 0     0     
    -#>  7     0  0    0     0.547 0     0     0      0     0     0     0     0     
    -#>  8     0  0    0.195 1.37  0.594 1.11  0.0902 0     0     0     0.162 0     
    -#>  9     0  0    0     1.24  0     0.196 0.675  0.528 0.128 2.61  0.294 2.66  
    -#> 10     0  0    0     0.113 0     1.06  0      0     0     1.76  2.96  0     
    -#> # … with 110 more rows, and 89 more variables: N212 <dbl>, N213 <dbl>,
    -#> #   N214 <dbl>, N215 <dbl>, N216 <dbl>, N217 <dbl>, N218 <dbl>, N219 <dbl>,
    -#> #   N220 <dbl>, N221 <dbl>, N222 <dbl>, N223 <dbl>, N224 <dbl>, N225 <dbl>,
    -#> #   N226 <dbl>, N227 <dbl>, N228 <dbl>, N229 <dbl>, N230 <dbl>, N231 <dbl>,
    -#> #   N232 <dbl>, N233 <dbl>, N234 <dbl>, N235 <dbl>, N236 <dbl>, N237 <dbl>,
    -#> #   N238 <dbl>, N239 <dbl>, N240 <dbl>, N241 <dbl>, N242 <dbl>, N243 <dbl>,
    -#> #   N244 <dbl>, N245 <dbl>, N246 <dbl>, N247 <dbl>, N248 <dbl>, N249 <dbl>, …
    -
    -## Set the metabolomic data
    -dat(d) <- abr1$neg[,300:400]
    -
    -## Return the sample information
    -sinfo(d)
    -#> # A tibble: 120 × 9
    -#>    injorder pathcdf              filecdf name.org remark name    rep day   class
    -#>       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
    -#>  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
    -#>  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
    -#>  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
    -#>  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
    -#>  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
    -#>  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
    -#>  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
    -#>  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
    -#>  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
    -#> 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
    -#> # … with 110 more rows
    -
    -## Set the sample information
    -sinfo(d) <- abr1$fact
    -
    -## Return the feature names
    -features(d)
    -#>   [1] "N300" "N301" "N302" "N303" "N304" "N305" "N306" "N307" "N308" "N309"
    -#>  [11] "N310" "N311" "N312" "N313" "N314" "N315" "N316" "N317" "N318" "N319"
    -#>  [21] "N320" "N321" "N322" "N323" "N324" "N325" "N326" "N327" "N328" "N329"
    -#>  [31] "N330" "N331" "N332" "N333" "N334" "N335" "N336" "N337" "N338" "N339"
    -#>  [41] "N340" "N341" "N342" "N343" "N344" "N345" "N346" "N347" "N348" "N349"
    -#>  [51] "N350" "N351" "N352" "N353" "N354" "N355" "N356" "N357" "N358" "N359"
    -#>  [61] "N360" "N361" "N362" "N363" "N364" "N365" "N366" "N367" "N368" "N369"
    -#>  [71] "N370" "N371" "N372" "N373" "N374" "N375" "N376" "N377" "N378" "N379"
    -#>  [81] "N380" "N381" "N382" "N383" "N384" "N385" "N386" "N387" "N388" "N389"
    -#>  [91] "N390" "N391" "N392" "N393" "N394" "N395" "N396" "N397" "N398" "N399"
    -#> [101] "N400"
    -
    -## Return the number of samples
    -nSamples(d)
    -#> [1] 120
    -
    -## Return the number of features
    -nFeatures(d)
    -#> [1] 101
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/analysisData.html b/docs/reference/analysisData.html deleted file mode 100644 index 792a3f55..00000000 --- a/docs/reference/analysisData.html +++ /dev/null @@ -1,125 +0,0 @@ - -AnalysisData class constructor — analysisData • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Create an AnalysisData S4 object.

    -
    - -
    -
    analysisData(data, info)
    -
    - -
    -

    Arguments

    -
    data
    -

    table containing sample metabolomic data

    -
    info
    -

    table containing sample meta information

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisData.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -d <- analysisData(data = abr1$neg,info = abr1$fact)
    -
    -print(d)
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 120 
    -#> Features: 2000 
    -#> Info: 9 
    -#> 
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/analysisElements.html b/docs/reference/analysisElements.html deleted file mode 100644 index 5e4356ae..00000000 --- a/docs/reference/analysisElements.html +++ /dev/null @@ -1,109 +0,0 @@ - -Analysis elements — analysisElements • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Return the analysis elements available in metabolyseR.

    -
    - -
    -
    analysisElements()
    -
    - -
    -

    Value

    -

    A character vector of analysis elements.

    -
    - -
    -

    Examples

    -
    analysisElements()
    -#> [1] "pre-treatment" "modelling"     "correlations" 
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/analysisParameters.html b/docs/reference/analysisParameters.html deleted file mode 100644 index f56fb4fd..00000000 --- a/docs/reference/analysisParameters.html +++ /dev/null @@ -1,162 +0,0 @@ - -Create an AnalysisParameters S4 class object — analysisParameters • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Initiate an AnalysisParameters object with the default analysis parameters for each of the analysis elements.

    -
    - -
    -
    analysisParameters(elements = analysisElements())
    -
    - -
    -

    Arguments

    -
    elements
    -

    character vector containing elements for analysis.

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisParameters containing the default analysis parameters.

    -
    - -
    -

    Examples

    -
    p <- analysisParameters()
    -
    -print(p)
    -#> Parameters:
    -#> pre-treatment
    -#> 	QC
    -#> 		occupancyFilter
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 			occupancy = 2/3
    -#> 		impute
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 			occupancy = 2/3
    -#> 			parallel = variables
    -#> 			seed = 1234
    -#> 		RSDfilter
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 			RSDthresh = 50
    -#> 		removeQC
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 	occupancyFilter
    -#> 		maximum
    -#> 			cls = class
    -#> 			occupancy = 2/3
    -#> 	impute
    -#> 		class
    -#> 			cls = class
    -#> 			occupancy = 2/3
    -#> 			seed = 1234
    -#> 	transform
    -#> 		TICnorm
    -#> 
    -#> modelling
    -#> 	randomForest
    -#> 		cls = class
    -#> 		rf = list()
    -#> 		reps = 1
    -#> 		binary = FALSE
    -#> 		comparisons = list()
    -#> 		perm = 0
    -#> 		returnModels = FALSE
    -#> 		seed = 1234
    -#> 
    -#> correlations
    -#> 	method = pearson
    -#> 	pAdjustMethod = bonferroni
    -#> 	corPvalue = 0.05
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/analysisResults.html b/docs/reference/analysisResults.html deleted file mode 100644 index 3e6b5511..00000000 --- a/docs/reference/analysisResults.html +++ /dev/null @@ -1,188 +0,0 @@ - - - - - - - - -analysisResults — analysisResults • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Extract analysis results for a given analysis element.

    -
    - -
    analysisResults(x, element)
    -
    -# S4 method for Analysis
    -analysisResults(x, element)
    - -

    Arguments

    - - - - - - - - - - -
    x

    S4 object of class Analysis

    element

    Analysis element to extract. -Should be one of those returned by analysisElements().

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/anova.html b/docs/reference/anova.html deleted file mode 100644 index 2bfb03de..00000000 --- a/docs/reference/anova.html +++ /dev/null @@ -1,154 +0,0 @@ - -ANOVA — anova • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    One-way analysis of variance (ANOVA).

    -
    - -
    -
    anova(
    -  x,
    -  cls = "class",
    -  pAdjust = "bonferroni",
    -  comparisons = list(),
    -  returnModels = FALSE
    -)
    -
    -# S4 method for AnalysisData
    -anova(
    -  x,
    -  cls = "class",
    -  pAdjust = "bonferroni",
    -  comparisons = list(),
    -  returnModels = FALSE
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class AnalysisData

    -
    cls
    -

    a vector of sample info column names to analyse

    -
    pAdjust
    -

    p value adjustment method

    -
    comparisons
    -

    list of comparisons to perform

    -
    returnModels
    -

    should models be returned

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    -
    -## Perform ANOVA
    -anova_analysis <- anova(d,cls = 'day')
    -
    -## Extract significant features
    -explanatoryFeatures(anova_analysis)
    -#> # A tibble: 21 × 10
    -#>    Response Comparison  Feature term        df  sumsq  meansq statistic  p.value
    -#>    <chr>    <chr>       <chr>   <chr>    <dbl>  <dbl>   <dbl>     <dbl>    <dbl>
    -#>  1 day      1~2~3~4~5~H N277    response     5 63072. 12614.       39.1 3.14e-23
    -#>  2 day      1~2~3~4~5~H N229    response     5 43549.  8710.       18.1 3.54e-13
    -#>  3 day      1~2~3~4~5~H N299    response     5  1211.   242.       16.4 3.87e-12
    -#>  4 day      1~2~3~4~5~H N295    response     5   271.    54.2      13.6 2.02e-10
    -#>  5 day      1~2~3~4~5~H N281    response     5   192.    38.5      12.5 1.16e- 9
    -#>  6 day      1~2~3~4~5~H N245    response     5  6268.  1254.       11.6 4.38e- 9
    -#>  7 day      1~2~3~4~5~H N255    response     5  5363.  1073.       11.0 1.14e- 8
    -#>  8 day      1~2~3~4~5~H N278    response     5   277.    55.4      10.9 1.48e- 8
    -#>  9 day      1~2~3~4~5~H N259    response     5  1236.   247.       10.8 1.72e- 8
    -#> 10 day      1~2~3~4~5~H N279    response     5   810.   162.       10.5 2.77e- 8
    -#> # … with 11 more rows, and 1 more variable: adjusted.p.value <dbl>
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/binaryComparisons.html b/docs/reference/binaryComparisons.html deleted file mode 100644 index c5bc6d2f..00000000 --- a/docs/reference/binaryComparisons.html +++ /dev/null @@ -1,189 +0,0 @@ - - - - - - - - -binaryComparisons — binaryComparisons • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return a vector of possible binary comparisons for a -given sample information column.

    -
    - -
    binaryComparisons(x, cls = "class")
    -
    -# S4 method for AnalysisData
    -binaryComparisons(x, cls = "class")
    - -

    Arguments

    - - - - - - - - - - -
    x

    S4 object of class AnalysisData.

    cls

    sample information column to use

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/bind.html b/docs/reference/bind.html deleted file mode 100644 index b95ee797..00000000 --- a/docs/reference/bind.html +++ /dev/null @@ -1,129 +0,0 @@ - -Bind AnalysisData objects by row — bindRows • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Bind the rows of AnalysisData objects contained within a list.

    -
    - -
    -
    bindRows(d)
    -
    -# S4 method for list
    -bindRows(d)
    -
    - -
    -

    Arguments

    -
    d
    -

    list object containing S4 objects of class AnalysisData to be bound

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisData containing the bound data sets.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -d <- list(
    - negative = analysisData(abr1$neg,abr1$fact),
    - positive = analysisData(abr1$pos,abr1$fact)
    - )
    -
    -bindRows(d)
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 240 
    -#> Features: 4000 
    -#> Info: 9 
    -#> 
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/bindAnalysesRows.html b/docs/reference/bindAnalysesRows.html deleted file mode 100644 index cf468a13..00000000 --- a/docs/reference/bindAnalysesRows.html +++ /dev/null @@ -1,185 +0,0 @@ - - - - - - - - -bindAnalysesRows — bindAnalysesRows • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Bind rows of objects of class AnalysisData -contained within a list.

    -
    - -
    bindAnalysesRows(x)
    -
    -# S4 method for list
    -bindAnalysesRows(x)
    - -

    Arguments

    - - - - - - -
    x

    list object containing S4 objects of class AnalysisData to be bound

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/changeParameter.html b/docs/reference/changeParameter.html deleted file mode 100644 index 94ebec32..00000000 --- a/docs/reference/changeParameter.html +++ /dev/null @@ -1,164 +0,0 @@ - -Change analysis parameters — changeParameter<- • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Change analysis parameters.

    -
    - -
    -
    changeParameter(x, parameterName, elements = analysisElements()) <- value
    -
    -# S4 method for AnalysisParameters
    -changeParameter(x, parameterName, elements = analysisElements()) <- value
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class AnalysisParameters

    -
    parameterName
    -

    name of the parameter to change

    -
    elements
    -

    character vector of analysis elements to target parameter -change. Can be any returned by analysisElements().

    -
    value
    -

    New value of the parameter

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisParameters.

    -
    -
    -

    Details

    -

    For the parameter name selected, all parameters with that name will -be altered.

    -
    - -
    -

    Examples

    -
    p <- analysisParameters('pre-treatment')
    -
    -changeParameter(p,'cls') <- 'day'
    -
    -print(p)
    -#> Parameters:
    -#> pre-treatment
    -#> 	QC
    -#> 		occupancyFilter
    -#> 			cls = day
    -#> 			QCidx = QC
    -#> 			occupancy = 2/3
    -#> 		impute
    -#> 			cls = day
    -#> 			QCidx = QC
    -#> 			occupancy = 2/3
    -#> 			parallel = variables
    -#> 			seed = 1234
    -#> 		RSDfilter
    -#> 			cls = day
    -#> 			QCidx = QC
    -#> 			RSDthresh = 50
    -#> 		removeQC
    -#> 			cls = day
    -#> 			QCidx = QC
    -#> 	occupancyFilter
    -#> 		maximum
    -#> 			cls = day
    -#> 			occupancy = 2/3
    -#> 	impute
    -#> 		class
    -#> 			cls = day
    -#> 			occupancy = 2/3
    -#> 			seed = 1234
    -#> 	transform
    -#> 		TICnorm
    -#> 
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/cls.html b/docs/reference/cls.html deleted file mode 100644 index 59d0de0d..00000000 --- a/docs/reference/cls.html +++ /dev/null @@ -1,255 +0,0 @@ - -Sample meta information wrangling — clsAdd • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Query or alter sample meta information in AnalysisData or Analysis class objects.

    -

    Replace a given sample info column from an Analysis or -AnalysisData object.

    -
    - -
    -
    clsAdd(d, cls, value, ...)
    -
    -# S4 method for AnalysisData
    -clsAdd(d, cls, value)
    -
    -# S4 method for Analysis
    -clsAdd(d, cls, value, type = c("raw", "pre-treated"))
    -
    -clsArrange(d, cls = "class", descending = FALSE, ...)
    -
    -# S4 method for AnalysisData
    -clsArrange(d, cls = "class", descending = FALSE)
    -
    -# S4 method for Analysis
    -clsArrange(
    -  d,
    -  cls = "class",
    -  descending = FALSE,
    -  type = c("raw", "pre-treated")
    -)
    -
    -clsAvailable(d, ...)
    -
    -# S4 method for AnalysisData
    -clsAvailable(d)
    -
    -# S4 method for Analysis
    -clsAvailable(d, type = c("raw", "pre-treated"))
    -
    -clsExtract(d, cls = "class", ...)
    -
    -# S4 method for AnalysisData
    -clsExtract(d, cls = "class")
    -
    -# S4 method for Analysis
    -clsExtract(d, cls = "class", type = c("raw", "pre-treated"))
    -
    -clsRemove(d, cls, ...)
    -
    -# S4 method for AnalysisData
    -clsRemove(d, cls)
    -
    -# S4 method for Analysis
    -clsRemove(d, cls, type = c("raw", "pre-treated"))
    -
    -clsRename(d, cls, newName, ...)
    -
    -# S4 method for AnalysisData
    -clsRename(d, cls, newName)
    -
    -# S4 method for Analysis
    -clsRename(d, cls, newName, type = c("raw", "pre-treated"))
    -
    -clsReplace(d, value, cls = "class", ...)
    -
    -# S4 method for AnalysisData
    -clsReplace(d, value, cls = "class")
    -
    -# S4 method for Analysis
    -clsReplace(d, value, cls = "class", type = c("raw", "pre-treated"))
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class Analysis or AnalysisData

    -
    cls
    -

    sample info column to extract

    -
    value
    -

    vector of new sample information for replacement

    -
    ...
    -

    arguments to pass to specific method

    -
    type
    -

    raw or pre-treated sample information

    -
    descending
    -

    TRUE/FALSE, arrange samples in descending order

    -
    newName
    -

    new column name

    -
    -
    -

    Methods

    - - -
    • clsAdd: Add a sample information column.

    • -
    • clsArrange: Arrange sample row order by a specified sample information column.

    • -
    • clsAvailable: Retrieve the names of the available sample information columns.

    • -
    • clsExtract: Extract the values of a specified sample information column.

    • -
    • clsRemove: Remove a sample information column.

    • -
    • clsRename: Rename a sample information column.

    • -
    • clsReplace: Replace a sample information column.

    • -
    - -
    -

    Examples

    -
    library(metaboData)
    -d <- analysisData(abr1$neg,abr1$fact)
    -
    -## Add a sample information column named 'new'
    -d <- clsAdd(d,'new',1:nSamples(d))
    -
    -print(d)
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 120 
    -#> Features: 2000 
    -#> Info: 10 
    -#> 
    -
    -## Arrange the row orders by the 'day' column
    -d <- clsArrange(d,'day')
    -
    -clsExtract(d,'day')
    -#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
    -#>  [38] 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
    -#>  [75] 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 H H H H H H H H H H H
    -#> [112] H H H H H H H H H
    -#> Levels: 1 2 3 4 5 H
    -
    -## Retrieve the available sample information column names
    -clsAvailable(d)
    -#>  [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"    
    -#>  [7] "rep"      "day"      "class"    "new"     
    -
    -## Extract the values of the 'day' column
    -clsExtract(d,'day')
    -#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
    -#>  [38] 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
    -#>  [75] 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 H H H H H H H H H H H
    -#> [112] H H H H H H H H H
    -#> Levels: 1 2 3 4 5 H
    -
    -## Remove the 'class' column
    -d <- clsRemove(d,'class')
    -
    -clsAvailable(d)
    -#> [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
    -#> [8] "day"      "new"     
    -
    -## Rename the 'day' column to 'treatment'
    -d <- clsRename(d,'day','treatment')
    -
    -clsAvailable(d)
    -#> [1] "injorder"  "pathcdf"   "filecdf"   "name.org"  "remark"    "name"     
    -#> [7] "rep"       "treatment" "new"      
    -
    -## Replace the values of the 'treatment' column
    -d <- clsReplace(d,rep(1,nSamples(d)),'treatment')
    -
    -clsExtract(d,'treatment')
    -#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    -#>  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    -#>  [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    -#> [112] 1 1 1 1 1 1 1 1 1
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/clsAdd.html b/docs/reference/clsAdd.html deleted file mode 100644 index 327c3bbc..00000000 --- a/docs/reference/clsAdd.html +++ /dev/null @@ -1,204 +0,0 @@ - - - - - - - - -clsAdd — clsAdd • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Add a sample info column to an Analysis or -AnalysisData object.

    -
    - -
    clsAdd(x, cls, value, ...)
    -
    -# S4 method for AnalysisData
    -clsAdd(x, cls, value)
    -
    -# S4 method for Analysis
    -clsAdd(x, cls, value, type = "raw")
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    x

    S4 object of class Analysis or AnalysisData

    cls

    name of new sample information column

    value

    new sample information to add

    ...

    arguments to pass to specific method

    type

    raw or preTreated sample information

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/clsArrange.html b/docs/reference/clsArrange.html deleted file mode 100644 index 37605d3a..00000000 --- a/docs/reference/clsArrange.html +++ /dev/null @@ -1,204 +0,0 @@ - - - - - - - - -clsArrange — clsArrange • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Order samples within an object of class AnalysisData or -Analysis by a given sample information column.

    -
    - -
    clsArrange(x, cls = "class", descending = FALSE, ...)
    -
    -# S4 method for AnalysisData
    -clsArrange(x, cls = "class", descending = FALSE)
    -
    -# S4 method for Analysis
    -clsArrange(x, cls = "class", descending = FALSE, type = "raw")
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    x

    S4 object of class AnalysisData or Analysis

    cls

    name of sample information column to arrange by

    descending

    TRUE/FALSE, arrange samples in descending order

    ...

    arguments to pass to specific method

    type

    raw or preTreated sample information

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/clsAvailable.html b/docs/reference/clsAvailable.html deleted file mode 100644 index b09cd7f8..00000000 --- a/docs/reference/clsAvailable.html +++ /dev/null @@ -1,196 +0,0 @@ - - - - - - - - -clsAvailable — clsAvailable • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return available sample info columns from an Analysis or -AnalysisData object.

    -
    - -
    clsAvailable(x, ...)
    -
    -# S4 method for AnalysisData
    -clsAvailable(x)
    -
    -# S4 method for Analysis
    -clsAvailable(x, type = "raw")
    - -

    Arguments

    - - - - - - - - - - - - - - -
    x

    S4 object of class Analysis or AnalysisData

    ...

    arguments to pass to specific method

    type

    raw or preTreated sample information

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/clsExtract.html b/docs/reference/clsExtract.html deleted file mode 100644 index 965b93d5..00000000 --- a/docs/reference/clsExtract.html +++ /dev/null @@ -1,200 +0,0 @@ - - - - - - - - -clsExtract — clsExtract • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Extract a given sample info column from an Analysis or -AnalysisData object.

    -
    - -
    clsExtract(x, cls = "class", ...)
    -
    -# S4 method for AnalysisData
    -clsExtract(x, cls = "class")
    -
    -# S4 method for Analysis
    -clsExtract(x, cls = "class", type = "raw")
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    x

    S4 object of class Analysis or AnalysisData

    cls

    sample info column to extract

    ...

    arguments to pass to specific method

    type

    raw or preTreated sample information

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/clsRemove.html b/docs/reference/clsRemove.html deleted file mode 100644 index fe9dfc8e..00000000 --- a/docs/reference/clsRemove.html +++ /dev/null @@ -1,200 +0,0 @@ - - - - - - - - -clsRemove — clsRemove • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Remove a sample info column from an Analysis or -AnalysisData object.

    -
    - -
    clsRemove(x, cls, ...)
    -
    -# S4 method for AnalysisData
    -clsRemove(x, cls)
    -
    -# S4 method for Analysis
    -clsRemove(x, cls, type = "raw")
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    x

    S4 object of class Analysis or AnalysisData

    cls

    name of sample information column to remove

    ...

    arguments to pass to specific method

    type

    raw or preTreated sample information

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/clsRename.html b/docs/reference/clsRename.html deleted file mode 100644 index 18c2edce..00000000 --- a/docs/reference/clsRename.html +++ /dev/null @@ -1,204 +0,0 @@ - - - - - - - - -clsRename — clsRename • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Rename a sample information column within an object of -AnalysisData or Analysis.

    -
    - -
    clsRename(x, cls, newName, ...)
    -
    -# S4 method for AnalysisData
    -clsRename(x, cls, newName)
    -
    -# S4 method for Analysis
    -clsRename(x, cls, newName, type = "raw")
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    x

    S4 object of class Analysis or AnalysisData

    cls

    sample information column to rename

    newName

    new column name

    ...

    arguments to pass to specific method

    type

    raw or preTreated sample information

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/clsReplace.html b/docs/reference/clsReplace.html deleted file mode 100644 index f6c2a89b..00000000 --- a/docs/reference/clsReplace.html +++ /dev/null @@ -1,204 +0,0 @@ - - - - - - - - -clsReplace — clsReplace • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Replace a given sample info column from an Analysis or -AnalysisData object.

    -
    - -
    clsReplace(x, value, cls = "class", ...)
    -
    -# S4 method for AnalysisData
    -clsReplace(x, value, cls = "class")
    -
    -# S4 method for Analysis
    -clsReplace(x, value, cls = "class", type = "raw")
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    x

    S4 object of class Analysis or AnalysisData

    value

    vector of new sample information for replacement

    cls

    sample info column to replace

    ...

    arguments to pass to specific method

    type

    raw or preTreated sample information

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/correction-1.png b/docs/reference/correction-1.png deleted file mode 100644 index 7dc64b38..00000000 Binary files a/docs/reference/correction-1.png and /dev/null differ diff --git a/docs/reference/correction-2.png b/docs/reference/correction-2.png deleted file mode 100644 index 6328a65d..00000000 Binary files a/docs/reference/correction-2.png and /dev/null differ diff --git a/docs/reference/correction.html b/docs/reference/correction.html deleted file mode 100644 index 8e9ae0f3..00000000 --- a/docs/reference/correction.html +++ /dev/null @@ -1,148 +0,0 @@ - -Batch/block correction — correctionCenter • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Correction of batch/block differences.

    -
    - -
    -
    correctionCenter(d, block = "block", type = c("mean", "median"))
    -
    -# S4 method for AnalysisData
    -correctionCenter(d, block = "block", type = c("mean", "median"))
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class AnalysisData

    -
    block
    -

    sample information column name to use containing sample block -groupings

    -
    type
    -

    type of average to use

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisData containing the corrected data.

    -
    -
    -

    Details

    -

    There can sometimes be artificial batch related variability introduced into metabolomics analyses as a result of analytical instrumentation or sample preparation. -With an appropriate randomised block design of sample injection order, batch related variability can be corrected using an average centring correction method of the individual features.

    -
    -
    -

    Methods

    - - -
    • correctionCenter: Correction using group average centring.

    • -
    - -
    -

    Examples

    -
    
    -## Initial example data preparation
    -library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
    - occupancyMaximum(occupancy = 2/3)
    - 
    -## Group total ion count distributions prior to correction
    -d %>% 
    - plotTIC(by = 'day',colour = 'day')
    -
    - 
    -## Group total ion count distributions after group median correction
    -d %>% 
    - correctionCenter(block = 'day',type = 'median') %>% 
    - plotTIC(by = 'day',colour = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/correctionCenter.html b/docs/reference/correctionCenter.html deleted file mode 100644 index 1787d2fb..00000000 --- a/docs/reference/correctionCenter.html +++ /dev/null @@ -1,173 +0,0 @@ - - - - - - - - -correctionCenter — correctionCenter,AnalysisData-method • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Batch correction using average centering.

    -
    - -
    # S4 method for AnalysisData
    -correctionCenter(d, block = "block", type = c("mean", "median"))
    - - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/correlations.html b/docs/reference/correlations.html deleted file mode 100644 index 34e8374b..00000000 --- a/docs/reference/correlations.html +++ /dev/null @@ -1,156 +0,0 @@ - -Feature correlation analysis — correlations • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Feature correlation analysis.

    -
    - -
    -
    correlations(d, ...)
    -
    -# S4 method for AnalysisData
    -correlations(
    -  d,
    -  method = "pearson",
    -  pAdjustMethod = "bonferroni",
    -  corPvalue = 0.05
    -)
    -
    -# S4 method for Analysis
    -correlations(d)
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class AnalysisData

    -
    ...
    -

    arguments to pass to specific method

    -
    method
    -

    correlation method. One of pearson or spearman.

    -
    pAdjustMethod
    -

    p-value adjustment method. See ?p.adjust for available methods.

    -
    corPvalue
    -

    p-value cut-off threshold for significance

    -
    -
    -

    Value

    -

    A tibble containing results of significantly correlated features.

    -
    -
    -

    Details

    -

    Correlation analyses can be used to identify associated features within data sets. -This can be useful for identifying clusters of related features that can be used to annotate metabolites within data sets. -All features are compared and the returned table of correlations is p-value thresholded using the specified cut-off.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    -
    -correlations(d)
    -#> # A tibble: 130 × 7
    -#>    Feature1 Feature2 log2IntensityRatio     r `|r|`           p     n
    -#>    <chr>    <chr>                 <dbl> <dbl> <dbl>       <dbl> <int>
    -#>  1 N212     N227                 -0.884 0.980 0.980 0.0107          7
    -#>  2 N224     N286                  1.85  0.971 0.971 0.00612         8
    -#>  3 N215     N276                  0.227 0.965 0.965 0.0419          7
    -#>  4 N224     N265                  0.576 0.943 0.943 0.00138        11
    -#>  5 N201     N275                 -1.59  0.909 0.909 0.0264         10
    -#>  6 N213     N231                 -1.63  0.883 0.883 0             108
    -#>  7 N224     N225                 -0.792 0.863 0.863 0.000000176    29
    -#>  8 N258     N263                 -2.89  0.857 0.857 0.0181         13
    -#>  9 N267     N297                 -0.671 0.853 0.853 0             120
    -#> 10 N211     N291                 -1.55  0.831 0.831 0.00106        19
    -#> # … with 120 more rows
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/correlationsParameters.html b/docs/reference/correlationsParameters.html deleted file mode 100644 index 8cd1dc88..00000000 --- a/docs/reference/correlationsParameters.html +++ /dev/null @@ -1,116 +0,0 @@ - -Correlations parameters — correlationsParameters • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Retrieve the default parameters for correlation analysis.

    -
    - -
    -
    correlationsParameters()
    -
    - - -
    -

    Examples

    -
    ## Retrieve the default correlation parameters
    -p <- correlationsParameters()
    -
    -## Assign the correlation parameters to analysis parameters
    -cp <- analysisParameters('correlations')
    -parameters(cp,'correlations') <- p
    -
    -print(cp)
    -#> Parameters:
    -#> correlations
    -#> 	method = pearson
    -#> 	pAdjustMethod = bonferroni
    -#> 	corPvalue = 0.05
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/dat.html b/docs/reference/dat.html deleted file mode 100644 index 88afa7b1..00000000 --- a/docs/reference/dat.html +++ /dev/null @@ -1,209 +0,0 @@ - - - - - - - - -dat — dat • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return or set sample data in an -AnalysisData or Analysis objects.

    -
    - -
    dat(x, ...)
    -
    -dat(x, ...) <- value
    -
    -# S4 method for AnalysisData
    -dat(x)
    -
    -# S4 method for AnalysisData
    -dat(x) <- value
    -
    -# S4 method for Analysis
    -dat(x, type = "pre-treated")
    -
    -# S4 method for Analysis
    -dat(x, type = "pre-treated") <- value
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    x

    S4 object of class AnalysisData or Analysis

    ...

    arguments to pass to the appropriate method

    value

    tibble containing sample data

    type

    data type to extract or set. -Should be one of "raw" or "pre-treated"

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/explanatoryFeatures.html b/docs/reference/explanatoryFeatures.html deleted file mode 100644 index b8334c37..00000000 --- a/docs/reference/explanatoryFeatures.html +++ /dev/null @@ -1,204 +0,0 @@ - - - - - - - - -explanatoryFeatures — explanatoryFeatures • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Extract explanatory features from modelling results.

    -
    - -
    explanatoryFeatures(x, ...)
    -
    -# S4 method for Univariate
    -explanatoryFeatures(x, threshold = 0.05, ...)
    -
    -# S4 method for RandomForest
    -explanatoryFeatures(x, metric = "FalsePositiveRate", threshold = 0.05)
    -
    -# S4 method for list
    -explanatoryFeatures(x, threshold = 0.05, ...)
    -
    -# S4 method for Analysis
    -explanatoryFeatures(x, threshold = 0.05, ...)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    x

    S4 object of class RandomForest or Univariate

    ...

    arguments to pass to method for specific class

    threshold

    threshold below which explanatory features are extracted

    metric

    importance metric on which to retrieve explanatory features

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/exportParameters.html b/docs/reference/exportParameters.html deleted file mode 100644 index 923a27d7..00000000 --- a/docs/reference/exportParameters.html +++ /dev/null @@ -1,192 +0,0 @@ - - - - - - - - -exportParameters — exportParameters • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Export analysis parameters from AnalysisParameters or -Analysis objects to YAML format.

    -
    - -
    exportParameters(x, file = "analysis_parameters.yaml")
    -
    -# S4 method for AnalysisParameters
    -exportParameters(x, file = "analysis_parameters.yaml")
    -
    -# S4 method for Analysis
    -exportParameters(x, file = "analysis_parameters.yaml")
    - -

    Arguments

    - - - - - - - - - - -
    x

    S4 object of class AnalysisParameters or Analysis

    file

    File name and path to export to

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/features.html b/docs/reference/features.html deleted file mode 100644 index dd0a3f8e..00000000 --- a/docs/reference/features.html +++ /dev/null @@ -1,194 +0,0 @@ - - - - - - - - -features — features • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return a vector of the feature names.

    -
    - -
    features(x, ...)
    -
    -# S4 method for AnalysisData
    -features(x)
    -
    -# S4 method for Analysis
    -features(x, type = "raw")
    - -

    Arguments

    - - - - - - - - - - - - - - -
    x

    S4 object of class AnalysisData or Analysis

    ...

    arguments to pass to the appropriate method

    type

    return features from "raw" or "pre-treated" data

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/figures/README-feature_plot-1.png b/docs/reference/figures/README-feature_plot-1.png deleted file mode 100644 index d9724559..00000000 Binary files a/docs/reference/figures/README-feature_plot-1.png and /dev/null differ diff --git a/docs/reference/figures/README-pca-1.png b/docs/reference/figures/README-pca-1.png deleted file mode 100644 index 2445ed52..00000000 Binary files a/docs/reference/figures/README-pca-1.png and /dev/null differ diff --git a/docs/reference/figures/README-rf_heatmap-1.png b/docs/reference/figures/README-rf_heatmap-1.png deleted file mode 100644 index a0046758..00000000 Binary files a/docs/reference/figures/README-rf_heatmap-1.png and /dev/null differ diff --git a/docs/reference/figures/README-supervised_RF-1.png b/docs/reference/figures/README-supervised_RF-1.png deleted file mode 100644 index 86497dbc..00000000 Binary files a/docs/reference/figures/README-supervised_RF-1.png and /dev/null differ diff --git a/docs/reference/importance.html b/docs/reference/importance.html deleted file mode 100644 index 9b1ce9fd..00000000 --- a/docs/reference/importance.html +++ /dev/null @@ -1,194 +0,0 @@ - - - - - - - - -importance — importance • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    return feature importance results from a RandomForest -or Univariate class

    -
    - -
    importance(x)
    -
    -# S4 method for RandomForest
    -importance(x)
    -
    -# S4 method for Univariate
    -importance(x)
    -
    -# S4 method for list
    -importance(x)
    -
    -# S4 method for Analysis
    -importance(x)
    - -

    Arguments

    - - - - - - -
    x

    S4 object of class RandomForest or Univariate

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/importanceMetrics.html b/docs/reference/importanceMetrics.html deleted file mode 100644 index 328d3295..00000000 --- a/docs/reference/importanceMetrics.html +++ /dev/null @@ -1,185 +0,0 @@ - - - - - - - - -importanceMetrics — importanceMetrics • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return available importance measures from an object -of class RandomForest.

    -
    - -
    importanceMetrics(x)
    -
    -# S4 method for RandomForest
    -importanceMetrics(x)
    - -

    Arguments

    - - - - - - -
    x

    S4 object of class RandomForest

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/impute-1.png b/docs/reference/impute-1.png deleted file mode 100644 index 6e123140..00000000 Binary files a/docs/reference/impute-1.png and /dev/null differ diff --git a/docs/reference/impute-2.png b/docs/reference/impute-2.png deleted file mode 100644 index 6eb864fa..00000000 Binary files a/docs/reference/impute-2.png and /dev/null differ diff --git a/docs/reference/impute-3.png b/docs/reference/impute-3.png deleted file mode 100644 index db8e8b78..00000000 Binary files a/docs/reference/impute-3.png and /dev/null differ diff --git a/docs/reference/impute.html b/docs/reference/impute.html deleted file mode 100644 index acafa5cc..00000000 --- a/docs/reference/impute.html +++ /dev/null @@ -1,165 +0,0 @@ - -Missing data imputation — imputeAll • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Impute missing values using random forest imputation.

    -
    - -
    -
    imputeAll(d, occupancy = 2/3, parallel = "variables", seed = 1234)
    -
    -# S4 method for AnalysisData
    -imputeAll(d, occupancy = 2/3, parallel = "variables", seed = 1234)
    -
    -imputeClass(d, cls = "class", occupancy = 2/3, seed = 1234)
    -
    -# S4 method for AnalysisData
    -imputeClass(d, cls = "class", occupancy = 2/3, seed = 1234)
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class AnalysisData

    -
    occupancy
    -

    occupancy threshold above which missing values of a feature will be imputed

    -
    parallel
    -

    parallel type to use. See ?missForest for details

    -
    seed
    -

    random number seed

    -
    cls
    -

    info column to use for class labels

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisData containing the data after imputation.

    -
    -
    -

    Details

    -

    Missing values can have an important influence on downstream analyses with zero values heavily influencing the outcomes of parametric tests. -Where and how they are imputed are important considerations and are highly related to variable occupancy. -The methods provided here allow both these aspects to be taken into account and utilise random forest imputation using the missForest package.

    -
    -
    -

    Methods

    - - -
    • imputeAll: Impute missing values across all sample features.

    • -
    • imputeClass: Impute missing values class-wise.

    • -
    - -
    -

    Examples

    -
    ## Each of the following examples shows the application of each imputation method and then 
    -## a Linear Discriminant Analysis is plotted to show its effect on the data structure.
    -
    -## Initial example data preparation
    -library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:250],abr1$fact) %>% 
    - occupancyMaximum(occupancy = 2/3)
    -
    -d %>% 
    - plotLDA(cls = 'day')
    -
    - 
    -## Missing value imputation across all samples
    -d %>% 
    - imputeAll(parallel = 'no') %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Missing value imputation class-wise
    -d %>% 
    - imputeClass(cls = 'day') %>% 
    - plotLDA(cls = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/imputeAll.html b/docs/reference/imputeAll.html deleted file mode 100644 index ee006714..00000000 --- a/docs/reference/imputeAll.html +++ /dev/null @@ -1,195 +0,0 @@ - - - - - - - - -imputeAll — imputeAll • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Impute missing values across all samples using Random Forest.

    -
    - -
    imputeAll(d, occupancy = 2/3, parallel = "variables", seed = 1234)
    -
    -# S4 method for AnalysisData
    -imputeAll(d, occupancy = 2/3, parallel = "variables", seed = 1234)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    occupancy

    occupancy threshold for imputation of a given feature

    parallel

    parallel type to use. See ?missForest for details

    seed

    random number seed

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/imputeClass.html b/docs/reference/imputeClass.html deleted file mode 100644 index da93864e..00000000 --- a/docs/reference/imputeClass.html +++ /dev/null @@ -1,195 +0,0 @@ - - - - - - - - -imputeClass — imputeClass • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Impute missing values class-wise using Random Forest.

    -
    - -
    imputeClass(d, cls = "class", occupancy = 2/3, seed = 1234)
    -
    -# S4 method for AnalysisData
    -imputeClass(d, cls = "class", occupancy = 2/3, seed = 1234)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    cls

    info column to use for class labels

    occupancy

    occupancy threshold for imputation

    seed

    random number seed

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/index.html b/docs/reference/index.html deleted file mode 100644 index 9ec7f8ff..00000000 --- a/docs/reference/index.html +++ /dev/null @@ -1,317 +0,0 @@ - -Function reference • metabolyseR - - -
    -
    - - - -
    -
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -

    Analysis S4 classes

    -

    -
    -

    Analysis-class

    -

    Analysis S4 class

    -

    AnalysisData-class

    -

    AnalysisData S4 class

    -

    metabolyse() reAnalyse()

    -

    Perform an analysis

    -

    analysisData()

    -

    AnalysisData class constructor

    -

    dat() `dat<-`() sinfo() `sinfo<-`() raw() `raw<-`() preTreated() `preTreated<-`() features() nSamples() nFeatures() analysisResults()

    -

    AnalysisData and Analysis class accessors

    -

    clsAdd() clsArrange() clsAvailable() clsExtract() clsRemove() clsRename() clsReplace()

    -

    Sample meta information wrangling

    -

    Analysis parameters

    -

    -
    -

    AnalysisParameters-class

    -

    AnalysisParameters S4 class

    -

    analysisElements()

    -

    Analysis elements

    -

    analysisParameters()

    -

    Create an AnalysisParameters S4 class object

    -

    parameters() `parameters<-`()

    -

    Get or set analysis parameters

    -

    `changeParameter<-`()

    -

    Change analysis parameters

    -

    parseParameters() exportParameters()

    -

    Parse/export analysis parameters

    -

    preTreatmentElements() preTreatmentMethods() preTreatmentParameters()

    -

    Pre-treatment parameters

    -

    modellingMethods() modellingParameters()

    -

    Modelling parameters

    -

    correlationsParameters()

    -

    Correlations parameters

    -

    Pre-treatment

    -

    -
    -

    aggregateMean() aggregateMedian() aggregateSum()

    -

    Sample aggregation

    -

    correctionCenter()

    -

    Batch/block correction

    -

    imputeAll() imputeClass()

    -

    Missing data imputation

    -

    keepClasses() keepFeatures() keepSamples()

    -

    Keep samples, classes or features

    -

    occupancyMaximum() occupancyMinimum()

    -

    Feature occupancy filtering

    -

    QCimpute() QCoccupancy() QCremove() QCrsdFilter()

    -

    Quality control (QC) sample treatments

    -

    removeClasses() removeFeatures() removeSamples()

    -

    Remove samples, classes or features

    -

    transformArcSine() transformAuto() transformCenter() transformLevel() transformLn() transformLog10() transformPareto() transformRange() transformSQRT() transformTICnorm() transformVast()

    -

    Scaling, transformation and normalisation methods

    -

    Modelling

    -

    -
    -

    RandomForest-class

    -

    RandomForest S4 class

    -

    Univariate-class

    -

    Univariate S4 class

    -

    randomForest()

    -

    Random forest analysis

    -

    anova()

    -

    ANOVA

    -

    ttest()

    -

    Welch's t-test

    -

    linearRegression()

    -

    Linear regression

    -

    binaryComparisons() type() response() metrics() importanceMetrics() importance() proximity() explanatoryFeatures()

    -

    Modelling accessor methods

    -

    mds()

    -

    Multidimensional scaling (MDS)

    -

    roc()

    -

    Receiver-operator characteristic (ROC) curves

    -

    Correlations

    -

    -
    -

    correlations()

    -

    Feature correlation analysis

    -

    Plotting

    -

    -
    -

    plotFeature()

    -

    Plot a feature

    -

    plotOccupancy()

    -

    Plot class occupancy distributions

    -

    plotRSD()

    -

    Plot RSD distributions

    -

    plotTIC()

    -

    Plot sample total ion counts

    -

    plotPCA()

    -

    Principal Component Analysis plot

    -

    plotLDA()

    -

    Principal Component - Linear Discriminant Analysis plot

    -

    plotUnsupervisedRF()

    -

    Unsupervised random forest MDS plot

    -

    plotSupervisedRF()

    -

    Supervised random forest MDS plot

    -

    plotMDS()

    -

    Multidimensional scaling (MDS) plot

    -

    plotROC()

    -

    Plot receiver operator characteristic (ROC) curves

    -

    plotMetrics()

    -

    Plot model performance metrics

    -

    plotImportance()

    -

    Plot feature importance

    -

    plotExplanatoryHeatmap()

    -

    Heatmap plot of explanatory features

    -

    Miscellaneous

    -

    -
    -

    bindRows()

    -

    Bind AnalysisData objects by row

    -

    split()

    -

    Split an AnalysisData object

    -

    rsd()

    -

    Calculate feature relative standard deviations

    -

    occupancy()

    -

    Calculate feature class occupancies

    - - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/io-parameters.html b/docs/reference/io-parameters.html deleted file mode 100644 index 090a3493..00000000 --- a/docs/reference/io-parameters.html +++ /dev/null @@ -1,162 +0,0 @@ - -Parse/export analysis parameters — parseParameters • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Import analysis parameters from a .yaml format file or export an AnalysisParameters object to .yaml format.

    -
    - -
    -
    parseParameters(path)
    -
    -exportParameters(d, file = "analysis_parameters.yaml")
    -
    -# S4 method for AnalysisParameters
    -exportParameters(d, file = "analysis_parameters.yaml")
    -
    -# S4 method for Analysis
    -exportParameters(d, file = "analysis_parameters.yaml")
    -
    - -
    -

    Arguments

    -
    path
    -

    file path of .yaml file to parse

    -
    d
    -

    S4 object of class AnalysisParameters or Analysis

    -
    file
    -

    File name and path to export to

    -
    - -
    -

    Examples

    -
    ## Import analysis parameters
    -paramFile <- system.file('defaultParameters.yaml',package = 'metabolyseR')
    -p <- parseParameters(paramFile)
    -p
    -#> Parameters:
    -#> pre-treatment
    -#> 	QC
    -#> 		occupancyFilter
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 			occupancy = 0.667
    -#> 		impute
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 			occupancy = 0.667
    -#> 		RSDfilter
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 			RSDthresh = 0.5
    -#> 		removeQC
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 	occupancyFilter
    -#> 		maximum
    -#> 			cls = class
    -#> 			occupancy = 0.667
    -#> 	impute
    -#> 		class
    -#> 			cls = class
    -#> 			occupancy = 0.667
    -#> 	transform
    -#> 		TICnorm
    -#> 
    -#> correlations
    -#> 	method = pearson
    -#> 	pAdjustMethod = bonferroni
    -#> 	corPvalue = 0.05
    -
    -if (FALSE) {
    -## Export analysis parameters
    -exportParameters(p,file = 'analysis_parameters.yaml')
    -}
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/keep.html b/docs/reference/keep.html deleted file mode 100644 index b0586288..00000000 --- a/docs/reference/keep.html +++ /dev/null @@ -1,178 +0,0 @@ - -Keep samples, classes or features — keepClasses • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Retain samples, classes or features in an AnalysisData object.

    -
    - -
    -
    keepClasses(d, cls = "class", classes = c())
    -
    -# S4 method for AnalysisData
    -keepClasses(d, cls = "class", classes = c())
    -
    -keepFeatures(d, features = character())
    -
    -# S4 method for AnalysisData
    -keepFeatures(d, features = character())
    -
    -keepSamples(d, idx = "fileOrder", samples = c())
    -
    -# S4 method for AnalysisData
    -keepSamples(d, idx = "fileOrder", samples = c())
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class AnalysisData

    -
    cls
    -

    info column to use for class information

    -
    classes
    -

    classes to keep

    -
    features
    -

    features to keep

    -
    idx
    -

    info column containing sample indexes

    -
    samples
    -

    sample indexes to keep

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisData with specified samples, classes or features retained.

    -
    -
    -

    Methods

    - - -
    • keepClasses: Keep classes.

    • -
    • keepFeatures: Keep features.

    • -
    • keepSamples: Keep samples.

    • -
    - -
    -

    Examples

    -
    library(metaboData)
    - d <- analysisData(abr1$neg[,200:300],abr1$fact)
    - 
    - ## Keep classes
    - d %>% 
    -  keepClasses(cls = 'day',classes = 'H')
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 20 
    -#> Features: 101 
    -#> Info: 9 
    -#> 
    - 
    - ## Keep features
    - d %>% 
    -  keepFeatures(features = c('N200','N201'))
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 120 
    -#> Features: 2 
    -#> Info: 9 
    -#> 
    - 
    - ## Keep samples
    - d %>% 
    -  keepSamples(idx = 'injorder',samples = c(1,10))
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 2 
    -#> Features: 101 
    -#> Info: 9 
    -#> 
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/keepClasses.html b/docs/reference/keepClasses.html deleted file mode 100644 index 09131c08..00000000 --- a/docs/reference/keepClasses.html +++ /dev/null @@ -1,191 +0,0 @@ - - - - - - - - -keepClasses — keepClasses • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Keep classes from an AnalysisData object.

    -
    - -
    keepClasses(d, cls = "class", classes = c())
    -
    -# S4 method for AnalysisData
    -keepClasses(d, cls = "class", classes = c())
    - -

    Arguments

    - - - - - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    cls

    info column to use for class information

    classes

    classes to keep

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/keepFeatures.html b/docs/reference/keepFeatures.html deleted file mode 100644 index 0487e78d..00000000 --- a/docs/reference/keepFeatures.html +++ /dev/null @@ -1,187 +0,0 @@ - - - - - - - - -keepFeatures — keepFeatures • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Keep features from an AnalysisData object.

    -
    - -
    keepFeatures(d, features = character())
    -
    -# S4 method for AnalysisData
    -keepFeatures(d, features = character())
    - -

    Arguments

    - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    features

    features to keep

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/keepSamples.html b/docs/reference/keepSamples.html deleted file mode 100644 index 79fddf12..00000000 --- a/docs/reference/keepSamples.html +++ /dev/null @@ -1,191 +0,0 @@ - - - - - - - - -keepSamples — keepSamples • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Keep samples from an AnalysisData object.

    -
    - -
    keepSamples(d, idx = "fileOrder", samples = c())
    -
    -# S4 method for AnalysisData
    -keepSamples(d, idx = "fileOrder", samples = c())
    - -

    Arguments

    - - - - - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    idx

    info column containing sample indexes

    samples

    sample indexes to keep

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/linearRegression.html b/docs/reference/linearRegression.html deleted file mode 100644 index a3b45613..00000000 --- a/docs/reference/linearRegression.html +++ /dev/null @@ -1,150 +0,0 @@ - -Linear regression — linearRegression • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Linear regression

    -
    - -
    -
    linearRegression(
    -  x,
    -  cls = "class",
    -  pAdjust = "bonferroni",
    -  returnModels = FALSE
    -)
    -
    -# S4 method for AnalysisData
    -linearRegression(
    -  x,
    -  cls = "class",
    -  pAdjust = "bonferroni",
    -  returnModels = FALSE
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class AnalysisData

    -
    cls
    -

    vector of sample information column names to regress

    -
    pAdjust
    -

    p value adjustment method

    -
    returnModels
    -

    should models be returned

    -
    -
    -

    Value

    -

    An S4 object of class Univariate.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    -
    -## Perform linear regression
    -lr_analysis <- linearRegression(d,cls = 'injorder')
    -
    -## Extract significant features
    -explanatoryFeatures(lr_analysis)
    -#> # A tibble: 5 × 15
    -#>   Response Feature r.squared adj.r.squared sigma statistic  p.value    df logLik
    -#>   <chr>    <chr>       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl>
    -#> 1 injorder N283        0.310        0.304   4.27      53.0 4.10e-11     1  -343.
    -#> 2 injorder N221        0.140        0.133   5.87      19.3 2.50e- 5     1  -382.
    -#> 3 injorder N255        0.119        0.111  11.1       15.9 1.17e- 4     1  -458.
    -#> 4 injorder N267        0.118        0.111  26.4       15.8 1.22e- 4     1  -562.
    -#> 5 injorder N297        0.107        0.0995 44.7       14.1 2.65e- 4     1  -625.
    -#> # … with 6 more variables: AIC <dbl>, BIC <dbl>, deviance <dbl>,
    -#> #   df.residual <int>, nobs <int>, adjusted.p.value <dbl>
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/mds.html b/docs/reference/mds.html deleted file mode 100644 index 09ace6e1..00000000 --- a/docs/reference/mds.html +++ /dev/null @@ -1,148 +0,0 @@ - -Multidimensional scaling (MDS) — mds • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Multidimensional scaling of random forest proximities.

    -
    - -
    -
    mds(x, dimensions = 2, idx = NULL)
    -
    -# S4 method for RandomForest
    -mds(x, dimensions = 2, idx = NULL)
    -
    -# S4 method for list
    -mds(x, dimensions = 2, idx = NULL)
    -
    -# S4 method for Analysis
    -mds(x, dimensions = 2, idx = NULL)
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class RandomForest, Analysis or a list

    -
    dimensions
    -

    The number of dimensions by which the data are to be represented.

    -
    idx
    -

    sample information column to use for sample names. If NULL, the sample row number will be used. Sample names should be unique for each row of data.

    -
    -
    -

    Value

    -

    A tibble containing the scaled dimensions.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
    -       occupancyMaximum(cls = 'day') %>%
    -       transformTICnorm()
    -       
    -rf <- randomForest(x,cls = 'day')
    -
    -mds(rf)
    -#> # A tibble: 120 × 5
    -#>    Response Comparison  Sample `Dimension 1` `Dimension 2`
    -#>    <chr>    <chr>        <dbl>         <dbl>         <dbl>
    -#>  1 day      1~2~3~4~5~H      1       -0.0129       -0.190 
    -#>  2 day      1~2~3~4~5~H      2       -0.101        -0.254 
    -#>  3 day      1~2~3~4~5~H      3       -0.0156        0.173 
    -#>  4 day      1~2~3~4~5~H      4       -0.0896        0.147 
    -#>  5 day      1~2~3~4~5~H      5        0.146        -0.0566
    -#>  6 day      1~2~3~4~5~H      6       -0.132         0.0946
    -#>  7 day      1~2~3~4~5~H      7       -0.0862       -0.195 
    -#>  8 day      1~2~3~4~5~H      8        0.144        -0.0917
    -#>  9 day      1~2~3~4~5~H      9        0.0408       -0.110 
    -#> 10 day      1~2~3~4~5~H     10       -0.146         0.155 
    -#> # … with 110 more rows
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/metabolyse.html b/docs/reference/metabolyse.html deleted file mode 100644 index 51990aad..00000000 --- a/docs/reference/metabolyse.html +++ /dev/null @@ -1,224 +0,0 @@ - -Perform an analysis — metabolyse • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Perform analyses containing multiple analysis element steps.

    -
    - -
    -
    metabolyse(data, info, parameters = analysisParameters(), verbose = TRUE)
    -
    -reAnalyse(analysis, parameters = analysisParameters(), verbose = TRUE)
    -
    -# S4 method for Analysis
    -reAnalyse(analysis, parameters = analysisParameters(), verbose = TRUE)
    -
    - -
    -

    Arguments

    -
    data
    -

    tibble or data.frame containing data to analyse

    -
    info
    -

    tibble or data.frame containing data info or meta data

    -
    parameters
    -

    an object of AnalysisParameters class containing -parameters for analysis. Default calls analysisParameters()

    -
    verbose
    -

    should output be printed to the console

    -
    analysis
    -

    an object of class Analysis containing previous -analysis results

    -
    -
    -

    Value

    -

    An S4 object of class Analysis.

    -
    -
    -

    Details

    -

    Routine analyses are those that are often made up of numerous steps where parameters have likely already been previously established. -The emphasis here is on convenience with as little code as possible required. -In these analyses, the necessary analysis elements, order and parameters are first prepared and then the analysis routine subsequently performed in a single step. -The metabolyse function provides this utility, where the metabolome data, sample meta information and analysis parameters are provided. -The reAnalyse method can be used to perform further analyses on the results.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -## Generate analysis parameters
    -p <- analysisParameters(c('pre-treatment','modelling'))
    -
    -## Alter pre-treatment and modelling parameters to use different methods
    -parameters(p,'pre-treatment') <- preTreatmentParameters(
    -  list(occupancyFilter = 'maximum',
    -       transform = 'TICnorm')
    -)
    -parameters(p,'modelling') <- modellingParameters('anova')
    -
    -## Change "cls" parameters 
    -changeParameter(p,'cls') <- 'day'
    -
    -## Run analysis using a subset of the abr1 negative mode data set
    -analysis <- metabolyse(abr1$neg[,1:200],
    -                       abr1$fact,
    -                       p)
    -#> 
    -#> metabolyseR  v0.14.9 Thu Jan 27 11:58:32 2022
    -#> ________________________________________________________________________________
    -#> Parameters:
    -#> pre-treatment
    -#> 	occupancyFilter
    -#> 		maximum
    -#> 			cls = day
    -#> 			occupancy = 2/3
    -#> 	transform
    -#> 		TICnorm
    -#> 
    -#> modelling
    -#> 	anova
    -#> 		cls = day
    -#> 		pAdjust = bonferroni
    -#> 		comparisons = list()
    -#> 		returnModels = FALSE
    -#> ________________________________________________________________________________
    -#> Pre-treatment 
    -#> 
    Pre-treatment 	 [0.6S]
    -#> Modelling 
    -#> 
    Modelling 	 [0.7S]
    -#> ________________________________________________________________________________
    -#> 
    -#> Complete! [1.3S]
    -                       
    -## Re-analyse to include correlation analysis
    -analysis <- reAnalyse(analysis,
    -                      parameters = analysisParameters('correlations'))
    -#> 
    -#> metabolyseR v0.14.9 Thu Jan 27 11:58:33 2022
    -#> ________________________________________________________________________________
    -#> Parameters:
    -#> correlations
    -#> 	method = pearson
    -#> 	pAdjustMethod = bonferroni
    -#> 	corPvalue = 0.05
    -#> ________________________________________________________________________________
    -#> 
    -#> Correlations 
    -#> 
    Correlations 	 [0.1S]
    -#> ________________________________________________________________________________
    -#> 
    -#> Complete! [0.1S]
    -#> 
    -
    -print(analysis)
    -#> 
    -#> metabolyseR v0.14.9
    -#> Analysis:
    -#> 	Thu Jan 27 11:58:32 2022
    -#> 
    -#> 	Raw Data:
    -#> 		No. samples = 120
    -#> 		No. features = 200
    -#> 
    -#> 	Pre-treated Data:
    -#> 		Thu Jan 27 11:58:33 2022
    -#> 		No. samples = 120
    -#> 		No. features = 48
    -#> 
    -#> 	Modelling:
    -#> 		Thu Jan 27 11:58:33 2022
    -#> 		Methods: anova
    -#> 
    -#> 	Correlations:
    -#> 		Thu Jan 27 11:58:33 2022
    -#> 		No. correlations = 140
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/metrics.html b/docs/reference/metrics.html deleted file mode 100644 index 820e6f8a..00000000 --- a/docs/reference/metrics.html +++ /dev/null @@ -1,186 +0,0 @@ - - - - - - - - -metrics — metrics • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    return metrics results from a RandomForest object

    -
    - -
    metrics(x)
    -
    -# S4 method for RandomForest
    -metrics(x)
    -
    -# S4 method for list
    -metrics(x)
    - -

    Arguments

    - - - - - - -
    x

    S4 object of class RandomForest

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/modelling-accessors.html b/docs/reference/modelling-accessors.html deleted file mode 100644 index cfa0f6ae..00000000 --- a/docs/reference/modelling-accessors.html +++ /dev/null @@ -1,286 +0,0 @@ - -Modelling accessor methods — binaryComparisons • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Methods for accessing modelling results.

    -
    - -
    -
    binaryComparisons(x, cls = "class")
    -
    -# S4 method for AnalysisData
    -binaryComparisons(x, cls = "class")
    -
    -type(x)
    -
    -# S4 method for RandomForest
    -type(x)
    -
    -response(x)
    -
    -# S4 method for RandomForest
    -response(x)
    -
    -metrics(x)
    -
    -# S4 method for RandomForest
    -metrics(x)
    -
    -# S4 method for list
    -metrics(x)
    -
    -# S4 method for Analysis
    -metrics(x)
    -
    -importanceMetrics(x)
    -
    -# S4 method for RandomForest
    -importanceMetrics(x)
    -
    -importance(x)
    -
    -# S4 method for RandomForest
    -importance(x)
    -
    -# S4 method for Univariate
    -importance(x)
    -
    -# S4 method for list
    -importance(x)
    -
    -# S4 method for Analysis
    -importance(x)
    -
    -proximity(x, idx = NULL)
    -
    -# S4 method for RandomForest
    -proximity(x, idx = NULL)
    -
    -# S4 method for list
    -proximity(x, idx = NULL)
    -
    -# S4 method for Analysis
    -proximity(x, idx = NULL)
    -
    -explanatoryFeatures(x, ...)
    -
    -# S4 method for Univariate
    -explanatoryFeatures(x, threshold = 0.05)
    -
    -# S4 method for RandomForest
    -explanatoryFeatures(x, metric = "FalsePositiveRate", threshold = 0.05)
    -
    -# S4 method for list
    -explanatoryFeatures(x, ...)
    -
    -# S4 method for Analysis
    -explanatoryFeatures(x, ...)
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class AnalysisData,RandomForest, Univariate, Analysis or a list.

    -
    cls
    -

    sample information column to use

    -
    idx
    -

    sample information column to use for sample names. If NULL, the sample row number will be used. Sample names should be unique for each row of data.

    -
    ...
    -

    arguments to pass to the method for the specific class

    -
    threshold
    -

    threshold below which explanatory features are extracted

    -
    metric
    -

    importance metric for which to retrieve explanatory features

    -
    -
    -

    Methods

    - - -
    • binaryComparisons: Return a vector of all possible binary comparisons for a given sample information column.

    • -
    • type: Return the type of random forest analysis.

    • -
    • response: Return the response variable name used for a random forest analysis.

    • -
    • metrics: Retrieve the model performance metrics for a random forest analysis

    • -
    • importanceMetrics: Retrieve the available feature importance metrics for a random forest analysis.

    • -
    • importance: Retrieve feature importance results.

    • -
    • proximity: Retrieve the random forest sample proximities.

    • -
    • explanatoryFeatures: Retrieve explanatory features.

    • -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    -
    -## Return possible binary comparisons for the 'day' column
    -binaryComparisons(d,cls = 'day')
    -#>  [1] "1~2" "1~3" "1~4" "1~5" "1~H" "2~3" "2~4" "2~5" "2~H" "3~4" "3~5" "3~H"
    -#> [13] "4~5" "4~H" "5~H"
    -
    -## Perform random forest analysis
    -rf_analysis <- randomForest(d,cls = 'day')
    -
    -## Return the type of random forest
    -type(rf_analysis)
    -#> [1] "classification"
    -
    -## Return the response variable name used
    -response(rf_analysis)
    -#> [1] "day"
    -
    -## Retrieve the model performance metrics
    -metrics(rf_analysis)
    -#> # A tibble: 4 × 5
    -#>   Response Comparison  .metric  .estimator .estimate
    -#>   <chr>    <chr>       <chr>    <chr>          <dbl>
    -#> 1 day      1~2~3~4~5~H accuracy multiclass    0.567 
    -#> 2 day      1~2~3~4~5~H kap      multiclass    0.48  
    -#> 3 day      1~2~3~4~5~H roc_auc  hand_till     0.886 
    -#> 4 day      1~2~3~4~5~H margin   NA            0.0424
    -
    -## Show the available feature importance metrics
    -importanceMetrics(rf_analysis)
    -#>  [1] "1"                    "2"                    "3"                   
    -#>  [4] "4"                    "5"                    "FalsePositiveRate"   
    -#>  [7] "H"                    "MeanDecreaseAccuracy" "MeanDecreaseGini"    
    -#> [10] "SelectionFrequency"  
    -
    -## Retrieve the feature importance results
    -importance(rf_analysis)
    -#> # A tibble: 1,010 × 5
    -#>    Response Comparison  Feature Metric                  Value
    -#>    <chr>    <chr>       <chr>   <chr>                   <dbl>
    -#>  1 day      1~2~3~4~5~H N200    1                    0       
    -#>  2 day      1~2~3~4~5~H N200    2                    0       
    -#>  3 day      1~2~3~4~5~H N200    3                    0       
    -#>  4 day      1~2~3~4~5~H N200    4                    0       
    -#>  5 day      1~2~3~4~5~H N200    5                    0       
    -#>  6 day      1~2~3~4~5~H N200    FalsePositiveRate    2.35e-40
    -#>  7 day      1~2~3~4~5~H N200    H                    0       
    -#>  8 day      1~2~3~4~5~H N200    MeanDecreaseAccuracy 0       
    -#>  9 day      1~2~3~4~5~H N200    MeanDecreaseGini     6.00e- 2
    -#> 10 day      1~2~3~4~5~H N200    SelectionFrequency   1.6 e+ 1
    -#> # … with 1,000 more rows
    -
    -## Retrieve the sample proximities
    -proximity(rf_analysis)
    -#> # A tibble: 14,400 × 5
    -#>    Response Comparison  Sample1 Sample2 Proximity
    -#>    <chr>    <chr>         <int>   <dbl>     <dbl>
    -#>  1 day      1~2~3~4~5~H       1       1    1     
    -#>  2 day      1~2~3~4~5~H       1       2    0.0704
    -#>  3 day      1~2~3~4~5~H       1       3    0.0580
    -#>  4 day      1~2~3~4~5~H       1       4    0.0930
    -#>  5 day      1~2~3~4~5~H       1       5    0.0556
    -#>  6 day      1~2~3~4~5~H       1       6    0.0435
    -#>  7 day      1~2~3~4~5~H       1       7    0.0556
    -#>  8 day      1~2~3~4~5~H       1       8    0.0441
    -#>  9 day      1~2~3~4~5~H       1       9    0.106 
    -#> 10 day      1~2~3~4~5~H       1      10    0     
    -#> # … with 14,390 more rows
    -
    -## Retrieve the explanatory features
    -explanatoryFeatures(rf_analysis,metric = 'FalsePositiveRate',threshold = 0.05)
    -#> # A tibble: 35 × 5
    -#>    Response Comparison  Feature Metric                Value
    -#>    <chr>    <chr>       <chr>   <chr>                 <dbl>
    -#>  1 day      1~2~3~4~5~H N229    FalsePositiveRate 5.75e-129
    -#>  2 day      1~2~3~4~5~H N259    FalsePositiveRate 4.88e- 72
    -#>  3 day      1~2~3~4~5~H N277    FalsePositiveRate 3.98e- 67
    -#>  4 day      1~2~3~4~5~H N255    FalsePositiveRate 3.27e- 53
    -#>  5 day      1~2~3~4~5~H N213    FalsePositiveRate 4.92e- 45
    -#>  6 day      1~2~3~4~5~H N200    FalsePositiveRate 2.35e- 40
    -#>  7 day      1~2~3~4~5~H N221    FalsePositiveRate 1.80e- 38
    -#>  8 day      1~2~3~4~5~H N299    FalsePositiveRate 4.91e- 36
    -#>  9 day      1~2~3~4~5~H N245    FalsePositiveRate 9.75e- 27
    -#> 10 day      1~2~3~4~5~H N279    FalsePositiveRate 2.38e- 20
    -#> # … with 25 more rows
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/modelling-parameters.html b/docs/reference/modelling-parameters.html deleted file mode 100644 index 793ca0f3..00000000 --- a/docs/reference/modelling-parameters.html +++ /dev/null @@ -1,131 +0,0 @@ - -Modelling parameters — modellingMethods • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Retrieve the available modelling methods and parameters.

    -
    - -
    -
    modellingMethods()
    -
    -modellingParameters(methods)
    -
    - -
    -

    Arguments

    -
    methods
    -

    character vector of available modelling methods

    -
    - -
    -

    Examples

    -
    ## Retrieve the available modelling methods
    -modellingMethods()
    -#> [1] "anova"            "ttest"            "linearRegression" "randomForest"    
    -
    -## Retrieve the modelling parameters for the anova method
    -p <- modellingParameters('anova')
    -
    -## Assign the modelling parameters to analysis parameters
    -mp <- analysisParameters('modelling')
    -
    -parameters(mp,'modelling') <- p
    -
    -print(mp)
    -#> Parameters:
    -#> modelling
    -#> 	anova
    -#> 		cls = class
    -#> 		pAdjust = bonferroni
    -#> 		comparisons = list()
    -#> 		returnModels = FALSE
    -#> 
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/modellingMethods.html b/docs/reference/modellingMethods.html deleted file mode 100644 index 54cd67e1..00000000 --- a/docs/reference/modellingMethods.html +++ /dev/null @@ -1,172 +0,0 @@ - - - - - - - - -modellingMethods — modellingMethods • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return names of available modelling methods.

    -
    - -
    modellingMethods()
    - - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/modellingParameters.html b/docs/reference/modellingParameters.html deleted file mode 100644 index 341c875d..00000000 --- a/docs/reference/modellingParameters.html +++ /dev/null @@ -1,185 +0,0 @@ - - - - - - - - -modellingParameters — modellingParameters • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return default parameters for a given modelling method.

    -
    - -
    modellingParameters(methods)
    - -

    Arguments

    - - - - - - -
    methods

    character vector of available methods. -Use modellingMethods() to see available methods.

    - - -

    Examples

    -
    p <- analysisParameters() -parameters(p,'modelling') <- modellingParameters('anova') -
    -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/nFeatures.html b/docs/reference/nFeatures.html deleted file mode 100644 index beb88bd8..00000000 --- a/docs/reference/nFeatures.html +++ /dev/null @@ -1,194 +0,0 @@ - - - - - - - - -nFeatures — nFeatures • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return the number of features.

    -
    - -
    nFeatures(x, ...)
    -
    -# S4 method for AnalysisData
    -nFeatures(x)
    -
    -# S4 method for Analysis
    -nFeatures(x, type = "raw")
    - -

    Arguments

    - - - - - - - - - - - - - - -
    x

    S4 object of class AnalysisData or Analysis

    ...

    arguments to pass to the appropriate method

    type

    return features from "raw" or "pre-treated" data

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/nSamples.html b/docs/reference/nSamples.html deleted file mode 100644 index f313c8de..00000000 --- a/docs/reference/nSamples.html +++ /dev/null @@ -1,194 +0,0 @@ - - - - - - - - -nSamples — nSamples • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return the number of samples.

    -
    - -
    nSamples(x, ...)
    -
    -# S4 method for AnalysisData
    -nSamples(x)
    -
    -# S4 method for Analysis
    -nSamples(x, type = "raw")
    - -

    Arguments

    - - - - - - - - - - - - - - -
    x

    S4 object of class AnalysisData or Analysis

    ...

    arguments to pass to the appropriate method

    type

    return the number of samples from "raw" or "pre-treated" data

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/occupancy.html b/docs/reference/occupancy.html deleted file mode 100644 index e8e84334..00000000 --- a/docs/reference/occupancy.html +++ /dev/null @@ -1,136 +0,0 @@ - -Calculate feature class occupancies — occupancy • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Calculate the class occupancies of all features in an AnalysisData object.

    -
    - -
    -
    occupancy(d, cls = "class")
    -
    -# S4 method for AnalysisData
    -occupancy(d, cls = "class")
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class AnalysisData

    -
    cls
    -

    sample information column for which to compute class occupancies

    -
    -
    -

    Value

    -

    A tibble containing feature class proportional occupancies.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    -
    -occupancy(d,cls = 'day')
    -#> # A tibble: 596 × 5
    -#>    day   Feature     N `Class total` Occupancy
    -#>    <fct> <chr>   <dbl>         <int>     <dbl>
    -#>  1 1     N200        1            20      0.05
    -#>  2 1     N201        3            20      0.15
    -#>  3 1     N202        3            20      0.15
    -#>  4 1     N203       19            20      0.95
    -#>  5 1     N204        4            20      0.2 
    -#>  6 1     N205       17            20      0.85
    -#>  7 1     N206        4            20      0.2 
    -#>  8 1     N207        8            20      0.4 
    -#>  9 1     N208        7            20      0.35
    -#> 10 1     N209       16            20      0.8 
    -#> # … with 586 more rows
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/occupancyFilter-1.png b/docs/reference/occupancyFilter-1.png deleted file mode 100644 index 41ade7d0..00000000 Binary files a/docs/reference/occupancyFilter-1.png and /dev/null differ diff --git a/docs/reference/occupancyFilter-2.png b/docs/reference/occupancyFilter-2.png deleted file mode 100644 index d53959c0..00000000 Binary files a/docs/reference/occupancyFilter-2.png and /dev/null differ diff --git a/docs/reference/occupancyFilter.html b/docs/reference/occupancyFilter.html deleted file mode 100644 index ae168a37..00000000 --- a/docs/reference/occupancyFilter.html +++ /dev/null @@ -1,158 +0,0 @@ - -Feature occupancy filtering — occupancyMaximum • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Feature filtering based on class occupancy.

    -
    - -
    -
    occupancyMaximum(d, cls = "class", occupancy = 2/3)
    -
    -# S4 method for AnalysisData
    -occupancyMaximum(d, cls = "class", occupancy = 2/3)
    -
    -occupancyMinimum(d, cls = "class", occupancy = 2/3)
    -
    -# S4 method for AnalysisData
    -occupancyMinimum(d, cls = "class", occupancy = 2/3)
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class AnalysisData

    -
    cls
    -

    sample information column name to use for class data

    -
    occupancy
    -

    feature occupancy filtering threshold, below which features will be removed

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisData containing the class occupancy filtered data.

    -
    -
    -

    Details

    -

    Occupancy provides a useful metric by which to filter poorly represented features (features containing a majority zero or missing values). -An occupancy threshold provides a means of specifying this majority with variables below the threshold excluded from further analyses. -However, this can be complicated by an underlying class structure present within the data where a variable may be well represented within one class but not in another.

    -
    -
    -

    Methods

    - - -
    • occupancyMaximum: Maximum occupancy threshold feature filtering. Where the maximum occupancy across all classes is above the threshold. Therefore, for a feature to be retained, only a single class needs to have an occupancy above the threshold.

    • -
    • occupancyMinimum: Minimum occupancy threshold feature filtering. Where the minimum occupancy across all classes is required to be above the threshold. Therefore, for a feature to be retained, all classes would need to have an occupancy above the threshold.

    • -
    - -
    -

    Examples

    -
    ## Each of the following examples shows the application 
    -## of the feature occupancy filtering method and 
    -## then a Principal Component Analysis is plotted to show 
    -## its effect on the data structure.
    -
    -## Initial example data preparation
    -library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    - 
    -## Maximum occupancy threshold feature filtering
    -d %>% 
    - occupancyMaximum(cls = 'day') %>% 
    - plotPCA(cls = 'day')
    -
    - 
    -## Minimum occupancy threshold feature filtering
    -d %>% 
    - occupancyMinimum(cls = 'day') %>% 
    - plotPCA(cls = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/occupancyMaximum.html b/docs/reference/occupancyMaximum.html deleted file mode 100644 index 96bdd0ee..00000000 --- a/docs/reference/occupancyMaximum.html +++ /dev/null @@ -1,191 +0,0 @@ - - - - - - - - -occupancyMaximum — occupancyMaximum • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Maximum occupancy filtering of sample data.

    -
    - -
    occupancyMaximum(dat, cls = "class", occupancy = 2/3)
    -
    -# S4 method for AnalysisData
    -occupancyMaximum(dat, cls = "class", occupancy = 2/3)
    - -

    Arguments

    - - - - - - - - - - - - - - -
    dat

    S4 object of class Data

    cls

    info column to use for class data

    occupancy

    occupancy threshold

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/occupancyMinimum.html b/docs/reference/occupancyMinimum.html deleted file mode 100644 index bd87097c..00000000 --- a/docs/reference/occupancyMinimum.html +++ /dev/null @@ -1,191 +0,0 @@ - - - - - - - - -occupancyMinimum — occupancyMinimum • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Minimum occupancy filtering of sample data.

    -
    - -
    occupancyMinimum(dat, cls = "class", occupancy = 2/3)
    -
    -# S4 method for AnalysisData
    -occupancyMinimum(dat, cls = "class", occupancy = 2/3)
    - -

    Arguments

    - - - - - - - - - - - - - - -
    dat

    S4 object of class Data

    cls

    info column to use for class data

    occupancy

    occupancy threshold

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/parameters.html b/docs/reference/parameters.html deleted file mode 100644 index 649ac57f..00000000 --- a/docs/reference/parameters.html +++ /dev/null @@ -1,238 +0,0 @@ - -Get or set analysis parameters — parameters • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Get or set parameters for AnalysisParameters or Analysis class objects.

    -
    - -
    -
    parameters(d, ...)
    -
    -# S4 method for AnalysisParameters
    -parameters(d, element)
    -
    -# S4 method for Analysis
    -parameters(d)
    -
    -parameters(d, element) <- value
    -
    -# S4 method for AnalysisParameters
    -parameters(d, element) <- value
    -
    -# S4 method for Analysis
    -parameters(d) <- value
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class AnalysisParameters or Analysis

    -
    ...
    -

    arguments to pass to the appropriate method

    -
    element
    -

    analysis element for parameters to extract or assign. -Should be one of those returned by analysisElements()

    -
    value
    -

    list containing parameter values

    -
    - -
    -

    Examples

    -
    p <- analysisParameters('pre-treatment')
    -
    -## extract pre-treatment parameters
    -parameters(p,'pre-treatment')
    -#> $QC
    -#> $QC$occupancyFilter
    -#> $QC$occupancyFilter$cls
    -#> [1] "class"
    -#> 
    -#> $QC$occupancyFilter$QCidx
    -#> [1] "QC"
    -#> 
    -#> $QC$occupancyFilter$occupancy
    -#> 2/3
    -#> 
    -#> 
    -#> $QC$impute
    -#> $QC$impute$cls
    -#> [1] "class"
    -#> 
    -#> $QC$impute$QCidx
    -#> [1] "QC"
    -#> 
    -#> $QC$impute$occupancy
    -#> 2/3
    -#> 
    -#> $QC$impute$parallel
    -#> [1] "variables"
    -#> 
    -#> $QC$impute$seed
    -#> [1] 1234
    -#> 
    -#> 
    -#> $QC$RSDfilter
    -#> $QC$RSDfilter$cls
    -#> [1] "class"
    -#> 
    -#> $QC$RSDfilter$QCidx
    -#> [1] "QC"
    -#> 
    -#> $QC$RSDfilter$RSDthresh
    -#> [1] 50
    -#> 
    -#> 
    -#> $QC$removeQC
    -#> $QC$removeQC$cls
    -#> [1] "class"
    -#> 
    -#> $QC$removeQC$QCidx
    -#> [1] "QC"
    -#> 
    -#> 
    -#> 
    -#> $occupancyFilter
    -#> $occupancyFilter$maximum
    -#> $occupancyFilter$maximum$cls
    -#> [1] "class"
    -#> 
    -#> $occupancyFilter$maximum$occupancy
    -#> 2/3
    -#> 
    -#> 
    -#> 
    -#> $impute
    -#> $impute$class
    -#> $impute$class$cls
    -#> [1] "class"
    -#> 
    -#> $impute$class$occupancy
    -#> 2/3
    -#> 
    -#> $impute$class$seed
    -#> [1] 1234
    -#> 
    -#> 
    -#> 
    -#> $transform
    -#> $transform$TICnorm
    -#> named list()
    -#> 
    -#> 
    -
    -## set pre-treatment parameters
    -parameters(p,'pre-treatment') <- preTreatmentParameters(
    -  list(
    -    remove = 'classes',
    -    QC = c('RSDfilter','removeQC'),
    -    transform = 'TICnorm'
    -  )
    -)
    -
    -print(p)
    -#> Parameters:
    -#> pre-treatment
    -#> 	remove
    -#> 		classes
    -#> 			cls = class
    -#> 			classes = c()
    -#> 	QC
    -#> 		RSDfilter
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 			RSDthresh = 50
    -#> 		removeQC
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 	transform
    -#> 		TICnorm
    -#> 
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/parseParameters.html b/docs/reference/parseParameters.html deleted file mode 100644 index a134b576..00000000 --- a/docs/reference/parseParameters.html +++ /dev/null @@ -1,221 +0,0 @@ - - - - - - - - -parseParameters — parseParameters • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    parse .yaml file containing analysis parameters.

    -
    - -
    parseParameters(path)
    - -

    Arguments

    - - - - - - -
    path

    file path of .yaml file to parse

    - - -

    Examples

    -
    -paramFile <- system.file('defaultParameters.yaml',package = 'metabolyseR') -p <- parseParameters(paramFile) -p -
    #> Parameters: -#> pre-treatment -#> QC -#> occupancyFilter -#> cls = class -#> QCidx = QC -#> occupancy = 0.667 -#> impute -#> cls = class -#> QCidx = QC -#> occupancy = 0.667 -#> RSDfilter -#> cls = class -#> QCidx = QC -#> RSDthresh = 0.5 -#> removeQC -#> cls = class -#> QCidx = QC -#> occupancyFilter -#> maximum -#> cls = class -#> occupancy = 0.667 -#> impute -#> class -#> cls = class -#> occupancy = 0.667 -#> nCores = 4 -#> clusterType = FORK -#> transform -#> TICnorm -#> -#> correlations -#> method = pearson -#> pAdjustMethod = bonferroni -#> corPvalue = 0.05
    -
    -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/plotExplanatoryHeatmap-1.png b/docs/reference/plotExplanatoryHeatmap-1.png deleted file mode 100644 index 8898ad73..00000000 Binary files a/docs/reference/plotExplanatoryHeatmap-1.png and /dev/null differ diff --git a/docs/reference/plotExplanatoryHeatmap-2.png b/docs/reference/plotExplanatoryHeatmap-2.png deleted file mode 100644 index f0fce4b7..00000000 Binary files a/docs/reference/plotExplanatoryHeatmap-2.png and /dev/null differ diff --git a/docs/reference/plotExplanatoryHeatmap.html b/docs/reference/plotExplanatoryHeatmap.html deleted file mode 100644 index 4ba85d72..00000000 --- a/docs/reference/plotExplanatoryHeatmap.html +++ /dev/null @@ -1,191 +0,0 @@ - -Heatmap plot of explantory features — plotExplanatoryHeatmap • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Plot a heatmap of explanatory features.

    -
    - -
    -
    plotExplanatoryHeatmap(x, ...)
    -
    -# S4 method for Univariate
    -plotExplanatoryHeatmap(
    -  x,
    -  threshold = 0.05,
    -  title = "",
    -  distanceMeasure = "euclidean",
    -  clusterMethod = "ward.D2",
    -  featureNames = TRUE,
    -  dendrogram = TRUE,
    -  featureLimit = Inf
    -)
    -
    -# S4 method for RandomForest
    -plotExplanatoryHeatmap(
    -  x,
    -  metric = "FalsePositiveRate",
    -  threshold = 0.05,
    -  title = "",
    -  distanceMeasure = "euclidean",
    -  clusterMethod = "ward.D2",
    -  featureNames = TRUE,
    -  dendrogram = TRUE,
    -  featureLimit = Inf
    -)
    -
    -# S4 method for list
    -plotExplanatoryHeatmap(
    -  x,
    -  threshold = 0.05,
    -  distanceMeasure = "euclidean",
    -  clusterMethod = "ward.D2",
    -  featureNames = TRUE,
    -  featureLimit = Inf
    -)
    -
    -# S4 method for Analysis
    -plotExplanatoryHeatmap(
    -  x,
    -  threshold = 0.05,
    -  distanceMeasure = "euclidean",
    -  clusterMethod = "ward.D2",
    -  featureNames = TRUE,
    -  featureLimit = Inf
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    object of class Univariate, RandomForest or -Analysis

    -
    ...
    -

    arguments to pass to the appropriate method

    -
    threshold
    -

    score threshold to use for specifying explanatory features

    -
    title
    -

    plot title

    -
    distanceMeasure
    -

    distance measure to use for clustering. See details.

    -
    clusterMethod
    -

    clustering method to use. See details

    -
    featureNames
    -

    should feature names be plotted?

    -
    dendrogram
    -

    TRUE/FALSE. Should the dendrogram be plotted?

    -
    featureLimit
    -

    The maximum number of features to plot

    -
    metric
    -

    importance metric on which to retrieve explanatory features

    -
    -
    -

    Details

    -

    Distance measures can be one of any that can be used for the method argument of dist().

    -

    Cluster methods can be one of any that can be used for the method argument of hclust().

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -x <- analysisData(data = abr1$neg[,200:300],info = abr1$fact)
    -
    -## random forest classification example
    -random_forest <- randomForest(x,cls = 'day')
    -
    -plotExplanatoryHeatmap(random_forest)
    -
    -
    -## random forest regression example
    -random_forest <- randomForest(x,cls = 'injorder')
    -
    -plotExplanatoryHeatmap(random_forest,metric = '%IncMSE',threshold = 2)
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotFeature-1.png b/docs/reference/plotFeature-1.png deleted file mode 100644 index 19d9a1c5..00000000 Binary files a/docs/reference/plotFeature-1.png and /dev/null differ diff --git a/docs/reference/plotFeature-2.png b/docs/reference/plotFeature-2.png deleted file mode 100644 index c09a615c..00000000 Binary files a/docs/reference/plotFeature-2.png and /dev/null differ diff --git a/docs/reference/plotFeature.html b/docs/reference/plotFeature.html deleted file mode 100644 index 2a5c2a38..00000000 --- a/docs/reference/plotFeature.html +++ /dev/null @@ -1,144 +0,0 @@ - -Plot a feature — plotFeature • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Plot the trend of a feature.

    -
    - -
    -
    plotFeature(analysis, feature, cls = "class", label = NULL, labelSize = 2, ...)
    -
    -# S4 method for AnalysisData
    -plotFeature(analysis, feature, cls = "class", label = NULL, labelSize = 2)
    -
    -# S4 method for Analysis
    -plotFeature(
    -  analysis,
    -  feature,
    -  cls = "class",
    -  label = NULL,
    -  labelSize = 2,
    -  type = "pre-treated"
    -)
    -
    - -
    -

    Arguments

    -
    analysis
    -

    an object of class AnalysisData or Analysis

    -
    feature
    -

    feature name to plot

    -
    cls
    -

    information column to use for class labels

    -
    label
    -

    information column to use for sample labels

    -
    labelSize
    -

    sample label size

    -
    ...
    -

    arguments to pass to the appropriate method

    -
    type
    -

    raw or pre-treated data to plot

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg,abr1$fact)
    -
    -## Plot a categorical response variable
    -plotFeature(d,'N133',cls = 'day')
    -
    -
    -## Plot a continuous response variable
    -plotFeature(d,'N133',cls = 'injorder')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotImportance-1.png b/docs/reference/plotImportance-1.png deleted file mode 100644 index 4b4f7d4f..00000000 Binary files a/docs/reference/plotImportance-1.png and /dev/null differ diff --git a/docs/reference/plotImportance.html b/docs/reference/plotImportance.html deleted file mode 100644 index ef49b652..00000000 --- a/docs/reference/plotImportance.html +++ /dev/null @@ -1,138 +0,0 @@ - -Plot feature importance — plotImportance • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Plot Univariate or random forest feature importance.

    -
    - -
    -
    plotImportance(x, ...)
    -
    -# S4 method for Univariate
    -plotImportance(x, response = "class", rank = TRUE, threshold = 0.05)
    -
    -# S4 method for RandomForest
    -plotImportance(x, metric = "FalsePositiveRate", rank = TRUE)
    -
    -# S4 method for list
    -plotImportance(x, metric = "FalsePositiveRate")
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class Univariate or RandomForest

    -
    ...
    -

    arguments to pass to specific method

    -
    response
    -

    response results to plot

    -
    rank
    -

    rank feature order for plotting

    -
    threshold
    -

    explanatory threshold line for the output plot

    -
    metric
    -

    importance metric to plot

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
    -       keepClasses(cls = 'day',classes = c('H','1','5')) %>% 
    -       occupancyMaximum(cls = 'day') %>%
    -       transformTICnorm()
    -       
    -rf <- randomForest(x,cls = 'day')
    -
    -plotImportance(rf,rank = FALSE)
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotLDA-1.png b/docs/reference/plotLDA-1.png deleted file mode 100644 index d7b5141c..00000000 Binary files a/docs/reference/plotLDA-1.png and /dev/null differ diff --git a/docs/reference/plotLDA.html b/docs/reference/plotLDA.html deleted file mode 100644 index 84da16b3..00000000 --- a/docs/reference/plotLDA.html +++ /dev/null @@ -1,192 +0,0 @@ - -Principle Component - Linear Discriminant Analysis plot — plotLDA • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Plot linear discriminant analysis results of pre-treated data

    -
    - -
    -
    plotLDA(
    -  analysis,
    -  cls = "class",
    -  label = NULL,
    -  scale = TRUE,
    -  center = TRUE,
    -  xAxis = "DF1",
    -  yAxis = "DF2",
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  title = "PC-LDA",
    -  legendPosition = "bottom",
    -  labelSize = 2,
    -  ...
    -)
    -
    -# S4 method for AnalysisData
    -plotLDA(
    -  analysis,
    -  cls = "class",
    -  label = NULL,
    -  scale = TRUE,
    -  center = TRUE,
    -  xAxis = "DF1",
    -  yAxis = "DF2",
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  title = "PC-LDA",
    -  legendPosition = "bottom",
    -  labelSize = 2
    -)
    -
    -# S4 method for Analysis
    -plotLDA(
    -  analysis,
    -  cls = "class",
    -  label = NULL,
    -  scale = TRUE,
    -  center = TRUE,
    -  xAxis = "DF1",
    -  yAxis = "DF2",
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  title = "PC-LDA",
    -  legendPosition = "bottom",
    -  labelSize = 2,
    -  type = "raw"
    -)
    -
    - -
    -

    Arguments

    -
    analysis
    -

    S4 object of class AnalysisData or Analysis

    -
    cls
    -

    name of sample information column to use for class labels

    -
    label
    -

    name of sample information column to use for sample labels. Set to NULL for no labels.

    -
    scale
    -

    scale the data

    -
    center
    -

    center the data

    -
    xAxis
    -

    principal component to plot on the x-axis

    -
    yAxis
    -

    principal component to plot on the y-axis

    -
    shape
    -

    TRUE/FALSE use shape aesthetic for plot points. -Defaults to TRUE when the number of classes is greater than 12

    -
    ellipses
    -

    TRUE/FALSE, plot multivariate normal distribution 95% -confidence ellipses for each class

    -
    title
    -

    plot title

    -
    legendPosition
    -

    legend position to pass to legend.position argument -of ggplot2::theme. Set to "none" to remove legend.

    -
    labelSize
    -

    label size. Ignored if label is NULL

    -
    ...
    -

    arguments to pass to the appropriate method

    -
    type
    -

    raw or pre-treated data to plot

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg,abr1$fact) %>% 
    - occupancyMaximum(cls = 'day')
    -
    -## LDA plot
    -plotLDA(d,cls = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotMDS-1.png b/docs/reference/plotMDS-1.png deleted file mode 100644 index 1cf45201..00000000 Binary files a/docs/reference/plotMDS-1.png and /dev/null differ diff --git a/docs/reference/plotMDS.html b/docs/reference/plotMDS.html deleted file mode 100644 index dcdebd53..00000000 --- a/docs/reference/plotMDS.html +++ /dev/null @@ -1,168 +0,0 @@ - -Multidimensional scaling (MDS) plot — plotMDS • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Plot multidimensional scaling plot for a RandomForest class object.

    -
    - -
    -
    plotMDS(
    -  x,
    -  cls = "class",
    -  label = NULL,
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  title = "",
    -  legendPosition = "bottom",
    -  labelSize = 2
    -)
    -
    -# S4 method for RandomForest
    -plotMDS(
    -  x,
    -  cls = "class",
    -  label = NULL,
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  title = "",
    -  legendPosition = "bottom",
    -  labelSize = 2
    -)
    -
    -# S4 method for list
    -plotMDS(
    -  x,
    -  label = NULL,
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  title = "",
    -  legendPosition = "bottom",
    -  labelSize = 2
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class RandomForest

    -
    cls
    -

    sample information column to use for sample labelling, -Set to NULL for no labelling.

    -
    label
    -

    sample information column to use for sample labels. Set to NULL for no labels.

    -
    shape
    -

    TRUE/FALSE use shape aesthetic for plot points. -Defaults to TRUE when the number of classes is greater than 12

    -
    ellipses
    -

    TRUE/FALSE, plot multivariate normal distribution 95% -confidence ellipses for each class

    -
    title
    -

    plot title

    -
    legendPosition
    -

    legend position to pass to legend.position argument -of ggplot2::theme. Set to "none" to remove legend.

    -
    labelSize
    -

    label size. Ignored if label is NULL

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
    -       occupancyMaximum(cls = 'day') %>%
    -       transformTICnorm()
    -       
    -rf <- randomForest(x,cls = 'day')
    -
    -plotMDS(rf,cls = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotMetrics-1.png b/docs/reference/plotMetrics-1.png deleted file mode 100644 index 00466fd0..00000000 Binary files a/docs/reference/plotMetrics-1.png and /dev/null differ diff --git a/docs/reference/plotMetrics.html b/docs/reference/plotMetrics.html deleted file mode 100644 index 506b1c24..00000000 --- a/docs/reference/plotMetrics.html +++ /dev/null @@ -1,127 +0,0 @@ - -Plot model performance metrics — plotMetrics • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Plot random forest model performance metrics

    -
    - -
    -
    plotMetrics(x, response = "class")
    -
    -# S4 method for RandomForest
    -plotMetrics(x)
    -
    -# S4 method for list
    -plotMetrics(x)
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class RandomForest

    -
    response
    -

    response results to plot

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
    -       keepClasses(cls = 'day',classes = c('H','1','5')) %>% 
    -       occupancyMaximum(cls = 'day') %>%
    -       transformTICnorm()
    -       
    -rf <- randomForest(x,cls = 'day',binary = TRUE)
    -
    -plotMetrics(rf,response = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotOccupancy-1.png b/docs/reference/plotOccupancy-1.png deleted file mode 100644 index 468a7f29..00000000 Binary files a/docs/reference/plotOccupancy-1.png and /dev/null differ diff --git a/docs/reference/plotOccupancy.html b/docs/reference/plotOccupancy.html deleted file mode 100644 index 842b95f6..00000000 --- a/docs/reference/plotOccupancy.html +++ /dev/null @@ -1,127 +0,0 @@ - -Plot class occupancy distributions — plotOccupancy • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Plot class occupancy distributions.

    -
    - -
    -
    plotOccupancy(x, cls = "class", ...)
    -
    -# S4 method for AnalysisData
    -plotOccupancy(x, cls = "class")
    -
    -# S4 method for Analysis
    -plotOccupancy(x, cls = "class", type = "raw")
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class AnalysisData or Analysis

    -
    cls
    -

    sample information column to use for class labels

    -
    ...
    -

    arguments to pass to the appropriate method

    -
    type
    -

    raw or pre-treated data to plot

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg,abr1$fact)
    -
    -## Plot class occupancy distributions
    -plotOccupancy(d,cls = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotPCA-1.png b/docs/reference/plotPCA-1.png deleted file mode 100644 index e1ca4745..00000000 Binary files a/docs/reference/plotPCA-1.png and /dev/null differ diff --git a/docs/reference/plotPCA.html b/docs/reference/plotPCA.html deleted file mode 100644 index 315e141c..00000000 --- a/docs/reference/plotPCA.html +++ /dev/null @@ -1,192 +0,0 @@ - -Principle Component Analysis plot — plotPCA • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Plot Principal Component Analysis results.

    -
    - -
    -
    plotPCA(
    -  analysis,
    -  cls = "class",
    -  label = NULL,
    -  scale = TRUE,
    -  center = TRUE,
    -  xAxis = "PC1",
    -  yAxis = "PC2",
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  title = "PCA",
    -  legendPosition = "bottom",
    -  labelSize = 2,
    -  ...
    -)
    -
    -# S4 method for AnalysisData
    -plotPCA(
    -  analysis,
    -  cls = "class",
    -  label = NULL,
    -  scale = TRUE,
    -  center = TRUE,
    -  xAxis = "PC1",
    -  yAxis = "PC2",
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  title = "Principle Component Analysis (PCA)",
    -  legendPosition = "bottom",
    -  labelSize = 2
    -)
    -
    -# S4 method for Analysis
    -plotPCA(
    -  analysis,
    -  cls = "class",
    -  label = NULL,
    -  scale = TRUE,
    -  center = TRUE,
    -  xAxis = "PC1",
    -  yAxis = "PC2",
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  title = "PCA",
    -  legendPosition = "bottom",
    -  labelSize = 2,
    -  type = "raw"
    -)
    -
    - -
    -

    Arguments

    -
    analysis
    -

    object of class AnalysisData or Analysis

    -
    cls
    -

    name of class information column to use for sample labelling

    -
    label
    -

    name of class information column to use for sample labels. Set to NULL for no labels.

    -
    scale
    -

    scale the data

    -
    center
    -

    center the data

    -
    xAxis
    -

    principal component to plot on the x-axis

    -
    yAxis
    -

    principal component to plot on the y-axis

    -
    shape
    -

    TRUE/FALSE use shape aesthetic for plot points. -Defaults to TRUE when the number of classes is greater than 12

    -
    ellipses
    -

    TRUE/FALSE, plot multivariate normal distribution 95% -confidence ellipses for each class

    -
    title
    -

    plot title

    -
    legendPosition
    -

    legend position to pass to legend.position argument -of ggplot2::theme. Set to "none" to remove legend.

    -
    labelSize
    -

    label size. Ignored if label is NULL

    -
    ...
    -

    arguments to pass to the appropriate method

    -
    type
    -

    raw or pre-treated data to plot

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg,abr1$fact) %>% 
    - occupancyMaximum(cls = 'day')
    -
    -## PCA plot
    -plotPCA(d,cls = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotROC-1.png b/docs/reference/plotROC-1.png deleted file mode 100644 index 9dde9510..00000000 Binary files a/docs/reference/plotROC-1.png and /dev/null differ diff --git a/docs/reference/plotROC.html b/docs/reference/plotROC.html deleted file mode 100644 index bb56801d..00000000 --- a/docs/reference/plotROC.html +++ /dev/null @@ -1,131 +0,0 @@ - -Plot receiver operator characteristic (ROC) curves — plotROC • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Plot receiver operator characteristic curves for a -RandomForest class object.

    -
    - -
    -
    plotROC(x, title = "", legendPosition = "bottom")
    -
    -# S4 method for RandomForest
    -plotROC(x, title = "", legendPosition = "bottom")
    -
    -# S4 method for list
    -plotROC(x, title = "", legendPosition = "bottom")
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class RandomForest

    -
    title
    -

    plot title

    -
    legendPosition
    -

    legend position to pass to legend.position -argument of ggplot2::theme. Set to "none" to remove legend.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
    -       occupancyMaximum(cls = 'day') %>%
    -       transformTICnorm()
    -       
    -rf <- randomForest(x,cls = 'day')
    -
    -plotROC(rf)
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotRSD-1.png b/docs/reference/plotRSD-1.png deleted file mode 100644 index 158f70a2..00000000 Binary files a/docs/reference/plotRSD-1.png and /dev/null differ diff --git a/docs/reference/plotRSD.html b/docs/reference/plotRSD.html deleted file mode 100644 index a84e8ed2..00000000 --- a/docs/reference/plotRSD.html +++ /dev/null @@ -1,129 +0,0 @@ - -Plot RSD distributions — plotRSD • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Plot RSD distributions of raw data in quality control samples.

    -
    - -
    -
    plotRSD(analysis, cls = "class", ...)
    -
    -# S4 method for AnalysisData
    -plotRSD(analysis, cls = "class")
    -
    -# S4 method for Analysis
    -plotRSD(analysis, cls = "class", type = "raw")
    -
    - -
    -

    Arguments

    -
    analysis
    -

    object of class AnalysisData or Analysis

    -
    cls
    -

    information column to use for class labels

    -
    ...
    -

    arguments to pass to the appropriate method

    -
    type
    -

    raw or pre-treated data to plot

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg,abr1$fact)
    -
    -## Plot class RSD distributions
    -plotRSD(d,cls = 'day')
    -#> Warning: Removed 716 rows containing non-finite values (stat_density).
    -#> Warning: Removed 6 row(s) containing missing values (geom_path).
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotSupervisedRF-1.png b/docs/reference/plotSupervisedRF-1.png deleted file mode 100644 index dc59c461..00000000 Binary files a/docs/reference/plotSupervisedRF-1.png and /dev/null differ diff --git a/docs/reference/plotSupervisedRF.html b/docs/reference/plotSupervisedRF.html deleted file mode 100644 index ac31502f..00000000 --- a/docs/reference/plotSupervisedRF.html +++ /dev/null @@ -1,186 +0,0 @@ - -Supervised random forest MDS plot — plotSupervisedRF • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    A multidimensional scaling (MDS) plot of supervised random forest analysis

    -
    - -
    -
    plotSupervisedRF(
    -  x,
    -  cls = "class",
    -  rf = list(),
    -  label = NULL,
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  ROC = TRUE,
    -  seed = 1234,
    -  title = "",
    -  legendPosition = "bottom",
    -  labelSize = 2,
    -  ...
    -)
    -
    -# S4 method for AnalysisData
    -plotSupervisedRF(
    -  x,
    -  cls = "class",
    -  rf = list(),
    -  label = NULL,
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  ROC = TRUE,
    -  seed = 1234,
    -  title = "",
    -  legendPosition = "bottom",
    -  labelSize = 2
    -)
    -
    -# S4 method for Analysis
    -plotSupervisedRF(
    -  x,
    -  cls = "class",
    -  rf = list(),
    -  label = NULL,
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  ROC = TRUE,
    -  seed = 1234,
    -  title = "",
    -  legendPosition = "bottom",
    -  labelSize = 2,
    -  type = "raw"
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    object of class AnalysisData or Analysis containing analysis results

    -
    cls
    -

    information column to use for sample classes

    -
    rf
    -

    list of additional parameters to pass to randomForest

    -
    label
    -

    information column to use for sample labels. Set to NULL for no labels.

    -
    shape
    -

    TRUE/FALSE use shape aesthetic for plot points. -Defaults to TRUE when the number of classes is greater than 12

    -
    ellipses
    -

    TRUE/FALSE, plot multivariate normal distribution 95% -confidence ellipses for each class

    -
    ROC
    -

    should receiver-operator characteristics be plotted?

    -
    seed
    -

    random number seed

    -
    title
    -

    plot title

    -
    legendPosition
    -

    legend position to pass to legend.position argument -of ggplot2::theme. Set to "none" to remove legend.

    -
    labelSize
    -

    label size. Ignored if label is NULL

    -
    ...
    -

    arguments to pass to the appropriate method

    -
    type
    -

    raw or pre-treated data to plot

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    -
    -## Supervised random forest MDS plot
    -plotSupervisedRF(d,cls = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotTIC-1.png b/docs/reference/plotTIC-1.png deleted file mode 100644 index b820c449..00000000 Binary files a/docs/reference/plotTIC-1.png and /dev/null differ diff --git a/docs/reference/plotTIC-2.png b/docs/reference/plotTIC-2.png deleted file mode 100644 index 58da2b70..00000000 Binary files a/docs/reference/plotTIC-2.png and /dev/null differ diff --git a/docs/reference/plotTIC.html b/docs/reference/plotTIC.html deleted file mode 100644 index 95617d8b..00000000 --- a/docs/reference/plotTIC.html +++ /dev/null @@ -1,137 +0,0 @@ - -Plot sample total ion counts — plotTIC • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Plot total ion counts of sample data.

    -
    - -
    -
    plotTIC(analysis, by = "injOrder", colour = "block", ...)
    -
    -# S4 method for AnalysisData
    -plotTIC(analysis, by = "injOrder", colour = "block")
    -
    -# S4 method for Analysis
    -plotTIC(
    -  analysis,
    -  by = "injOrder",
    -  colour = "block",
    -  type = c("raw", "pre-treated")
    -)
    -
    - -
    -

    Arguments

    -
    analysis
    -

    S4 object of class AnalysisData or Analysis

    -
    by
    -

    information column to plot against

    -
    colour
    -

    information column to provide colour labels

    -
    ...
    -

    arguments to pass to the appropriate method

    -
    type
    -

    raw or pre-treated sample data

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg,abr1$fact)
    -
    -## Plot sample TICs
    -plotTIC(d,by = 'injorder',colour = 'day')
    -
    -
    -plotTIC(d,by = 'day',colour = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/plotUnsupervisedRF-1.png b/docs/reference/plotUnsupervisedRF-1.png deleted file mode 100644 index eeec5f4f..00000000 Binary files a/docs/reference/plotUnsupervisedRF-1.png and /dev/null differ diff --git a/docs/reference/plotUnsupervisedRF.html b/docs/reference/plotUnsupervisedRF.html deleted file mode 100644 index 08c4f7b0..00000000 --- a/docs/reference/plotUnsupervisedRF.html +++ /dev/null @@ -1,181 +0,0 @@ - -Unsupervised random forest MDS plot — plotUnsupervisedRF • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    A multidimensional scaling (MDS) plot of unsupervised random forest analysis

    -
    - -
    -
    plotUnsupervisedRF(
    -  x,
    -  cls = "class",
    -  rf = list(),
    -  label = NULL,
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  seed = 1234,
    -  title = "",
    -  legendPosition = "bottom",
    -  labelSize = 2,
    -  ...
    -)
    -
    -# S4 method for AnalysisData
    -plotUnsupervisedRF(
    -  x,
    -  cls = "class",
    -  rf = list(),
    -  label = NULL,
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  seed = 1234,
    -  title = "",
    -  legendPosition = "bottom",
    -  labelSize = 2
    -)
    -
    -# S4 method for Analysis
    -plotUnsupervisedRF(
    -  x,
    -  cls = "class",
    -  rf = list(),
    -  label = NULL,
    -  shape = FALSE,
    -  ellipses = TRUE,
    -  seed = 1234,
    -  title = "",
    -  legendPosition = "bottom",
    -  labelSize = 2,
    -  type = "raw"
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    object of class AnalysisData or Analysis

    -
    cls
    -

    sample information column to use for sample labelling

    -
    rf
    -

    list of additional parameters to pass to randomForest

    -
    label
    -

    info column to use for sample labels. Set to NULL for no labels.

    -
    shape
    -

    TRUE/FALSE use shape aesthetic for plot points. -Defaults to TRUE when the number of classes is greater than 12

    -
    ellipses
    -

    TRUE/FALSE, plot multivariate normal distribution 95% -confidence ellipses for each class

    -
    seed
    -

    random number seed

    -
    title
    -

    plot title

    -
    legendPosition
    -

    legend position to pass to legend.position argument -of ggplot2::theme. Set to "none" to remove legend.

    -
    labelSize
    -

    label size. Ignored if label is NULL

    -
    ...
    -

    arguments to pass to the appropriate method

    -
    type
    -

    raw or pre-treated data to plot

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    -
    -## Unsupervised random forest MDS plot
    -plotUnsupervisedRF(d,cls = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/pre-treatment-parameters.html b/docs/reference/pre-treatment-parameters.html deleted file mode 100644 index 6db4a800..00000000 --- a/docs/reference/pre-treatment-parameters.html +++ /dev/null @@ -1,154 +0,0 @@ - -Pre-treatment parameters — preTreatmentElements • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Return pre-treatment elements, methods and parameters.

    -
    - -
    -
    preTreatmentElements()
    -
    -preTreatmentMethods(element)
    -
    -preTreatmentParameters(methods)
    -
    - -
    -

    Arguments

    -
    element
    -

    pre-treatment element name

    -
    methods
    -

    a named list of element methods

    -
    - -
    -

    Examples

    -
    ## Return the available pre-treatment elements
    -preTreatmentElements()
    -#> [1] "aggregate"       "correction"      "impute"          "keep"           
    -#> [5] "occupancyFilter" "QC"              "remove"          "transform"      
    -
    -## Return the available pre-treatment methods for the remove element
    -preTreatmentMethods('remove')
    -#> [1] "classes"  "features" "samples" 
    -
    -## Define some default pre-treatment parameters
    -p <- preTreatmentParameters(
    -  list(
    -    remove = 'classes',
    -    QC = c('RSDfilter','removeQC'),
    -    transform = 'TICnorm'
    -  )
    -)
    -
    -## Assign the pre-treatment parameters to analysis parameters
    -ap <- analysisParameters('pre-treatment')
    -parameters(ap,'pre-treatment') <- p
    -
    -print(ap)
    -#> Parameters:
    -#> pre-treatment
    -#> 	remove
    -#> 		classes
    -#> 			cls = class
    -#> 			classes = c()
    -#> 	QC
    -#> 		RSDfilter
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 			RSDthresh = 50
    -#> 		removeQC
    -#> 			cls = class
    -#> 			QCidx = QC
    -#> 	transform
    -#> 		TICnorm
    -#> 
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/preTreated.html b/docs/reference/preTreated.html deleted file mode 100644 index 9736d84a..00000000 --- a/docs/reference/preTreated.html +++ /dev/null @@ -1,194 +0,0 @@ - - - - - - - - -preTreated — preTreated • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Get or set an AnalysisData object from -the pre-treated slot of the Analysis class.

    -
    - -
    preTreated(x)
    -
    -preTreated(x) <- value
    -
    -# S4 method for Analysis
    -preTreated(x)
    -
    -# S4 method for Analysis
    -preTreated(x) <- value
    - -

    Arguments

    - - - - - - - - - - -
    x

    S4 object of class Analysis

    value

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/preTreatmentElements.html b/docs/reference/preTreatmentElements.html deleted file mode 100644 index f2eb66e7..00000000 --- a/docs/reference/preTreatmentElements.html +++ /dev/null @@ -1,172 +0,0 @@ - - - - - - - - -preTreatmentElements — preTreatmentElements • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return names of available pre-treatment elements

    -
    - -
    preTreatmentElements()
    - - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/preTreatmentMethods.html b/docs/reference/preTreatmentMethods.html deleted file mode 100644 index cc170537..00000000 --- a/docs/reference/preTreatmentMethods.html +++ /dev/null @@ -1,182 +0,0 @@ - - - - - - - - -preTreatmentMethods — preTreatmentMethods • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return names of available methods for a given pre-treatment -element.

    -
    - -
    preTreatmentMethods(element)
    - -

    Arguments

    - - - - - - -
    element

    pre-treatment element

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/preTreatmentParameters.html b/docs/reference/preTreatmentParameters.html deleted file mode 100644 index 49432957..00000000 --- a/docs/reference/preTreatmentParameters.html +++ /dev/null @@ -1,191 +0,0 @@ - - - - - - - - -preTreatmentParameters — preTreatmentParameters • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return default parameters for given pre-treatment element -methods.

    -
    - -
    preTreatmentParameters(methods)
    - -

    Arguments

    - - - - - - -
    methods

    a named list of element methods

    - - -

    Examples

    -
    p <- preTreatmentParameters( - list( - remove = 'classes', - QC = c('RSDfilter','removeQC'), - transform = 'TICnorm' - ) -) -
    -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/randomForest-1.png b/docs/reference/randomForest-1.png deleted file mode 100644 index 1cf45201..00000000 Binary files a/docs/reference/randomForest-1.png and /dev/null differ diff --git a/docs/reference/randomForest.html b/docs/reference/randomForest.html deleted file mode 100644 index 3b0bfb3c..00000000 --- a/docs/reference/randomForest.html +++ /dev/null @@ -1,168 +0,0 @@ - -Random forest analysis — randomForest • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Perform random forest on an AnalysisData object

    -
    - -
    -
    randomForest(
    -  x,
    -  cls = "class",
    -  rf = list(),
    -  reps = 1,
    -  binary = FALSE,
    -  comparisons = list(),
    -  perm = 0,
    -  returnModels = FALSE,
    -  seed = 1234
    -)
    -
    -# S4 method for AnalysisData
    -randomForest(
    -  x,
    -  cls = "class",
    -  rf = list(),
    -  reps = 1,
    -  binary = FALSE,
    -  comparisons = list(),
    -  perm = 0,
    -  returnModels = FALSE,
    -  seed = 1234
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class AnalysisData

    -
    cls
    -

    vector of sample information columns to use for response variable information. Set to NULL for unsupervised.

    -
    rf
    -

    named list of arguments to pass to randomForest::randomForest

    -
    reps
    -

    number of repetitions to perform

    -
    binary
    -

    TRUE/FALSE should binary comparisons be performed. Ignored for unsupervised and regression. Ignored if comparisons specified.

    -
    comparisons
    -

    list of comparisons to perform. -Ignored for unsupervised and regression. See details.

    -
    perm
    -

    number of permutations to perform. Ignored for unsupervised.

    -
    returnModels
    -

    TRUE/FALSE should model objects be returned.

    -
    seed
    -

    random number seed

    -
    -
    -

    Value

    -

    An S4 object of class RandomForest.

    -
    -
    -

    Details

    -

    Specified class comparisons should be given as a list named -according to cls. Comparisons should be given as class names -separated by '~' (eg. '1~2~H').

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
    -       occupancyMaximum(cls = 'day') %>%
    -       transformTICnorm()
    -       
    -rf <- randomForest(x,cls = 'day')
    -
    -plotMDS(rf,cls = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/raw.html b/docs/reference/raw.html deleted file mode 100644 index 2ff8b19b..00000000 --- a/docs/reference/raw.html +++ /dev/null @@ -1,194 +0,0 @@ - - - - - - - - -raw — raw • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Get or set an AnalysisData object from -the raw slot of the Analysis class.

    -
    - -
    raw(x)
    -
    -raw(x) <- value
    -
    -# S4 method for Analysis
    -raw(x)
    -
    -# S4 method for Analysis
    -raw(x) <- value
    - -

    Arguments

    - - - - - - - - - - -
    x

    S4 object of class Analysis

    value

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/reAnalyse.html b/docs/reference/reAnalyse.html deleted file mode 100644 index 9f830aa4..00000000 --- a/docs/reference/reAnalyse.html +++ /dev/null @@ -1,246 +0,0 @@ - - - - - - - - -reAnalyse — reAnalyse • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Re-analyse an object of class Analysis using -specified parameters.

    -
    - -
    reAnalyse(analysis, parameters = analysisParameters(), verbose = TRUE)
    - -

    Arguments

    - - - - - - - - - - - - - - -
    analysis

    an object of class Analysis containing previous -analysis results

    parameters

    an object of class Parameters containing parameters for -re-analysis

    verbose

    should output be printed to the console

    - - -

    Examples

    -
    library(metaboData) - -## Generate analysis parameters -p <- analysisParameters(c('pre-treatment','modelling')) - -## Alter pre-treatment and modelling parameters to use different methods -parameters(p,'pre-treatment') <- preTreatmentParameters( - list(occupancyFilter = 'maximum', - transform = 'TICnorm') -) -parameters(p,'modelling') <- modellingParameters('anova') - -## Change "cls" and "nCores" parameters -changeParameter(p,'cls') <- 'day' -changeParameter(p,'nCores') <- 2 - -## Run analysis using a subset of the abr1 negative mode data set -analysis <- metabolyse(abr1$neg[,1:200], - abr1$fact, - p) -
    #> -#> metabolyseR v0.14.0 Thu Apr 15 21:57:53 2021
    #> ________________________________________________________________________________
    #> Parameters: -#> pre-treatment -#> occupancyFilter -#> maximum -#> cls = day -#> occupancy = 2/3 -#> transform -#> TICnorm -#> -#> modelling -#> anova -#> cls = day -#> pAdjust = bonferroni -#> comparisons = list() -#> returnModels = FALSE
    #> ________________________________________________________________________________
    #> Pre-treatment
    #> Pre-treatment [0.9S]
    #> Modelling
    #> -#> Attaching package: ‘purrr’
    #> The following object is masked from ‘package:testthat’: -#> -#> is_null
    #> Modelling [0.7S]
    #> ________________________________________________________________________________
    #> -#> Complete! [1.7S]
    -## Re-analyse to include correlation analysis -analysis <- reAnalyse(analysis, - parameters = analysisParameters('correlations')) -
    #> -#> metabolyseR v0.14.0 Thu Apr 15 21:57:55 2021 -#> ________________________________________________________________________________ -#> Parameters: -#> correlations -#> method = pearson -#> pAdjustMethod = bonferroni -#> corPvalue = 0.05 -#> ________________________________________________________________________________ -#>
    #> Correlations
    #> Error in (function (cl, name, valueClass) { ClassDef <- getClass(cl) slotClass <- ClassDef@slots[[name]] if (is.null(slotClass)) stop(gettextf("%s is not a slot in class %s", sQuote(name), dQuote(cl)), domain = NA) if (.identC(slotClass, valueClass)) return(TRUE) ok <- possibleExtends(valueClass, slotClass, ClassDef2 = getClassDef(slotClass, where = .classEnv(ClassDef))) if (isFALSE(ok)) stop(gettextf("assignment of an object of class %s is not valid for @%s in an object of class %s; is(value, \"%s\") is not TRUE", dQuote(valueClass), sQuote(name), dQuote(cl), slotClass), domain = NA) TRUE})(structure("AnalysisData", package = "metabolyseR"), "correlations", c("tbl_df", "tbl", "data.frame")): ‘correlations’ is not a slot in class “AnalysisData”
    -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html deleted file mode 100644 index 0d0ee6f1..00000000 --- a/docs/reference/reexports.html +++ /dev/null @@ -1,116 +0,0 @@ - -Objects exported from other packages — reexports • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    These objects are imported from other packages. Follow the links -below to see their documentation.

    -
    future
    -

    plan

    - - -
    magrittr
    -

    %>%

    - - -
    - - - -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/remove.html b/docs/reference/remove.html deleted file mode 100644 index 0bf27f07..00000000 --- a/docs/reference/remove.html +++ /dev/null @@ -1,178 +0,0 @@ - -Remove samples, classes or features — removeClasses • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Exclusion of samples, classes or features from an AnalysisData object.

    -
    - -
    -
    removeClasses(d, cls = "class", classes = c())
    -
    -# S4 method for AnalysisData
    -removeClasses(d, cls = "class", classes = c())
    -
    -removeFeatures(d, features = character())
    -
    -# S4 method for AnalysisData
    -removeFeatures(d, features = character())
    -
    -removeSamples(d, idx = "fileOrder", samples = c())
    -
    -# S4 method for AnalysisData
    -removeSamples(d, idx = "fileOrder", samples = c())
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class AnalysisData

    -
    cls
    -

    info column to use for class information

    -
    classes
    -

    classes to remove

    -
    features
    -

    features to remove

    -
    idx
    -

    info column containing sample indexes

    -
    samples
    -

    sample indexes to remove

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisData with samples, classes or features removed.

    -
    -
    -

    Methods

    - - -
    • removeClasses: Remove classes.

    • -
    • removeFeatures: Remove features.

    • -
    • removeSamples: Remove samples.

    • -
    - -
    -

    Examples

    -
    library(metaboData)
    - d <- analysisData(abr1$neg[,200:300],abr1$fact)
    - 
    - ## Remove classes
    - d %>% 
    -  removeClasses(cls = 'day',classes = 'H')
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 100 
    -#> Features: 101 
    -#> Info: 9 
    -#> 
    - 
    - ## Remove features
    - d %>% 
    -  removeFeatures(features = c('N200','N201'))
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 120 
    -#> Features: 99 
    -#> Info: 9 
    -#> 
    - 
    - ## Remove samples
    - d %>% 
    -  removeSamples(idx = 'injorder',samples = c(1,10))
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 118 
    -#> Features: 101 
    -#> Info: 9 
    -#> 
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/removeClasses.html b/docs/reference/removeClasses.html deleted file mode 100644 index 7d65d8ae..00000000 --- a/docs/reference/removeClasses.html +++ /dev/null @@ -1,191 +0,0 @@ - - - - - - - - -removeClasses — removeClasses • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Remove classes from an AnalysisData object.

    -
    - -
    removeClasses(d, cls = "class", classes = c())
    -
    -# S4 method for AnalysisData
    -removeClasses(d, cls = "class", classes = c())
    - -

    Arguments

    - - - - - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    cls

    info column to use for class information

    classes

    classes to remove

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/removeFeatures.html b/docs/reference/removeFeatures.html deleted file mode 100644 index da445ab5..00000000 --- a/docs/reference/removeFeatures.html +++ /dev/null @@ -1,187 +0,0 @@ - - - - - - - - -removeFeatures — removeFeatures • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Remove features from an AnalysisData object.

    -
    - -
    removeFeatures(d, features = character())
    -
    -# S4 method for AnalysisData
    -removeFeatures(d, features = character())
    - -

    Arguments

    - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    features

    features to remove

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/removeSamples.html b/docs/reference/removeSamples.html deleted file mode 100644 index 5b0d7e76..00000000 --- a/docs/reference/removeSamples.html +++ /dev/null @@ -1,191 +0,0 @@ - - - - - - - - -removeSamples — removeSamples • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Remove samples from an AnalysisData object.

    -
    - -
    removeSamples(d, idx = "fileOrder", samples = c())
    -
    -# S4 method for AnalysisData
    -removeSamples(d, idx = "fileOrder", samples = c())
    - -

    Arguments

    - - - - - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    idx

    info column containing sample indexes

    samples

    sample indexes to remove

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/response.html b/docs/reference/response.html deleted file mode 100644 index 29ba7b51..00000000 --- a/docs/reference/response.html +++ /dev/null @@ -1,183 +0,0 @@ - - - - - - - - -response — response • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return the response variable name from a random forest analysis.

    -
    - -
    response(x)
    -
    -# S4 method for RandomForest
    -response(x)
    - -

    Arguments

    - - - - - - -
    x

    S4 object of class RandomForest

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/roc.html b/docs/reference/roc.html deleted file mode 100644 index 50222e62..00000000 --- a/docs/reference/roc.html +++ /dev/null @@ -1,144 +0,0 @@ - -Receiver-operator characteristic (ROC) curves — roc • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    ROC curves for out-of-bag random forest predictions.

    -
    - -
    -
    roc(x)
    -
    -# S4 method for RandomForest
    -roc(x)
    -
    -# S4 method for list
    -roc(x)
    -
    -# S4 method for Analysis
    -roc(x)
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class RandomForest, Analysis or a list

    -
    -
    -

    Value

    -

    A tibble containing the ROC curves.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
    -       occupancyMaximum(cls = 'day') %>%
    -       transformTICnorm()
    -       
    -rf <- randomForest(x,cls = 'day')
    -
    -roc(rf)
    -#> # A tibble: 711 × 6
    -#>    Response Comparison  Class .threshold specificity sensitivity
    -#>    <chr>    <chr>       <chr>      <dbl>       <dbl>       <dbl>
    -#>  1 day      1~2~3~4~5~H 1     -Inf              0              1
    -#>  2 day      1~2~3~4~5~H 1        0              0              1
    -#>  3 day      1~2~3~4~5~H 1        0.00503        0.01           1
    -#>  4 day      1~2~3~4~5~H 1        0.00538        0.02           1
    -#>  5 day      1~2~3~4~5~H 1        0.0103         0.03           1
    -#>  6 day      1~2~3~4~5~H 1        0.0105         0.04           1
    -#>  7 day      1~2~3~4~5~H 1        0.0117         0.05           1
    -#>  8 day      1~2~3~4~5~H 1        0.0144         0.06           1
    -#>  9 day      1~2~3~4~5~H 1        0.0157         0.07           1
    -#> 10 day      1~2~3~4~5~H 1        0.0222         0.08           1
    -#> # … with 701 more rows
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/rsd.html b/docs/reference/rsd.html deleted file mode 100644 index f9752f69..00000000 --- a/docs/reference/rsd.html +++ /dev/null @@ -1,138 +0,0 @@ - -Calculate feature relative standard deviations — rsd • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Calculate relative standard deviation (RSD) percentage values for each -feature per class for a given sample information column.

    -
    - -
    -
    rsd(x, cls = "class")
    -
    -# S4 method for AnalysisData
    -rsd(x, cls = "class")
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class AnalysisData

    -
    cls
    -

    sample information column to use for class structure

    -
    -
    -

    Value

    -

    A tibble containing the computed RSD values.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    -
    -rsd(d,cls = 'day')
    -#> # A tibble: 606 × 5
    -#>    day   Feature   Mean    SD   RSD
    -#>    <fct> <chr>    <dbl> <dbl> <dbl>
    -#>  1 1     N200    0.224  1.00  447. 
    -#>  2 1     N201    0.228  0.946 415. 
    -#>  3 1     N202    0.0538 0.151 280. 
    -#>  4 1     N203    1.34   1.03   76.5
    -#>  5 1     N204    0.0833 0.202 242. 
    -#>  6 1     N205    1.55   2.29  148. 
    -#>  7 1     N206    0.112  0.360 320. 
    -#>  8 1     N207    0.220  0.396 180. 
    -#>  9 1     N208    0.124  0.225 182. 
    -#> 10 1     N209    1.37   2.03  148. 
    -#> # … with 596 more rows
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/show-Analysis-method.html b/docs/reference/show-Analysis-method.html deleted file mode 100644 index 1f26da02..00000000 --- a/docs/reference/show-Analysis-method.html +++ /dev/null @@ -1,181 +0,0 @@ - - - - - - - - -show-Analysis — show,Analysis-method • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    show method for Analysis class.

    -
    - -
    # S4 method for Analysis
    -show(object)
    - -

    Arguments

    - - - - - - -
    object

    S4 object of class Analysis

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/show-AnalysisData-method.html b/docs/reference/show-AnalysisData-method.html deleted file mode 100644 index cff83e5b..00000000 --- a/docs/reference/show-AnalysisData-method.html +++ /dev/null @@ -1,181 +0,0 @@ - - - - - - - - -show-AnalysisData — show,AnalysisData-method • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    show method for AnalysisData class

    -
    - -
    # S4 method for AnalysisData
    -show(object)
    - -

    Arguments

    - - - - - - -
    object

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/show-AnalysisParameters-method.html b/docs/reference/show-AnalysisParameters-method.html deleted file mode 100644 index 0f5d2252..00000000 --- a/docs/reference/show-AnalysisParameters-method.html +++ /dev/null @@ -1,181 +0,0 @@ - - - - - - - - -show-AnalysisParameters — show,AnalysisParameters-method • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    show method for AnalysisParameters class.

    -
    - -
    # S4 method for AnalysisParameters
    -show(object)
    - -

    Arguments

    - - - - - - -
    object

    S4 object of class AnalysisParameters

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/show-RandomForest-method.html b/docs/reference/show-RandomForest-method.html deleted file mode 100644 index 889af07a..00000000 --- a/docs/reference/show-RandomForest-method.html +++ /dev/null @@ -1,181 +0,0 @@ - - - - - - - - -show-RandomForest — show,RandomForest-method • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Show method for RandomForest class.

    -
    - -
    # S4 method for RandomForest
    -show(object)
    - -

    Arguments

    - - - - - - -
    object

    S4 object of class RandomForest

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/show-Univariate-method.html b/docs/reference/show-Univariate-method.html deleted file mode 100644 index 8eb55c04..00000000 --- a/docs/reference/show-Univariate-method.html +++ /dev/null @@ -1,181 +0,0 @@ - - - - - - - - -show-Univariate — show,Univariate-method • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Show method for the Univariate class.

    -
    - -
    # S4 method for Univariate
    -show(object)
    - -

    Arguments

    - - - - - - -
    object

    S4 object of class Univariate

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/sinfo.html b/docs/reference/sinfo.html deleted file mode 100644 index 8be0eacc..00000000 --- a/docs/reference/sinfo.html +++ /dev/null @@ -1,207 +0,0 @@ - - - - - - - - -sinfo — sinfo • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return sample info from an AnalysisData or Analysis object.

    -
    - -
    sinfo(x, ...)
    -
    -sinfo(x, ...) <- value
    -
    -# S4 method for AnalysisData
    -sinfo(x)
    -
    -# S4 method for AnalysisData
    -sinfo(x) <- value
    -
    -# S4 method for Analysis
    -sinfo(x, type = "raw", value)
    -
    -# S4 method for Analysis
    -sinfo(x, type = "raw") <- value
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    x

    S4 object of class AnalysisData or Analysis

    ...

    arguments to pass to the appropriate method

    value

    tibble containing sample info

    type

    sample information type to extract or set. -Should be one of "raw" or "pre-treated"

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/split.html b/docs/reference/split.html deleted file mode 100644 index cf503323..00000000 --- a/docs/reference/split.html +++ /dev/null @@ -1,181 +0,0 @@ - -Split an AnalysisData object — split • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Split an object of class AnalysisData into a list based -on a class grouping variable.

    -
    - -
    -
    split(x, cls = "class")
    -
    -# S4 method for AnalysisData
    -split(x, cls = "class")
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class AnalysisData

    -
    cls
    -

    sample information column to use for splitting

    -
    -
    -

    Value

    -

    A list of AnalysisData objects.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg,abr1$fact)
    -
    -## Split the data set based on the 'day' class information column
    -d <- split(d,cls = 'day')
    -
    -print(d)
    -#> $`1`
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 20 
    -#> Features: 2000 
    -#> Info: 9 
    -#> 
    -#> 
    -#> $`2`
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 20 
    -#> Features: 2000 
    -#> Info: 9 
    -#> 
    -#> 
    -#> $`3`
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 20 
    -#> Features: 2000 
    -#> Info: 9 
    -#> 
    -#> 
    -#> $`4`
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 20 
    -#> Features: 2000 
    -#> Info: 9 
    -#> 
    -#> 
    -#> $`5`
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 20 
    -#> Features: 2000 
    -#> Info: 9 
    -#> 
    -#> 
    -#> $H
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 20 
    -#> Features: 2000 
    -#> Info: 9 
    -#> 
    -#> 
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/transform-1.png b/docs/reference/transform-1.png deleted file mode 100644 index 9521d4d7..00000000 Binary files a/docs/reference/transform-1.png and /dev/null differ diff --git a/docs/reference/transform-10.png b/docs/reference/transform-10.png deleted file mode 100644 index 62356bb8..00000000 Binary files a/docs/reference/transform-10.png and /dev/null differ diff --git a/docs/reference/transform-11.png b/docs/reference/transform-11.png deleted file mode 100644 index 7693e42d..00000000 Binary files a/docs/reference/transform-11.png and /dev/null differ diff --git a/docs/reference/transform-12.png b/docs/reference/transform-12.png deleted file mode 100644 index 1fef3723..00000000 Binary files a/docs/reference/transform-12.png and /dev/null differ diff --git a/docs/reference/transform-2.png b/docs/reference/transform-2.png deleted file mode 100644 index 493d3ede..00000000 Binary files a/docs/reference/transform-2.png and /dev/null differ diff --git a/docs/reference/transform-3.png b/docs/reference/transform-3.png deleted file mode 100644 index eb809b14..00000000 Binary files a/docs/reference/transform-3.png and /dev/null differ diff --git a/docs/reference/transform-4.png b/docs/reference/transform-4.png deleted file mode 100644 index 0bc25fa7..00000000 Binary files a/docs/reference/transform-4.png and /dev/null differ diff --git a/docs/reference/transform-5.png b/docs/reference/transform-5.png deleted file mode 100644 index bba71d7a..00000000 Binary files a/docs/reference/transform-5.png and /dev/null differ diff --git a/docs/reference/transform-6.png b/docs/reference/transform-6.png deleted file mode 100644 index 69887a90..00000000 Binary files a/docs/reference/transform-6.png and /dev/null differ diff --git a/docs/reference/transform-7.png b/docs/reference/transform-7.png deleted file mode 100644 index dc045329..00000000 Binary files a/docs/reference/transform-7.png and /dev/null differ diff --git 
a/docs/reference/transform-8.png b/docs/reference/transform-8.png deleted file mode 100644 index ad093f40..00000000 Binary files a/docs/reference/transform-8.png and /dev/null differ diff --git a/docs/reference/transform-9.png b/docs/reference/transform-9.png deleted file mode 100644 index d4e31ff3..00000000 Binary files a/docs/reference/transform-9.png and /dev/null differ diff --git a/docs/reference/transform.html b/docs/reference/transform.html deleted file mode 100644 index 7819a044..00000000 --- a/docs/reference/transform.html +++ /dev/null @@ -1,271 +0,0 @@ - -Scaling, transformation and normalisation methods — transformArcSine • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Methods for data scaling, transformation and normalisation.

    -
    - -
    -
    transformArcSine(d)
    -
    -# S4 method for AnalysisData
    -transformArcSine(d)
    -
    -transformAuto(d)
    -
    -# S4 method for AnalysisData
    -transformAuto(d)
    -
    -transformCenter(d)
    -
    -# S4 method for AnalysisData
    -transformCenter(d)
    -
    -transformLevel(d)
    -
    -# S4 method for AnalysisData
    -transformLevel(d)
    -
    -transformLn(d, add = 1)
    -
    -# S4 method for AnalysisData
    -transformLn(d, add = 1)
    -
    -transformLog10(d, add = 1)
    -
    -# S4 method for AnalysisData
    -transformLog10(d, add = 1)
    -
    -transformPareto(d)
    -
    -# S4 method for AnalysisData
    -transformPareto(d)
    -
    -transformRange(d)
    -
    -# S4 method for AnalysisData
    -transformRange(d)
    -
    -transformSQRT(d)
    -
    -# S4 method for AnalysisData
    -transformSQRT(d)
    -
    -transformTICnorm(d)
    -
    -# S4 method for AnalysisData
    -transformTICnorm(d)
    -
    -transformVast(d)
    -
    -# S4 method for AnalysisData
    -transformVast(d)
    -
    - -
    -

    Arguments

    -
    d
    -

    S4 object of class AnalysisData

    -
    add
    -

    value to add prior to transformation

    -
    -
    -

    Value

    -

    An S4 object of class AnalysisData containing the transformed data.

    -
    -
    -

    Details

    -

    Prior to downstream analyses, metabolomics data often require transformation to fulfil the assumptions of a particular statistical/data mining technique. -Before applying a transformation, it is important to consider the effects that the transformation will have on the data, as this can greatly affect the outcome of further downstream analyses. -It is also important to consider at what stage in the pre-treatment routine a transformation is applied as this too could introduce artefacts into the data. -The best practice is to apply a transformation as the last in a pre-treatment routine after all other steps have been taken. -There are a wide range of transformation methods available that are commonly used for the analysis of metabolomics data.

    -
    -
    -

    Methods

    - - -
    • transformArcSine: Arc-sine transformation.

    • -
    • transformAuto: Auto scaling.

    • -
    • transformCenter: Mean centring.

    • -
    • transformLevel: Level scaling.

    • -
    • transformLn: Natural logarithmic transformation.

    • -
    • transformLog10: Logarithmic transformation.

    • -
    • transformPareto: Pareto scaling.

    • -
    • transformRange: Range scaling. Also known as min-max scaling.

    • -
    • transformSQRT: Square root transformation.

    • -
    • transformTICnorm: Total ion count normalisation.

    • -
    • transformVast: Vast scaling.

    • -
    - -
    -

    Examples

    -
    
    -## Each of the following examples shows the application of the transformation and then 
    -## a Linear Discriminant Analysis is plotted to show its effect on the data structure.
    -
    -## Initial example data preparation
    -library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
    - occupancyMaximum(occupancy = 2/3)
    -
    -d %>% 
    - plotLDA(cls = 'day')
    -
    - 
    -
    -## Arc-sine transformation
    -d %>% 
    - transformArcSine() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Auto scaling
    -d %>% 
    - transformAuto() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Mean centring
    -d %>% 
    - transformCenter()%>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Level scaling
    -d %>% 
    - transformLevel() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Natural logarithmic transformation
    -d %>% 
    - transformLn() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Logarithmic transformation
    -d %>% 
    - transformLog10()%>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Pareto scaling
    -d %>% 
    - transformPareto() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Range scaling
    -d %>% 
    - transformRange() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Square root scaling
    -d %>% 
    - transformSQRT() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Total ion count normalisation
    -d %>% 
    - transformTICnorm() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Vast scaling
    -d %>% 
    - transformVast() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/transformArcSine.html b/docs/reference/transformArcSine.html deleted file mode 100644 index 947901b3..00000000 --- a/docs/reference/transformArcSine.html +++ /dev/null @@ -1,183 +0,0 @@ - - - - - - - - -transformArcSine — transformArcSine • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Arc-sine transformation of sample data.

    -
    - -
    transformArcSine(d)
    -
    -# S4 method for AnalysisData
    -transformArcSine(d)
    - -

    Arguments

    - - - - - - -
    d

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/transformAuto.html b/docs/reference/transformAuto.html deleted file mode 100644 index ebf1774d..00000000 --- a/docs/reference/transformAuto.html +++ /dev/null @@ -1,183 +0,0 @@ - - - - - - - - -transformAuto — transformAuto • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Auto scaling of sample data.

    -
    - -
    transformAuto(d)
    -
    -# S4 method for AnalysisData
    -transformAuto(d)
    - -

    Arguments

    - - - - - - -
    d

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/transformCenter.html b/docs/reference/transformCenter.html deleted file mode 100644 index bb8603ed..00000000 --- a/docs/reference/transformCenter.html +++ /dev/null @@ -1,183 +0,0 @@ - - - - - - - - -transformCenter — transformCenter • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Mean center sample data.

    -
    - -
    transformCenter(d)
    -
    -# S4 method for AnalysisData
    -transformCenter(d)
    - -

    Arguments

    - - - - - - -
    d

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/transformLevel.html b/docs/reference/transformLevel.html deleted file mode 100644 index abcc1d59..00000000 --- a/docs/reference/transformLevel.html +++ /dev/null @@ -1,183 +0,0 @@ - - - - - - - - -transformLevel — transformLevel • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Level scaling of sample data.

    -
    - -
    transformLevel(d)
    -
    -# S4 method for AnalysisData
    -transformLevel(d)
    - -

    Arguments

    - - - - - - -
    d

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/transformLn.html b/docs/reference/transformLn.html deleted file mode 100644 index 50b5af34..00000000 --- a/docs/reference/transformLn.html +++ /dev/null @@ -1,187 +0,0 @@ - - - - - - - - -transformLn — transformLn • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Natural logarithmic transformation of sample data.

    -
    - -
    transformLn(d, add = 1)
    -
    -# S4 method for AnalysisData
    -transformLn(d, add = 1)
    - -

    Arguments

    - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    add

    value to add prior to transformation

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/transformLog10.html b/docs/reference/transformLog10.html deleted file mode 100644 index 35bdf11b..00000000 --- a/docs/reference/transformLog10.html +++ /dev/null @@ -1,187 +0,0 @@ - - - - - - - - -transformLog10 — transformLog10 • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Logarithmic transformation of sample data.

    -
    - -
    transformLog10(d, add = 1)
    -
    -# S4 method for AnalysisData
    -transformLog10(d, add = 1)
    - -

    Arguments

    - - - - - - - - - - -
    d

    S4 object of class AnalysisData

    add

    value to add prior to transformation

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/transformPareto.html b/docs/reference/transformPareto.html deleted file mode 100644 index 2ffa6ffd..00000000 --- a/docs/reference/transformPareto.html +++ /dev/null @@ -1,183 +0,0 @@ - - - - - - - - -transformPareto — transformPareto • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Pareto scaling of sample data.

    -
    - -
    transformPareto(d)
    -
    -# S4 method for AnalysisData
    -transformPareto(d)
    - -

    Arguments

    - - - - - - -
    d

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/transformRange.html b/docs/reference/transformRange.html deleted file mode 100644 index ecc89955..00000000 --- a/docs/reference/transformRange.html +++ /dev/null @@ -1,183 +0,0 @@ - - - - - - - - -transformRange — transformRange • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Range scaling of sample data. Also known as min-max scaling.

    -
    - -
    transformRange(d)
    -
    -# S4 method for AnalysisData
    -transformRange(d)
    - -

    Arguments

    - - - - - - -
    d

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/transformSQRT.html b/docs/reference/transformSQRT.html deleted file mode 100644 index aa001324..00000000 --- a/docs/reference/transformSQRT.html +++ /dev/null @@ -1,183 +0,0 @@ - - - - - - - - -transformSQRT — transformSQRT • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Square root transformation of sample data.

    -
    - -
    transformSQRT(d)
    -
    -# S4 method for AnalysisData
    -transformSQRT(d)
    - -

    Arguments

    - - - - - - -
    d

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/transformTICnorm.html b/docs/reference/transformTICnorm.html deleted file mode 100644 index 0d2d1bf7..00000000 --- a/docs/reference/transformTICnorm.html +++ /dev/null @@ -1,183 +0,0 @@ - - - - - - - - -transformTICnorm — transformTICnorm • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Total ion count normalisation of sample data.

    -
    - -
    transformTICnorm(d)
    -
    -# S4 method for AnalysisData
    -transformTICnorm(d)
    - -

    Arguments

    - - - - - - -
    d

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/transformVast.html b/docs/reference/transformVast.html deleted file mode 100644 index d695eccc..00000000 --- a/docs/reference/transformVast.html +++ /dev/null @@ -1,183 +0,0 @@ - - - - - - - - -transformVast — transformVast • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Vast scaling of sample data.

    -
    - -
    transformVast(d)
    -
    -# S4 method for AnalysisData
    -transformVast(d)
    - -

    Arguments

    - - - - - - -
    d

    S4 object of class AnalysisData

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/ttest.html b/docs/reference/ttest.html deleted file mode 100644 index e9691b2b..00000000 --- a/docs/reference/ttest.html +++ /dev/null @@ -1,161 +0,0 @@ - -Welch's t-test — ttest • metabolyseR - - -
    -
    - - - -
    -
    - - -
    -

    Welch's t-test

    -
    - -
    -
    ttest(
    -  x,
    -  cls = "class",
    -  pAdjust = "bonferroni",
    -  comparisons = list(),
    -  returnModels = FALSE
    -)
    -
    -# S4 method for AnalysisData
    -ttest(
    -  x,
    -  cls = "class",
    -  pAdjust = "bonferroni",
    -  comparisons = list(),
    -  returnModels = FALSE
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    S4 object of class AnalysisData

    -
    cls
    -

    vector of sample information column names to analyse

    -
    pAdjust
    -

    p value adjustment method

    -
    comparisons
    -

    named list of binary comparisons to analyse

    -
    returnModels
    -

    should models be returned

    -
    -
    -

    Value

    -

    An S4 object of class Univariate.

    -
    - -
    -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
    - keepClasses(cls = 'day',classes = c('H','5'))
    -
    -## Perform t-test
    -ttest_analysis <- ttest(d,cls = 'day')
    -
    -## Extract significant features
    -explanatoryFeatures(ttest_analysis)
    -#> # A tibble: 11 × 14
    -#>    Response Comparison Feature estimate estimate1 estimate2 statistic    p.value
    -#>    <chr>    <chr>      <chr>      <dbl>     <dbl>     <dbl>     <dbl>      <dbl>
    -#>  1 day      5~H        N277       65.4      79.2     13.8        7.77    1.58e-7
    -#>  2 day      5~H        N299        7.68      8.99     1.31       6.36    2.53e-6
    -#>  3 day      5~H        N229       50.3      55.2      4.93       5.96    8.60e-6
    -#>  4 day      5~H        N295        4.19      5.12     0.937      5.56    8.65e-6
    -#>  5 day      5~H        N233       -4.65      2.68     7.33      -5.00    1.69e-5
    -#>  6 day      5~H        N267       27.3      48.1     20.8        4.79    2.96e-5
    -#>  7 day      5~H        N245       18.0      19.9      1.94       4.92    9.00e-5
    -#>  8 day      5~H        N279        7.64      9.21     1.57       4.61    1.63e-4
    -#>  9 day      5~H        N278        4.14      6.27     2.12       4.45    1.76e-4
    -#> 10 day      5~H        N281        3.02      3.72     0.701      4.47    1.92e-4
    -#> 11 day      5~H        N272        2.99      3.71     0.722      4.30    2.49e-4
    -#> # … with 6 more variables: parameter <dbl>, conf.low <dbl>, conf.high <dbl>,
    -#> #   method <chr>, alternative <chr>, adjusted.p.value <dbl>
    -
    -
    -
    - -
    - - -
    - -
    -

    Site built with pkgdown 2.0.2.

    -
    - -
    - - - - - - - - diff --git a/docs/reference/type.html b/docs/reference/type.html deleted file mode 100644 index 72580bd7..00000000 --- a/docs/reference/type.html +++ /dev/null @@ -1,183 +0,0 @@ - - - - - - - - -type — type • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Return the random forest analysis type.

    -
    - -
    type(x)
    -
    -# S4 method for RandomForest
    -type(x)
    - -

    Arguments

    - - - - - - -
    x

    S4 object of class RandomForest

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/sitemap.xml b/docs/sitemap.xml deleted file mode 100644 index 6ca698d7..00000000 --- a/docs/sitemap.xml +++ /dev/null @@ -1,396 +0,0 @@ - - - - https://jasenfinch.github.io/metabolyseR/404.html - - - https://jasenfinch.github.io/metabolyseR/articles/01_quick_start.html - - - https://jasenfinch.github.io/metabolyseR/articles/02_introduction.html - - - https://jasenfinch.github.io/metabolyseR/articles/03_pre_treatment.html - - - https://jasenfinch.github.io/metabolyseR/articles/04_modelling.html - - - https://jasenfinch.github.io/metabolyseR/articles/index.html - - - https://jasenfinch.github.io/metabolyseR/articles/introduction.html - - - https://jasenfinch.github.io/metabolyseR/articles/metabolyseR.html - - - https://jasenfinch.github.io/metabolyseR/articles/modelling.html - - - https://jasenfinch.github.io/metabolyseR/articles/pre_treatment.html - - - https://jasenfinch.github.io/metabolyseR/articles/quick_start.html - - - https://jasenfinch.github.io/metabolyseR/authors.html - - - https://jasenfinch.github.io/metabolyseR/index.html - - - https://jasenfinch.github.io/metabolyseR/news/index.html - - - https://jasenfinch.github.io/metabolyseR/reference/Analysis-class.html - - - https://jasenfinch.github.io/metabolyseR/reference/AnalysisData-class.html - - - https://jasenfinch.github.io/metabolyseR/reference/AnalysisParameters-class.html - - - https://jasenfinch.github.io/metabolyseR/reference/QC.html - - - https://jasenfinch.github.io/metabolyseR/reference/QCimpute.html - - - https://jasenfinch.github.io/metabolyseR/reference/QCoccupancy.html - - - https://jasenfinch.github.io/metabolyseR/reference/QCremove.html - - - https://jasenfinch.github.io/metabolyseR/reference/QCrsdFilter.html - - - https://jasenfinch.github.io/metabolyseR/reference/RandomForest-class.html - - - https://jasenfinch.github.io/metabolyseR/reference/Univariate-class.html - - - https://jasenfinch.github.io/metabolyseR/reference/aggregate.html - - - 
https://jasenfinch.github.io/metabolyseR/reference/aggregateMean.html - - - https://jasenfinch.github.io/metabolyseR/reference/aggregateMedian.html - - - https://jasenfinch.github.io/metabolyseR/reference/aggregateSum.html - - - https://jasenfinch.github.io/metabolyseR/reference/analysis-accessors.html - - - https://jasenfinch.github.io/metabolyseR/reference/analysisData.html - - - https://jasenfinch.github.io/metabolyseR/reference/analysisElements.html - - - https://jasenfinch.github.io/metabolyseR/reference/analysisParameters.html - - - https://jasenfinch.github.io/metabolyseR/reference/analysisResults.html - - - https://jasenfinch.github.io/metabolyseR/reference/anova.html - - - https://jasenfinch.github.io/metabolyseR/reference/binaryComparisons.html - - - https://jasenfinch.github.io/metabolyseR/reference/bind.html - - - https://jasenfinch.github.io/metabolyseR/reference/bindAnalysesRows.html - - - https://jasenfinch.github.io/metabolyseR/reference/changeParameter.html - - - https://jasenfinch.github.io/metabolyseR/reference/cls.html - - - https://jasenfinch.github.io/metabolyseR/reference/clsAdd.html - - - https://jasenfinch.github.io/metabolyseR/reference/clsArrange.html - - - https://jasenfinch.github.io/metabolyseR/reference/clsAvailable.html - - - https://jasenfinch.github.io/metabolyseR/reference/clsExtract.html - - - https://jasenfinch.github.io/metabolyseR/reference/clsRemove.html - - - https://jasenfinch.github.io/metabolyseR/reference/clsRename.html - - - https://jasenfinch.github.io/metabolyseR/reference/clsReplace.html - - - https://jasenfinch.github.io/metabolyseR/reference/correction.html - - - https://jasenfinch.github.io/metabolyseR/reference/correctionCenter.html - - - https://jasenfinch.github.io/metabolyseR/reference/correlations.html - - - https://jasenfinch.github.io/metabolyseR/reference/correlationsParameters.html - - - https://jasenfinch.github.io/metabolyseR/reference/dat.html - - - 
https://jasenfinch.github.io/metabolyseR/reference/explanatoryFeatures.html - - - https://jasenfinch.github.io/metabolyseR/reference/exportParameters.html - - - https://jasenfinch.github.io/metabolyseR/reference/features.html - - - https://jasenfinch.github.io/metabolyseR/reference/importance.html - - - https://jasenfinch.github.io/metabolyseR/reference/importanceMetrics.html - - - https://jasenfinch.github.io/metabolyseR/reference/impute.html - - - https://jasenfinch.github.io/metabolyseR/reference/imputeAll.html - - - https://jasenfinch.github.io/metabolyseR/reference/imputeClass.html - - - https://jasenfinch.github.io/metabolyseR/reference/index.html - - - https://jasenfinch.github.io/metabolyseR/reference/io-parameters.html - - - https://jasenfinch.github.io/metabolyseR/reference/keep.html - - - https://jasenfinch.github.io/metabolyseR/reference/keepClasses.html - - - https://jasenfinch.github.io/metabolyseR/reference/keepFeatures.html - - - https://jasenfinch.github.io/metabolyseR/reference/keepSamples.html - - - https://jasenfinch.github.io/metabolyseR/reference/linearRegression.html - - - https://jasenfinch.github.io/metabolyseR/reference/mds.html - - - https://jasenfinch.github.io/metabolyseR/reference/metabolyse.html - - - https://jasenfinch.github.io/metabolyseR/reference/metrics.html - - - https://jasenfinch.github.io/metabolyseR/reference/modelling-accessors.html - - - https://jasenfinch.github.io/metabolyseR/reference/modelling-parameters.html - - - https://jasenfinch.github.io/metabolyseR/reference/modellingMethods.html - - - https://jasenfinch.github.io/metabolyseR/reference/modellingParameters.html - - - https://jasenfinch.github.io/metabolyseR/reference/nFeatures.html - - - https://jasenfinch.github.io/metabolyseR/reference/nSamples.html - - - https://jasenfinch.github.io/metabolyseR/reference/occupancy.html - - - https://jasenfinch.github.io/metabolyseR/reference/occupancyFilter.html - - - 
https://jasenfinch.github.io/metabolyseR/reference/occupancyMaximum.html - - - https://jasenfinch.github.io/metabolyseR/reference/occupancyMinimum.html - - - https://jasenfinch.github.io/metabolyseR/reference/parameters.html - - - https://jasenfinch.github.io/metabolyseR/reference/parseParameters.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotFeature.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotImportance.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotLDA.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotMDS.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotMetrics.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotOccupancy.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotPCA.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotROC.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotRSD.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotSupervisedRF.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotTIC.html - - - https://jasenfinch.github.io/metabolyseR/reference/plotUnsupervisedRF.html - - - https://jasenfinch.github.io/metabolyseR/reference/pre-treatment-parameters.html - - - https://jasenfinch.github.io/metabolyseR/reference/preTreated.html - - - https://jasenfinch.github.io/metabolyseR/reference/preTreatmentElements.html - - - https://jasenfinch.github.io/metabolyseR/reference/preTreatmentMethods.html - - - https://jasenfinch.github.io/metabolyseR/reference/preTreatmentParameters.html - - - https://jasenfinch.github.io/metabolyseR/reference/randomForest.html - - - https://jasenfinch.github.io/metabolyseR/reference/raw.html - - - https://jasenfinch.github.io/metabolyseR/reference/reAnalyse.html - - - https://jasenfinch.github.io/metabolyseR/reference/reexports.html - - - 
https://jasenfinch.github.io/metabolyseR/reference/remove.html - - - https://jasenfinch.github.io/metabolyseR/reference/removeClasses.html - - - https://jasenfinch.github.io/metabolyseR/reference/removeFeatures.html - - - https://jasenfinch.github.io/metabolyseR/reference/removeSamples.html - - - https://jasenfinch.github.io/metabolyseR/reference/response.html - - - https://jasenfinch.github.io/metabolyseR/reference/roc.html - - - https://jasenfinch.github.io/metabolyseR/reference/rsd.html - - - https://jasenfinch.github.io/metabolyseR/reference/show-Analysis-method.html - - - https://jasenfinch.github.io/metabolyseR/reference/show-AnalysisData-method.html - - - https://jasenfinch.github.io/metabolyseR/reference/show-AnalysisParameters-method.html - - - https://jasenfinch.github.io/metabolyseR/reference/show-RandomForest-method.html - - - https://jasenfinch.github.io/metabolyseR/reference/show-Univariate-method.html - - - https://jasenfinch.github.io/metabolyseR/reference/sinfo.html - - - https://jasenfinch.github.io/metabolyseR/reference/split.html - - - https://jasenfinch.github.io/metabolyseR/reference/transform.html - - - https://jasenfinch.github.io/metabolyseR/reference/transformArcSine.html - - - https://jasenfinch.github.io/metabolyseR/reference/transformAuto.html - - - https://jasenfinch.github.io/metabolyseR/reference/transformCenter.html - - - https://jasenfinch.github.io/metabolyseR/reference/transformLevel.html - - - https://jasenfinch.github.io/metabolyseR/reference/transformLn.html - - - https://jasenfinch.github.io/metabolyseR/reference/transformLog10.html - - - https://jasenfinch.github.io/metabolyseR/reference/transformPareto.html - - - https://jasenfinch.github.io/metabolyseR/reference/transformRange.html - - - https://jasenfinch.github.io/metabolyseR/reference/transformSQRT.html - - - https://jasenfinch.github.io/metabolyseR/reference/transformTICnorm.html - - - https://jasenfinch.github.io/metabolyseR/reference/transformVast.html - - - 
https://jasenfinch.github.io/metabolyseR/reference/ttest.html - - - https://jasenfinch.github.io/metabolyseR/reference/type.html - - diff --git a/man/modelling-accessors.Rd b/man/modelling-accessors.Rd index 2f88f176..90565c65 100644 --- a/man/modelling-accessors.Rd +++ b/man/modelling-accessors.Rd @@ -3,6 +3,8 @@ \name{binaryComparisons} \alias{binaryComparisons} \alias{binaryComparisons,AnalysisData-method} +\alias{mtry} +\alias{mtry,AnalysisData-method} \alias{type} \alias{type,RandomForest-method} \alias{response} @@ -33,6 +35,10 @@ binaryComparisons(x, cls = "class") \S4method{binaryComparisons}{AnalysisData}(x, cls = "class") +mtry(x, cls = "class") + +\S4method{mtry}{AnalysisData}(x, cls = "class") + type(x) \S4method{type}{RandomForest}(x) @@ -101,6 +107,7 @@ Methods for accessing modelling results. \itemize{ \item \code{binaryComparisons}: Return a vector of all possible binary comparisons for a given sample information column. +\item \code{mtry}: Calculate the default \code{mtry} random forest parameter value for a given sample information column. \item \code{type}: Return the type of random forest analysis. \item \code{response}: Return the response variable name used for a random forest analysis. 
\item \code{metrics}: Retrieve the model performance metrics for a random forest analysis @@ -116,9 +123,12 @@ library(metaboData) d <- analysisData(abr1$neg[,200:300],abr1$fact) -## Return possible binary comparisons for the 'day' column +## Return possible binary comparisons for the `day` response column binaryComparisons(d,cls = 'day') +## Return the default random forest `mtry` parameter for the `day` response column +mtry(d,cls = 'day') + ## Perform random forest analysis rf_analysis <- randomForest(d,cls = 'day') diff --git a/man/predict.Rd b/man/predict.Rd new file mode 100644 index 00000000..852cff9f --- /dev/null +++ b/man/predict.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/predict.R +\name{predict} +\alias{predict} +\alias{predict,RandomForest,AnalysisData-method} +\title{Predict random forest model responses} +\usage{ +predict( + model, + new_data, + idx = NULL, + type = c("response", "prob", "votes"), + ... +) + +\S4method{predict}{RandomForest,AnalysisData}( + model, + new_data, + idx = NULL, + type = c("response", "prob", "votes"), + ... +) +} +\arguments{ +\item{model}{S4 object of class \code{RandomForest}} + +\item{new_data}{S4 object of class \code{AnalysisData}} + +\item{idx}{sample information column to use for sample names. If \code{NULL}, the sample row number will be used. Sample names should be unique for each row of data.} + +\item{type}{one of \code{response}, \code{prob}, or \code{votes} to indicate the type of prediction to make} + +\item{...}{arguments to pass to \code{randomForest::predict.randomForest()}} +} +\description{ +Predict values of random forest model response variables from new data. +} +\details{ +The features contained within \code{new_data} should match those of the features used to train \code{model}. +The \code{features()} method can be used to check this. 
+The argument \code{returnModels = TRUE} should also be used when training the \code{RandomForest-class} object used for argument \code{model}. +} +\examples{ +library(metaboData) + +## Prepare some data +x <- analysisData(abr1$neg[,200:300],abr1$fact) \%>\% + occupancyMaximum(cls = 'day') \%>\% + transformTICnorm() + +## Extract data from which to train a random forest model +training_data <- x \%>\% + keepClasses(cls = 'day', + classes = c('H','1')) + +## Extract data for which response values will be predicted +test_data <- x \%>\% + keepClasses(cls = 'day', + classes = c('2','3')) + +rf <- randomForest(training_data, + cls = 'day', + returnModels = TRUE) + +predict(rf, + test_data) +} diff --git a/man/tune.Rd b/man/tune.Rd new file mode 100644 index 00000000..ca20d62e --- /dev/null +++ b/man/tune.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tune.R +\name{tune} +\alias{tune} +\alias{tune,AnalysisData-method} +\title{Tune random forest parameters} +\usage{ +tune( + x, + cls = "class", + mtry_range = floor(seq(mtry(x, cls = cls) - mtry(x, cls = cls)/2, mtry(x, cls = cls) + + mtry(x, cls = cls)/2, length.out = 4)), + ntree_range = 1000, + seed = 1234 +) + +\S4method{tune}{AnalysisData}( + x, + cls = "class", + mtry_range = floor(seq(mtry(x, cls = cls) - mtry(x, cls = cls)/2, mtry(x, cls = cls) + + mtry(x, cls = cls)/2, length.out = 4)), + ntree_range = 1000, + seed = 1234 +) +} +\arguments{ +\item{x}{S4 object of class \code{AnalysisData}} + +\item{cls}{sample information column to use} + +\item{mtry_range}{numeric vector of \code{mtry} values to search} + +\item{ntree_range}{numeric vector of \code{ntree} values to search} + +\item{seed}{random number seed} +} +\value{ +A list containing the optimal \code{mtry} and \code{ntree} parameters. +This is suitable for use as the \code{rf} argument in method \code{randomForest()}. 
+} +\description{ +Tune the \code{mtry} and \code{ntree} random forest parameters using a grid search approach. +} +\details{ +Parameter tuning is performed by grid search of all combinations of the \code{mtry_range} and \code{ntree_range} vectors provided. +The optimal parameter values are selected using the out-of-bag error estimates of the \code{margin} metric for classification and the \code{rmse} (root-mean-square error) metric for regression. +} +\examples{ +library(metaboData) + +## Prepare some data +x <- analysisData(abr1$neg[,200:300],abr1$fact) \%>\% + occupancyMaximum(cls = 'day') \%>\% + transformTICnorm() + +## Tune the `mtry` parameter for the `day` response +tune(x,cls = 'day') +} diff --git a/tests/testthat/test-predict.R b/tests/testthat/test-predict.R new file mode 100644 index 00000000..9c55677c --- /dev/null +++ b/tests/testthat/test-predict.R @@ -0,0 +1,67 @@ +test_that("predict works", { + x <- analysisData(abr1$neg[,200:300],abr1$fact) %>% + occupancyMaximum(cls = 'day') %>% + transformTICnorm() + + training_data <- x %>% + keepClasses(cls = 'day', + classes = c('H','1')) + + test_data <- x %>% + keepClasses(cls = 'day', + classes = c('2','3')) + + rf <- randomForest(training_data, + cls = 'day', + returnModels = TRUE) + + predictions <- predict(rf, + test_data) + + expect_s3_class(predictions,'tbl_df') + expect_error(predict(rf, + test_data, + idx = 'day')) +}) + +test_that("predit throws an error if unsupervised random forest used",{ + x <- analysisData(abr1$neg[,200:300],abr1$fact) %>% + occupancyMaximum(cls = 'day') %>% + transformTICnorm() + + training_data <- x %>% + keepClasses(cls = 'day', + classes = c('H','1')) + + test_data <- x %>% + keepClasses(cls = 'day', + classes = c('2','3')) + + rf <- randomForest(training_data, + cls = NULL, + returnModels = TRUE) + + expect_error(predict(rf, + test_data)) +}) + +test_that("predict throws an error if RandomForest object does not contain models",{ + x <- 
analysisData(abr1$neg[,200:300],abr1$fact) %>% + occupancyMaximum(cls = 'day') %>% + transformTICnorm() + + training_data <- x %>% + keepClasses(cls = 'day', + classes = c('H','1')) + + test_data <- x %>% + keepClasses(cls = 'day', + classes = c('2','3')) + + rf <- randomForest(training_data, + cls = 'day', + returnModels = FALSE) + + expect_error(predict(rf, + test_data)) +}) diff --git a/tests/testthat/test-tune.R b/tests/testthat/test-tune.R new file mode 100644 index 00000000..d8f4db96 --- /dev/null +++ b/tests/testthat/test-tune.R @@ -0,0 +1,22 @@ + +x <- analysisData(abr1$neg[,200:300],abr1$fact) %>% + occupancyMaximum(cls = 'day') %>% + transformTICnorm() + +test_that("tuning works", { + tune_values <- tune(x,cls = 'day') + + expect_equal(tune_values$mtry,9) + expect_equal(tune_values$ntree,1000) +}) + +test_that('tuning throws an error for unsupervised random forest',{ + expect_error(tune(x,cls = NULL)) +}) + +test_that('an empty list is returned when no optimal parameters can be found',{ + tune_values <- tune(x, + cls = 'day', + ntree_range = 1) + expect_identical(tune_values,list()) +})