diff --git a/DESCRIPTION b/DESCRIPTION index 3722e986..6170e610 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: metabolyseR Title: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data -Version: 0.14.6 +Version: 0.14.7 Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre")) Description: A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data. URL: https://jasenfinch.github.io/metabolyseR diff --git a/NAMESPACE b/NAMESPACE index 903c998c..4a9d6df1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -190,6 +190,7 @@ importFrom(magrittr,set_colnames) importFrom(magrittr,set_names) importFrom(magrittr,set_rownames) importFrom(methods,"slot<-") +importFrom(methods,as) importFrom(methods,new) importFrom(methods,show) importFrom(methods,slot) diff --git a/NEWS.md b/NEWS.md index 9de6357a..92d4f501 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,10 @@ +# metabolyseR 0.14.7 + +* Single replicate classes now automatically removed by [`plotLDA()`](https://jasenfinch.github.io/metabolyseR/reference/plotLDA.html). + # metabolyseR 0.14.6 -* [`plotExplanatoryHeatmap`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html) method for the [`Analysis`](https://jasenfinch.github.io/metabolyseR/reference/Analysis-class.html) class now returns the plot only if the number of plots is equal to 1. +* [`plotExplanatoryHeatmap()`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html) method for the [`Analysis`](https://jasenfinch.github.io/metabolyseR/reference/Analysis-class.html) class now returns the plot only if the number of plots is equal to 1. * Removed reference to the `nCores` parameter from the documentation example of [`metabolyse()`](https://jasenfinch.github.io/metabolyseR/reference/metabolyse.html). 
diff --git a/R/allClasses.R b/R/allClasses.R index 1a05c5c9..6edd9228 100644 --- a/R/allClasses.R +++ b/R/allClasses.R @@ -115,3 +115,22 @@ setClass('Univariate', models = 'list', results = 'tbl_df' )) + +setClass('LDA', + contains = 'AnalysisData', + slots = list( + stats = 'tbl_df', + Tw = 'numeric', + rankmat = 'numeric', + means = 'numeric', + loadings = 'tbl_df', + x = 'tbl_df', + xmeans = 'tbl_df', + pred = 'factor', + cl = 'factor', + prior = 'numeric', + conf = 'table', + acc = 'numeric', + lev = 'character', + call = 'call' + )) \ No newline at end of file diff --git a/R/nlda.R b/R/nlda.R index fb13b859..fdb4f937 100644 --- a/R/nlda.R +++ b/R/nlda.R @@ -1,141 +1,156 @@ -## FIEmspro https://github.com/aberHRML/FIEmspro nlda functionality +## Based on FIEmspro https://github.com/aberHRML/FIEmspro nlda functionality -#'@importFrom e1071 naiveBayes -#'@importFrom stats cov predict +setGeneric('nlda',function(x,cls = 'class',prior = NULL,scale = FALSE,comprank = FALSE,...) + standardGeneric('nlda')) -`nlda.default` <- - function(dat,cl, prior=NULL,scale=FALSE,comprank = FALSE,...) 
- { - if (missing(dat) || missing(cl)) - stop("data set or class are missing") - dat <- as.matrix(dat) - - cl <- as.factor(cl) - if (any(table(cl) == 0)) stop("Can't have empty classes in cl.") - - if (nrow(dat) != length(cl)) stop("dat and cl don't match.") - if (length(unique(cl)) < 2) - stop("Classification needs at least two classes.") - if (any(is.na(dat)) || any(is.na(cl))) - stop("NA is not permitted in data set or class labels.") - - if(is.null(prior)){prior <- as.vector(table(cl)/length(cl))} - if(is.null(names(prior))){names(prior) <- levels(cl)} - - pc <- prcomp(dat,scale=scale) - - rankmat <- max(1,ncol(pc$x)-1) - - if(comprank == TRUE) - rankmat <- qr(cov(dat)*(dim(dat)[1]-1))$rank - score <- pc$x[,1:rankmat,drop=FALSE] - - g <- nlevels(cl) - mx <- apply(score,2,mean) - t <- matrix(0,nrow = rankmat,ncol=rankmat) - W <- matrix(0,nrow = rankmat,ncol=rankmat) - for(j in 1:g){ - idx <- which(cl==levels(cl)[j]) - L <- length(idx) - K <- score[idx,,drop=FALSE] - zz <- apply(K,2,mean) - A <- K - t(matrix(rep(mx, L),length(mx),L)) - C <- K - t(matrix(rep(zz, L),length(zz),L)) - t <- t + t(A)%*%A - W <- W + t(C)%*%C - } - B <- t-W - - Ng <- nrow(score)-g - P <- W/(Ng) - eP <- eigen(P) - ord <- sort.list(eP$values) - V <- sweep( - eP$vectors[,ord,drop=FALSE], - 2, - sqrt(colSums(eP$vectors[,ord,drop=FALSE]^2)), - "/") - Dg <- eP$values[ord] - nDg <- length(Dg) - Dmean <- sum(diag(P))/nDg - Dn <- matrix(0,nDg,nDg) - for(i in 1:nDg) - Dn[i,i] <- max(c(Dg[i],Dmean)) - - Wn <- V%*%Dn%*%t(V)*Ng - ratio <- solve(Wn)%*%B - er <- eigen(ratio) - ev <- Re(er$values) - ev[Im(er$values)>0] <- 0 - vec <- Re(er$vectors) - ord <- sort.list(ev,decreasing=TRUE) - vec <- sweep( - vec[,ord,drop=FALSE], - 2, - sqrt(colSums(vec[,ord,drop=FALSE]^2)), - "/") - ev <- ev[ord] - maxg <- min(c(g-1,dim(vec)[1])) - vec <- vec[,1:maxg] ## discriminant functions - Tw <- ev[1:maxg] - names(Tw) <- paste("DF", 1:maxg, sep = "") - - ## get stats here - flip <- function(x) x[rev(seq_along(x))] - n 
<- dim(dat)[1] - st <- matrix(0,length(Tw),3) - st[,1] <- round(Tw,3) - st[,2] <- round(Tw*100/sum(Tw),3) - st[,3] <- round(sqrt(Tw/(1+Tw)),3) - st <- as.data.frame(st) - dimnames(st) <- list( - paste("DF", 1:maxg, sep = ""), - c("Eig","Perceig","Cancor")) - - res <- list() - res$stats <- st - res$Tw <- Tw - res$rankmat <- rankmat - res$means <- pc$center - res$loadings <- pc$rotation[,1:rankmat,drop=FALSE] %*% - vec ## discriminant functions with PCA - - colnames(res$loadings) <- paste("DF", 1:maxg, sep = "") - - ## rotated data (projection) - x <- sweep(dat, 2, res$means) %*% res$loadings - - ## group means based on the rotated data - xmeans <- tapply(x, list(rep(cl,ncol(x)),col(x)), mean) - dimnames(xmeans)[[2]] <- colnames(x) - - #if(type==1){ - # mdist=as.matrix(dist(rbind(xmeans,x))) - # mdist=mdist[1:g,(g+1):ncol(mdist)] - # prob= (1-t(sweep(mdist,2,apply(mdist,2,sum),"/")))/(g-1) - # pred = apply(prob,1,which.max) - # pred <- factor(dimnames(prob)[[2]][pred], levels = levels(cl)) - #} - #else{ - nbmod <- naiveBayes(data.frame(x),cl) - prob <- predict(nbmod,data.frame(x),type="raw") - pred <- apply(prob,1,which.max) - pred <- factor(levels(cl)[pred], levels = levels(cl)) - #} - res$x <- x - res$xmeans <- xmeans - res$pred <- pred - res$cl <- cl - res$prior <- prior - res$conf <- table(cl,pred) - res$acc <- round(sum(diag(res$conf))*100/nrow(dat),2) - res$lev <- levels(cl) - res$call <- match.call() - res$call[[1]] <- as.name("nlda") - class(res) <- "nlda" - - return(res) - } +#' @importFrom e1071 naiveBayes +#' @importFrom stats cov predict +#' @importFrom methods as - -nlda <- function (dat, ...) UseMethod ("nlda") +setMethod('nlda',signature = 'AnalysisData', + function(x,cls = 'class',prior=NULL,scale=FALSE,comprank = FALSE,...) { + + cl <- x %>% + clsExtract(cls) + + if (is.numeric(cl)) + stop('Classes should not be numeric',call. 
= FALSE) + + cl <- factor(cl,levels = unique(cl)) + + if (any(table(cl) < 2)) { + remove_classes <- cl %>% + table() %>% + names() %>% + {.[table(cl) < 2]} + + x <- x %>% + removeClasses(cls = cls, + classes = remove_classes) + + warning(str_c('Classes with a single replicate removed: ', + str_c(str_c('"', + remove_classes, + '"'), + collapse = ', ')), + call. = FALSE) + + cl <- x %>% + clsExtract(cls) %>% + factor(levels = unique(.)) + } + + if (length(unique(cl)) < 2) + stop('More than 1 class needed for PC-LDA.',call. = FALSE) + + d <- x %>% + dat() %>% + as.matrix() + + if(is.null(prior)){prior <- as.vector(table(cl)/length(cl))} + if(is.null(names(prior))){names(prior) <- levels(cl)} + + pc <- prcomp(d,scale=scale) + + rankmat <- max(1,ncol(pc$x)-1) + + if(comprank == TRUE) + rankmat <- qr(cov(d)*(dim(d)[1]-1))$rank + score <- pc$x[,1:rankmat,drop=FALSE] + + g <- nlevels(cl) + mx <- apply(score,2,mean) + t <- matrix(0,nrow = rankmat,ncol=rankmat) + W <- matrix(0,nrow = rankmat,ncol=rankmat) + for(j in 1:g){ + idx <- which(cl==levels(cl)[j]) + L <- length(idx) + K <- score[idx,,drop=FALSE] + zz <- apply(K,2,mean) + A <- K - t(matrix(rep(mx, L),length(mx),L)) + C <- K - t(matrix(rep(zz, L),length(zz),L)) + t <- t + t(A)%*%A + W <- W + t(C)%*%C + } + B <- t-W + + Ng <- nrow(score)-g + P <- W/(Ng) + eP <- eigen(P) + ord <- sort.list(eP$values) + V <- sweep( + eP$vectors[,ord,drop=FALSE], + 2, + sqrt(colSums(eP$vectors[,ord,drop=FALSE]^2)), + "/") + Dg <- eP$values[ord] + nDg <- length(Dg) + Dmean <- sum(diag(P))/nDg + Dn <- matrix(0,nDg,nDg) + for(i in 1:nDg) + Dn[i,i] <- max(c(Dg[i],Dmean)) + + Wn <- V%*%Dn%*%t(V)*Ng + ratio <- solve(Wn)%*%B + er <- eigen(ratio) + ev <- Re(er$values) + ev[Im(er$values)>0] <- 0 + vec <- Re(er$vectors) + ord <- sort.list(ev,decreasing=TRUE) + vec <- sweep( + vec[,ord,drop=FALSE], + 2, + sqrt(colSums(vec[,ord,drop=FALSE]^2)), + "/") + ev <- ev[ord] + maxg <- min(c(g-1,dim(vec)[1])) + vec <- vec[,1:maxg] ## discriminant functions + 
Tw <- ev[1:maxg] + names(Tw) <- paste("DF", 1:maxg, sep = "") + + ## get stats here + flip <- function(x) x[rev(seq_along(x))] + n <- dim(d)[1] + st <- matrix(0,length(Tw),3) + st[,1] <- round(Tw,3) + st[,2] <- round(Tw*100/sum(Tw),3) + st[,3] <- round(sqrt(Tw/(1+Tw)),3) + st <- as.data.frame(st) + dimnames(st) <- list( + paste("DF", 1:maxg, sep = ""), + c("Eig","Perceig","Cancor")) + + res <- as(x,'LDA') + res@stats <- as_tibble(st) + res@Tw <- Tw + res@rankmat <- rankmat + res@means <- pc$center + + loadings <- pc$rotation[,1:rankmat,drop=FALSE] %*% vec + colnames(loadings) <- paste("DF", 1:maxg, sep = "") + x <- sweep(d, 2, res@means) %*% loadings + + ## group means based on the rotated data + xmeans <- tapply(x, list(rep(cl,ncol(x)),col(x)), mean) + dimnames(xmeans)[[2]] <- colnames(x) + + nbmod <- naiveBayes(data.frame(x),cl) + prob <- predict(nbmod,data.frame(x),type="raw") + pred <- apply(prob,1,which.max) + pred <- factor(levels(cl)[pred], levels = levels(cl)) + + res@loadings <- as_tibble(loadings) + res@x <- as_tibble(x) + res@xmeans <- as_tibble(xmeans) + res@pred <- pred + res@cl <- cl + res@prior <- prior + res@conf <- table(cl,pred) + res@acc <- round(sum(diag(res@conf))*100/nrow(d),2) + res@lev <- levels(cl) + res@call <- match.call() + res@call[[1]] <- as.name("nlda") + + return(res) + } +) diff --git a/R/plotLDA.R b/R/plotLDA.R index 3b326780..7f427619 100644 --- a/R/plotLDA.R +++ b/R/plotLDA.R @@ -63,24 +63,16 @@ setMethod('plotLDA', legendPosition = 'bottom', labelSize = 2){ - classLength <- clsLen(analysis,cls) + lda <- nlda(analysis,cls = cls,scale = scale,center = center) - if (classLength < 2) { - stop('More than 1 class needed for PC-LDA.',call. 
= FALSE) - } - - info <- analysis %>% - clsExtract(cls) %>% - factor() - - lda <- nlda(dat(analysis),cl = info,scale = scale,center = center) - - tw <- lda$Tw %>% + tw <- lda@Tw %>% round(2) - lda <- lda$x %>% + classLength <- clsLen(lda,cls = cls) + + lda <- lda@x %>% as_tibble() %>% - mutate(!!cls := info) + mutate(!!cls := lda@cl) if (classLength > 2) { lda <- lda %>% @@ -92,8 +84,6 @@ setMethod('plotLDA', select(all_of(label))) } - classLength <- clsLen(analysis,cls) - pl <- scatterPlot(lda, cls, xAxis, diff --git a/docs/404.html b/docs/404.html index 6fe14936..8e202619 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,66 +1,27 @@ - - - - + + + + - Page not found (404) • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + - - - - -
-
- + +
+ + + - - -
+
+
-
+ + - - diff --git a/docs/articles/index.html b/docs/articles/index.html index 14966dd8..580075ba 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,66 +1,12 @@ - - - - - - - -Articles • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Articles • metabolyseR - + + - - - -
-
- -
- -
+
-
- - + + diff --git a/docs/articles/metabolyseR.html b/docs/articles/metabolyseR.html index b616fec7..0ac5d5dd 100644 --- a/docs/articles/metabolyseR.html +++ b/docs/articles/metabolyseR.html @@ -19,6 +19,8 @@ + +
+
-
-

-Introduction

+
+

Introduction +

The metabolyseR package provides a suite of methods that encompass three elements of metabolomics data analysis:

  • data pre-treatment
  • @@ -113,118 +109,118 @@

    The package also distinguishes between the flexibility and simplicity required for exploratory analyses compared to the convenience needed for more complex routine analyses. This is reflected in the underlying S4 object-oriented implementations and associated methods defined within the package. It should be noted that it is useful to understand the principles involved in using metabolyseR for exploratory analyses to aid in extracting and wrangling the results generated from routine analyses.

    The following document will provide an introduction to the basic usage of the package and includes how to create and use the base classes that are the foundation of metabolyseR. This will be focused around the applications for both exploratory and routine analyses. For more detailed information on the individual analysis elements see their associated vignette using:

    -browseVignettes('metabolyseR')
    +browseVignettes('metabolyseR')

There is also an example quick start analysis vignette provided.

-vignette('quick_start','metabolyseR')
-

Any issues, bugs or errors encountered while using the package should be reported here.

-

The examples shown here will use the abr1 data set from the metaboData package (?metaboData::abr1). This is a nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data set from a plant-pathogen infection time course experiment. The examples will also include use of the pipe %>% from the magrittr package.

+vignette('quick_start','metabolyseR')
+

Any issues, bugs or errors encountered while using the package should be reported here.

+

The examples shown here will use the abr1 data set from the metaboData package (?metaboData::abr1). This is a nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data set from a plant-pathogen infection time course experiment. The examples will also include use of the pipe %>% from the magrittr package.

Firstly load the necessary packages:

+library(metabolyseR) +library(metaboData)
-
-

-Parallel processing

-

The package supports parallel processing using the future package.

-

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel back-end using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

+
+

Parallel processing +

+

The package supports parallel processing using the future package.

+

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel back-end using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

-plan(future::multisession,workers = 2)
-

See the future package documentation for more information on the types of parallel implementations that are available.

+plan(future::multisession,workers = 2)
+

See the future package documentation for more information on the types of parallel implementations that are available.

-
-

-Exploratory analyses

+
+

Exploratory analyses +

For exploratory analyses, simple questions of the data need to be answered quickly, requiring few steps. Key requirements for any tool used by investigators are that it should be both simple and flexible.

In metabolyseR, the AnalysisData class is the base S4 class that provides these requirements. The following sections will give an overview of the basics in constructing and using these objects as the base for analysis.

-
-

-Analysis data

+
+

Analysis data +

We can firstly construct an AnalysisData object which requires two data tables. The first is the metabolomic data where the columns are the metabolome features, the rows the sample observations and contains the abundance values. The second is the sample meta-information where the row order should match that of the metabolome data table. Using the example data, this can be constructed and assigned to the variable d by:

 d <- analysisData(data = abr1$neg,
                   info = abr1$fact)

Where abr1$neg is the negative ionisation mode data and abr1$fact is the corresponding sample information. By printing d we can view some basic information about our data.

-print(d)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 9
+print(d)
+
## 
+## AnalysisData object containing:
+## 
+## Samples: 120 
+## Features: 2000 
+## Info: 9

We can also return the numbers of samples and numbers of features respectively using the following:

-
## [1] 120
+nSamples(d)
+
## [1] 120
-
## [1] 2000
+nFeatures(d)
+
## [1] 2000

The data table can be extracted using the dat method:

-dat(d)
-
## # A tibble: 120 × 2,000
-##       N1    N2    N3    N4    N5    N6    N7    N8    N9   N10   N11   N12   N13
-##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-##  1     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  2     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  3     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  4     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  5     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  6     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  7     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  8     0     0     0     0     0     0     0     0     0     0     0     0     0
-##  9     0     0     0     0     0     0     0     0     0     0     0     0     0
-## 10     0     0     0     0     0     0     0     0     0     0     0     0     0
-## # … with 110 more rows, and 1,987 more variables: N14 <dbl>, N15 <dbl>,
-## #   N16 <dbl>, N17 <dbl>, N18 <dbl>, N19 <dbl>, N20 <dbl>, N21 <dbl>,
-## #   N22 <dbl>, N23 <dbl>, N24 <dbl>, N25 <dbl>, N26 <dbl>, N27 <dbl>,
-## #   N28 <dbl>, N29 <dbl>, N30 <dbl>, N31 <dbl>, N32 <dbl>, N33 <dbl>,
-## #   N34 <dbl>, N35 <dbl>, N36 <dbl>, N37 <dbl>, N38 <dbl>, N39 <dbl>,
-## #   N40 <dbl>, N41 <dbl>, N42 <dbl>, N43 <dbl>, N44 <dbl>, N45 <dbl>,
-## #   N46 <dbl>, N47 <dbl>, N48 <dbl>, N49 <dbl>, N50 <dbl>, N51 <dbl>, …
+dat(d)
+
## # A tibble: 120 × 2,000
+##       N1    N2    N3    N4    N5    N6    N7    N8    N9   N10   N11   N12   N13
+##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
+##  1     0     0     0     0     0     0     0     0     0     0     0     0     0
+##  2     0     0     0     0     0     0     0     0     0     0     0     0     0
+##  3     0     0     0     0     0     0     0     0     0     0     0     0     0
+##  4     0     0     0     0     0     0     0     0     0     0     0     0     0
+##  5     0     0     0     0     0     0     0     0     0     0     0     0     0
+##  6     0     0     0     0     0     0     0     0     0     0     0     0     0
+##  7     0     0     0     0     0     0     0     0     0     0     0     0     0
+##  8     0     0     0     0     0     0     0     0     0     0     0     0     0
+##  9     0     0     0     0     0     0     0     0     0     0     0     0     0
+## 10     0     0     0     0     0     0     0     0     0     0     0     0     0
+## # … with 110 more rows, and 1,987 more variables: N14 <dbl>, N15 <dbl>,
+## #   N16 <dbl>, N17 <dbl>, N18 <dbl>, N19 <dbl>, N20 <dbl>, N21 <dbl>,
+## #   N22 <dbl>, N23 <dbl>, N24 <dbl>, N25 <dbl>, N26 <dbl>, N27 <dbl>,
+## #   N28 <dbl>, N29 <dbl>, N30 <dbl>, N31 <dbl>, N32 <dbl>, N33 <dbl>,
+## #   N34 <dbl>, N35 <dbl>, N36 <dbl>, N37 <dbl>, N38 <dbl>, N39 <dbl>,
+## #   N40 <dbl>, N41 <dbl>, N42 <dbl>, N43 <dbl>, N44 <dbl>, N45 <dbl>,
+## #   N46 <dbl>, N47 <dbl>, N48 <dbl>, N49 <dbl>, N50 <dbl>, N51 <dbl>, …

Or alternatively, can be used to assign a new data table:

-dat(d) <- abr1$pos
+dat(d) <- abr1$pos
 d
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 9
+
## 
+## AnalysisData object containing:
+## 
+## Samples: 120 
+## Features: 2000 
+## Info: 9

The sample information table can be extracted using the sinfo method:

-sinfo(d)
-
## # A tibble: 120 × 9
-##    injorder pathcdf              filecdf name.org remark name    rep day   class
-##       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
-##  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
-##  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
-##  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
-##  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
-##  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
-##  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
-##  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
-##  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
-##  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
-## 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
-## # … with 110 more rows
+sinfo(d)
+
## # A tibble: 120 × 9
+##    injorder pathcdf              filecdf name.org remark name    rep day   class
+##       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
+##  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
+##  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
+##  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
+##  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
+##  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
+##  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
+##  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
+##  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
+##  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
+## 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
+## # … with 110 more rows

And similarly used to assign a new sample information table:

-sinfo(d) <- abr1$fact[,1:2]
+sinfo(d) <- abr1$fact[,1:2]
 d
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 2000 
-## Info: 2
+
## 
+## AnalysisData object containing:
+## 
+## Samples: 120 
+## Features: 2000 
+## Info: 2
-
-

-Sample information

+
+

Sample information +

There are a number of methods that provide utility for querying and altering the sample information within an AnalysisData object. These methods are all named with the prefix cls and include:

  • clsAdd
  • @@ -235,99 +231,99 @@

  • clsRename
  • clsReplace
-

The names of the available sample information columns can be shown using clsAvailable().

+

The names of the available sample information columns can be shown using clsAvailable().

-
## [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
-## [8] "day"      "class"
-

A given column can be extracted using clsExtract(). Here, the day column is extracted.

+clsAvailable(d)
+
## [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
+## [8] "day"      "class"
+

A given column can be extracted using clsExtract(). Here, the day column is extracted.

-clsExtract(d,cls = 'day')
-
##   [1] 2 3 4 1 2 1 2 4 H H 4 5 1 2 H 5 3 3 2 H 4 3 5 4 H H 3 H H 1 1 1 5 5 3 4 H
-##  [38] 1 5 5 1 2 4 3 2 4 3 2 5 4 4 H 3 4 2 4 4 1 5 4 4 1 1 H 3 2 H 3 3 1 2 H H 2
-##  [75] 3 5 3 2 5 2 4 3 H 2 3 2 1 1 4 5 3 2 1 H 5 2 4 H 1 4 4 1 1 5 H 5 1 3 3 5 5
-## [112] 5 3 2 5 H 5 H 2 1
-## Levels: 1 2 3 4 5 H
+clsExtract(d,cls = 'day')
+
##   [1] 2 3 4 1 2 1 2 4 H H 4 5 1 2 H 5 3 3 2 H 4 3 5 4 H H 3 H H 1 1 1 5 5 3 4 H
+##  [38] 1 5 5 1 2 4 3 2 4 3 2 5 4 4 H 3 4 2 4 4 1 5 4 4 1 1 H 3 2 H 3 3 1 2 H H 2
+##  [75] 3 5 3 2 5 2 4 3 H 2 3 2 1 1 4 5 3 2 1 H 5 2 4 H 1 4 4 1 1 5 H 5 1 3 3 5 5
+## [112] 5 3 2 5 H 5 H 2 1
+## Levels: 1 2 3 4 5 H

Sample class frequencies could then be computed.

-clsExtract(d,cls = 'day') %>%
-  table()
-
## .
-##  1  2  3  4  5  H 
-## 20 20 20 20 20 20
+clsExtract(d,cls = 'day') %>% + table() +
## .
+##  1  2  3  4  5  H 
+## 20 20 20 20 20 20

It can be seen that there are 20 samples available in each class.

Another example is the addition of a new sample information column. In the following, a column called new_class will be added with all samples labelled 1.

-d <- clsAdd(d,cls = 'new_class',value = rep(1,nSamples(d)))
-clsAvailable(d)
-
##  [1] "injorder"  "pathcdf"   "filecdf"   "name.org"  "remark"    "name"     
-##  [7] "rep"       "day"       "class"     "new_class"
+d <- clsAdd(d,cls = 'new_class',value = rep(1,nSamples(d))) +clsAvailable(d) +
##  [1] "injorder"  "pathcdf"   "filecdf"   "name.org"  "remark"    "name"     
+##  [7] "rep"       "day"       "class"     "new_class"
-
-

-Keeping / removing samples or features

+
+

Keeping / removing samples or features +

Samples or features can easily be kept or removed from an AnalysisData object as is most convenient.

Below can be seen the first 6 sample indexes in the injorder column of the sample information.

-samples <- d %>%
-  clsExtract(cls = 'injorder') %>%
-  head()
+samples <- d %>%
+  clsExtract(cls = 'injorder') %>%
+  head()
 
-print(samples)
-
## [1] 1 2 3 4 5 6
+print(samples)
+
## [1] 1 2 3 4 5 6

Only these samples could be kept using:

-d %>%
-  keepSamples(idx = 'injorder',samples = samples)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 6 
-## Features: 2000 
-## Info: 10
+d %>% + keepSamples(idx = 'injorder',samples = samples)
+
## 
+## AnalysisData object containing:
+## 
+## Samples: 6 
+## Features: 2000 
+## Info: 10

Or removed using:

-d %>%
-  removeSamples(idx = 'injorder',samples = samples)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 114 
-## Features: 2000 
-## Info: 10
+d %>% + removeSamples(idx = 'injorder',samples = samples) +
## 
+## AnalysisData object containing:
+## 
+## Samples: 114 
+## Features: 2000 
+## Info: 10

The process is very similar for keeping or removing specific metabolome features from the data table. Below can be seen the first 6 feature names in the data table.

-feat <- d %>%
-  features() %>%
-  head()
+feat <- d %>%
+  features() %>%
+  head()
 
-print(feat)
-
## [1] "N1" "N2" "N3" "N4" "N5" "N6"
+print(feat)
+
## [1] "N1" "N2" "N3" "N4" "N5" "N6"

Only these features can be kept using:

-d %>%
-  keepFeatures(features = feat)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 6 
-## Info: 10
+d %>% + keepFeatures(features = feat) +
## 
+## AnalysisData object containing:
+## 
+## Samples: 120 
+## Features: 6 
+## Info: 10

Or to remove these features:

-d %>%
-  removeFeatures(features = feat)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 1994 
-## Info: 10
+d %>% + removeFeatures(features = feat) +
## 
+## AnalysisData object containing:
+## 
+## Samples: 120 
+## Features: 1994 
+## Info: 10
-
-

-Routine analyses

+
+

Routine analyses +

Routine analyses are those that are often made up of numerous steps where parameters have likely already been previously established. The emphasis here is on convenience with as little code as possible required. In these analyses, the necessary analysis elements, order and parameters are first prepared and then the analysis routine subsequently performed in a single step. This section will introduce how this type of analysis can be performed using metabolyseR and will include four main topics:

  • analysis parameter selection
  • @@ -335,213 +331,213 @@

  • performing a re-analysis
  • extracting analysis results
-
-

-Analysis parameters

+
+

Analysis parameters +

Parameter selection is the fundamental aspect for performing routine analyses using metabolyseR and will be the step requiring the most input from the user. The parameters for an analysis are stored in an S4 object of class AnalysisParameters containing the relevant parameters of the selected analysis elements.

The parameters have been named so that they consistently denote the same functionality across all analysis element methods. Discussion of the specific parameters can be found within the vignettes of the relevant analysis elements. These can be accessed using:

-browseVignettes('metabolyseR')
+browseVignettes('metabolyseR')

There are several ways to specify the parameters to use for analysis. The first is programmatically and the second is through the use of the YAML format.

-
-

-Programatic specification

+
+

Programmatic specification +

The available analysis elements can be shown using:

-
## [1] "pre-treatment" "modelling"     "correlations"
+
## [1] "pre-treatment" "modelling"     "correlations"

The analysisParameters() function can be used to create an AnalysisParameters object containing the default parameters. For example, the code below will return default parameters for all the metabolyseR analysis elements.

-
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = class
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = class
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = class
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = class
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = class
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-## 
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
+
## Parameters:
+## pre-treatment
+##  QC
+##      occupancyFilter
+##          cls = class
+##          QCidx = QC
+##          occupancy = 2/3
+##      impute
+##          cls = class
+##          QCidx = QC
+##          occupancy = 2/3
+##          parallel = variables
+##          seed = 1234
+##      RSDfilter
+##          cls = class
+##          QCidx = QC
+##          RSDthresh = 50
+##      removeQC
+##          cls = class
+##          QCidx = QC
+##  occupancyFilter
+##      maximum
+##          cls = class
+##          occupancy = 2/3
+##  impute
+##      class
+##          cls = class
+##          occupancy = 2/3
+##          seed = 1234
+##  transform
+##      TICnorm
+## 
+## modelling
+##  randomForest
+##      cls = class
+##      rf = list()
+##      reps = 1
+##      binary = FALSE
+##      comparisons = list()
+##      perm = 0
+##      returnModels = FALSE
+##      seed = 1234
+## 
+## correlations
+##  method = pearson
+##  pAdjustMethod = bonferroni
+##  corPvalue = 0.05

To retrieve parameters for a subset of analysis elements the following can be run, returning parameters for only the pre-treatment and modelling elements.

-p <- analysisParameters(c('pre-treatment','modelling'))
+p <- analysisParameters(c('pre-treatment','modelling'))
 p
-
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = class
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = class
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = class
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = class
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = class
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = class
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
+
## Parameters:
+## pre-treatment
+##  QC
+##      occupancyFilter
+##          cls = class
+##          QCidx = QC
+##          occupancy = 2/3
+##      impute
+##          cls = class
+##          QCidx = QC
+##          occupancy = 2/3
+##          parallel = variables
+##          seed = 1234
+##      RSDfilter
+##          cls = class
+##          QCidx = QC
+##          RSDthresh = 50
+##      removeQC
+##          cls = class
+##          QCidx = QC
+##  occupancyFilter
+##      maximum
+##          cls = class
+##          occupancy = 2/3
+##  impute
+##      class
+##          cls = class
+##          occupancy = 2/3
+##          seed = 1234
+##  transform
+##      TICnorm
+## 
+## modelling
+##  randomForest
+##      cls = class
+##      rf = list()
+##      reps = 1
+##      binary = FALSE
+##      comparisons = list()
+##      perm = 0
+##      returnModels = FALSE
+##      seed = 1234

The changeParameter() function can be used to uniformly change these parameters across all of the selected methods. The example below changes the defaults of all the parameters named cls from the default class to day.

 p <- analysisParameters()
 changeParameter(p,'cls') <- 'day'
 p
-
## Parameters:
-## pre-treatment
-##  QC
-##      occupancyFilter
-##          cls = day
-##          QCidx = QC
-##          occupancy = 2/3
-##      impute
-##          cls = day
-##          QCidx = QC
-##          occupancy = 2/3
-##          parallel = variables
-##          seed = 1234
-##      RSDfilter
-##          cls = day
-##          QCidx = QC
-##          RSDthresh = 50
-##      removeQC
-##          cls = day
-##          QCidx = QC
-##  occupancyFilter
-##      maximum
-##          cls = day
-##          occupancy = 2/3
-##  impute
-##      class
-##          cls = day
-##          occupancy = 2/3
-##          seed = 1234
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = day
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-## 
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
+
## Parameters:
+## pre-treatment
+##  QC
+##      occupancyFilter
+##          cls = day
+##          QCidx = QC
+##          occupancy = 2/3
+##      impute
+##          cls = day
+##          QCidx = QC
+##          occupancy = 2/3
+##          parallel = variables
+##          seed = 1234
+##      RSDfilter
+##          cls = day
+##          QCidx = QC
+##          RSDthresh = 50
+##      removeQC
+##          cls = day
+##          QCidx = QC
+##  occupancyFilter
+##      maximum
+##          cls = day
+##          occupancy = 2/3
+##  impute
+##      class
+##          cls = day
+##          occupancy = 2/3
+##          seed = 1234
+##  transform
+##      TICnorm
+## 
+## modelling
+##  randomForest
+##      cls = day
+##      rf = list()
+##      reps = 1
+##      binary = FALSE
+##      comparisons = list()
+##      perm = 0
+##      returnModels = FALSE
+##      seed = 1234
+## 
+## correlations
+##  method = pearson
+##  pAdjustMethod = bonferroni
+##  corPvalue = 0.05

Alternatively, the parameters of specific analysis elements can be targeted using the elements argument. The following will only alter the cls parameter back to class for the pre-treatment element parameters:

 changeParameter(p,'cls',elements = 'pre-treatment') <- 'class'

Parameters can be extracted from the AnalysisParameters class using the parameters() function for a specified element.

 parameters(p,'correlations')
-
## $method
-## [1] "pearson"
-## 
-## $pAdjustMethod
-## [1] "bonferroni"
-## 
-## $corPvalue
-## [1] 0.05
-

Each analysis element has a function for returning default parameters for specific methods. These include preTreatmentParameters(), modellingParameters() and correlationParameters(). Each returns a list of the default parameters for a specified methods as shown in the example for modellingParameters() below.

+
## $method
+## [1] "pearson"
+## 
+## $pAdjustMethod
+## [1] "bonferroni"
+## 
+## $corPvalue
+## [1] 0.05
+

Each analysis element has a function for returning default parameters for specific methods. These include preTreatmentParameters(), modellingParameters() and correlationParameters(). Each returns a list of the default parameters for the specified methods, as shown in the example for modellingParameters() below.

-
## $anova
-## $anova$cls
-## [1] "class"
-## 
-## $anova$pAdjust
-## [1] "bonferroni"
-## 
-## $anova$comparisons
-## list()
-## 
-## $anova$returnModels
-## [1] FALSE
+modellingParameters('anova')
+
## $anova
+## $anova$cls
+## [1] "class"
+## 
+## $anova$pAdjust
+## [1] "bonferroni"
+## 
+## $anova$comparisons
+## list()
+## 
+## $anova$returnModels
+## [1] FALSE

Refer to the documentation (?) of each function for specific usage details.

The parameters returned by these functions can be assigned to an AnalysisParameters object, again using parameters()

-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
+parameters(p,'pre-treatment') <- preTreatmentParameters(
+  list(
     occupancyFilter = 'maximum',
     transform = 'TICnorm'
       )
   )
-
-

-YAML specification

-

Due to the relatively complex structure of the parameters needed for analyses containing many components, it is also possible to specify analysis parameters using the YAML file format. YAML parameter files (.yaml) can be parsed using the parseParameters() function. The example below shows the YAML specification for the defaults returned by analysisParameters().

+
+

YAML specification +

+

Due to the relatively complex structure of the parameters needed for analyses containing many components, it is also possible to specify analysis parameters using the YAML file format. YAML parameter files (.yaml) can be parsed using the parseParameters() function. The example below shows the YAML specification for the defaults returned by analysisParameters().

pre-treatment:
   QC:
     occupancyFilter:
@@ -589,8 +585,8 @@ 

corPvalue: 0.05

This can be passed directly into an AnalysisParameters object using the following:

-paramFile <- system.file('defaultParameters.yaml',package = 'metabolyseR')
-p <- parseParameters(paramFile)
+paramFile <- system.file('defaultParameters.yaml',package = 'metabolyseR') +p <- parseParameters(paramFile)

For more complex pre-treatment situations such as the following:

pre-treatment:
   remove:
@@ -613,21 +609,21 @@ 

Existing AnalysisParameters objects can also be exported to YAML format as shown below:

 p <- analysisParameters()
-exportParameters(p,file = 'analysis_parameters.yaml')
+exportParameters(p,file = 'analysis_parameters.yaml')

-
-

-Performing an analysis

+
+

Performing an analysis +

The analysis is performed in a single step using the metabolyse() function. This accepts the metabolomic data, the sample information and the analysis parameters.

The metabolomic data table of abundance values where the columns are the metabolome features and the rows are each sample observation. Similarly, the sample meta-information table should consist of the observations as rows and the meta information as columns. The order of the observation rows of the sample information table should be concordant with the rows in the metabolomics data table.

We can run an example analysis using the abr1 data set by first generating the default parameters for pre-treatment and modelling (random forest) analysis elements.

-p <- analysisParameters(c('pre-treatment','modelling'))
+p <- analysisParameters(c('pre-treatment','modelling'))

Custom pre-treatment parameters can then be specified to only include occupancy filtering and total ion count normalisation.

-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
+parameters(p,'pre-treatment') <- preTreatmentParameters(
+  list(
   occupancyFilter = 'maximum',
   transform = 'TICnorm')
 )
@@ -637,63 +633,63 @@

Finally, the analysis can be run in a single step. Here only the first 200 features of the negative ionisation mode data are specified to reduce the analysis time needed for this example.

 analysis <- metabolyse(abr1$neg[,1:200],abr1$fact,p) 
-
## 
-## metabolyseR  v0.14.6 Wed Nov 17 10:25:14 2021
-
## ________________________________________________________________________________
-
## Parameters:
-## pre-treatment
-##  occupancyFilter
-##      maximum
-##          cls = day
-##          occupancy = 2/3
-##  transform
-##      TICnorm
-## 
-## modelling
-##  randomForest
-##      cls = day
-##      rf = list()
-##      reps = 1
-##      binary = FALSE
-##      comparisons = list()
-##      perm = 0
-##      returnModels = FALSE
-##      seed = 1234
-
## ________________________________________________________________________________
-
## Pre-treatment …
+
## 
+## metabolyseR  v0.14.7 Fri Dec 17 17:56:28 2021
+
## ________________________________________________________________________________
+
## Parameters:
+## pre-treatment
+##  occupancyFilter
+##      maximum
+##          cls = day
+##          occupancy = 2/3
+##  transform
+##      TICnorm
+## 
+## modelling
+##  randomForest
+##      cls = day
+##      rf = list()
+##      reps = 1
+##      binary = FALSE
+##      comparisons = list()
+##      perm = 0
+##      returnModels = FALSE
+##      seed = 1234
+
## ________________________________________________________________________________
+
## 
[34mPre-treatment 
[39m…
 
-Pre-treatment   ✓ [0.8S]
-## Modelling …
-
-Modelling   ✓ [3.5S]
+
[34mPre-treatment 
[39m    
[32m✓
[39m [0.8S]
+## 
[34mModelling 
[39m…
+
[34m
+Modelling 
[39m 
[32m✓
[39m [3.3S]
 ## ________________________________________________________________________________
 ## 
-## Complete! [4.4S]
+## 
[32mComplete! 
[39m[4.1S]

Note: If a data pre-treatment step is not performed prior to modelling or correlation analysis, the raw data will automatically be used.

The analysis object containing the analysis results can be printed to provide some basic information about the results of the analysis.

-print(analysis)
-
## 
-## metabolyseR v0.14.6
-## Analysis:
-##  Wed Nov 17 10:25:14 2021
-## 
-##  Raw Data:
-##      No. samples = 120
-##      No. features = 200
-## 
-##  Pre-treated Data:
-##      Wed Nov 17 10:25:15 2021
-##      No. samples = 120
-##      No. features = 48
-## 
-##  Modelling:
-##      Wed Nov 17 10:25:18 2021
-##      Methods: randomForest
+print(analysis)

+
## 
+## metabolyseR v0.14.7
+## Analysis:
+##     Fri Dec 17 17:56:28 2021
+## 
+##  Raw Data:
+##      No. samples = 120
+##      No. features = 200
+## 
+##  Pre-treated Data:
+##      Fri Dec 17 17:56:29 2021
+##      No. samples = 120
+##      No. features = 48
+## 
+##  Modelling:
+##      Fri Dec 17 17:56:32 2021
+##      Methods: randomForest
-
-

-Performing a re-analysis

+
+

Performing a re-analysis +

There are likely to be occasions where an analysis will need to be re-analysed using a new set of parameters. This can be achieved using the reAnalyse() function.

In the example below we will run a correlation analysis in addition to the pre-treatment and modelling elements already performed.

Firstly, we can specify the correlation parameters:

@@ -702,119 +698,119 @@

Then perform the re-analysis on our previously analysed Analysis object, specifying the additional parameters.

 analysis <- reAnalyse(analysis,parameters)
-
## 
-## metabolyseR v0.14.6 Wed Nov 17 10:25:19 2021
-## ________________________________________________________________________________
-## Parameters:
-## correlations
-##  method = pearson
-##  pAdjustMethod = bonferroni
-##  corPvalue = 0.05
-## ________________________________________________________________________________
-
## Correlations …
-
-Correlations    ✓ [0.1S]
-
## ________________________________________________________________________________
-## 
-## Complete! [0.1S]
+
## 
+## metabolyseR v0.14.7 Fri Dec 17 17:56:33 2021
+## ________________________________________________________________________________
+## Parameters:
+## correlations
+##  method = pearson
+##  pAdjustMethod = bonferroni
+##  corPvalue = 0.05
+## ________________________________________________________________________________
+
## 
[34mCorrelations 
[39m…
+
[34m
+Correlations 
[39m  
[32m✓
[39m [0.1S]
+
## ________________________________________________________________________________
+## 
+## Complete! [0.1S]

An overview of the results of the analysis (now including correlations) can then be printed.

-print(analysis)
-
## 
-## metabolyseR v0.14.6
-## Analysis:
-##  Wed Nov 17 10:25:14 2021
-## 
-##  Raw Data:
-##      No. samples = 120
-##      No. features = 200
-## 
-##  Pre-treated Data:
-##      Wed Nov 17 10:25:15 2021
-##      No. samples = 120
-##      No. features = 48
-## 
-##  Modelling:
-##      Wed Nov 17 10:25:18 2021
-##      Methods: randomForest
-## 
-##  Correlations:
-##      Wed Nov 17 10:25:19 2021
-##      No. correlations = 140
+print(analysis)

+
## 
+## metabolyseR v0.14.7
+## Analysis:
+##     Fri Dec 17 17:56:28 2021
+## 
+##  Raw Data:
+##      No. samples = 120
+##      No. features = 200
+## 
+##  Pre-treated Data:
+##      Fri Dec 17 17:56:29 2021
+##      No. samples = 120
+##      No. features = 48
+## 
+##  Modelling:
+##      Fri Dec 17 17:56:32 2021
+##      Methods: randomForest
+## 
+##  Correlations:
+##      Fri Dec 17 17:56:33 2021
+##      No. correlations = 140
-
-

-Extracting analysis results

+
+

Extracting analysis results +

An analysis performed by metabolyse() returns an S4 object of class Analysis. There are a number of ways of extracting analysis results from this object.

-

Similarly to the AnalysisData class, the dat() and sinfo() functions can be used to extract the metabolomics data or sample information tables directly for either the raw or pre-treated data.

+

Similarly to the AnalysisData class, the dat() and sinfo() functions can be used to extract the metabolomics data or sample information tables directly for either the raw or pre-treated data.

For example, to extract the pre-treated metabolomics data from our object analysis:

-dat(analysis,type = 'pre-treated')
-
## # A tibble: 120 × 48
-##       N113    N115    N117    N118    N119    N127    N128    N129  N130    N131
-##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>
-##  1 0.00646 0       1.68e-4 0       1.60e-3 0.0323  2.65e-4 2.80e-4     0 0      
-##  2 0.0113  7.74e-4 1.02e-3 0       1.43e-3 0.00856 0       3.95e-4     0 0      
-##  3 0.00931 6.01e-4 2.70e-3 6.22e-5 5.58e-3 0       0       1.05e-4     0 6.51e-4
-##  4 0.00798 0       0       0       1.62e-4 0.00848 0       4.05e-4     0 1.28e-4
-##  5 0.0105  0       0       0       0       0.00658 0       1.97e-3     0 0      
-##  6 0.00454 0       2.48e-4 3.25e-4 5.31e-4 0.00207 0       1.98e-4     0 0      
-##  7 0.0117  0       1.14e-3 0       4.39e-4 0.00603 0       4.04e-4     0 0      
-##  8 0.00787 2.36e-3 1.43e-3 1.52e-4 4.22e-3 0.00290 2.78e-4 5.76e-5     0 0      
-##  9 0.00136 1.87e-4 8.17e-4 1.87e-4 0       0.0610  1.31e-4 5.23e-4     0 0      
-## 10 0.00899 4.26e-4 2.06e-3 0       8.36e-4 0.00106 7.72e-4 0           0 0      
-## # … with 110 more rows, and 38 more variables: N132 <dbl>, N133 <dbl>,
-## #   N134 <dbl>, N135 <dbl>, N136 <dbl>, N137 <dbl>, N139 <dbl>, N143 <dbl>,
-## #   N145 <dbl>, N146 <dbl>, N147 <dbl>, N149 <dbl>, N153 <dbl>, N155 <dbl>,
-## #   N157 <dbl>, N161 <dbl>, N163 <dbl>, N164 <dbl>, N165 <dbl>, N168 <dbl>,
-## #   N169 <dbl>, N170 <dbl>, N171 <dbl>, N173 <dbl>, N174 <dbl>, N175 <dbl>,
-## #   N179 <dbl>, N180 <dbl>, N181 <dbl>, N183 <dbl>, N187 <dbl>, N191 <dbl>,
-## #   N192 <dbl>, N193 <dbl>, N195 <dbl>, N196 <dbl>, N197 <dbl>, N198 <dbl>
+dat(analysis,type = 'pre-treated')
+
## # A tibble: 120 × 48
+##       N113    N115    N117    N118    N119    N127    N128    N129  N130    N131
+##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>
+##  1 0.00646 0       1.68e-4 0       1.60e-3 0.0323  2.65e-4 2.80e-4     0 0      
+##  2 0.0113  7.74e-4 1.02e-3 0       1.43e-3 0.00856 0       3.95e-4     0 0      
+##  3 0.00931 6.01e-4 2.70e-3 6.22e-5 5.58e-3 0       0       1.05e-4     0 6.51e-4
+##  4 0.00798 0       0       0       1.62e-4 0.00848 0       4.05e-4     0 1.28e-4
+##  5 0.0105  0       0       0       0       0.00658 0       1.97e-3     0 0      
+##  6 0.00454 0       2.48e-4 3.25e-4 5.31e-4 0.00207 0       1.98e-4     0 0      
+##  7 0.0117  0       1.14e-3 0       4.39e-4 0.00603 0       4.04e-4     0 0      
+##  8 0.00787 2.36e-3 1.43e-3 1.52e-4 4.22e-3 0.00290 2.78e-4 5.76e-5     0 0      
+##  9 0.00136 1.87e-4 8.17e-4 1.87e-4 0       0.0610  1.31e-4 5.23e-4     0 0      
+## 10 0.00899 4.26e-4 2.06e-3 0       8.36e-4 0.00106 7.72e-4 0           0 0      
+## # … with 110 more rows, and 38 more variables: N132 <dbl>, N133 <dbl>,
+## #   N134 <dbl>, N135 <dbl>, N136 <dbl>, N137 <dbl>, N139 <dbl>, N143 <dbl>,
+## #   N145 <dbl>, N146 <dbl>, N147 <dbl>, N149 <dbl>, N153 <dbl>, N155 <dbl>,
+## #   N157 <dbl>, N161 <dbl>, N163 <dbl>, N164 <dbl>, N165 <dbl>, N168 <dbl>,
+## #   N169 <dbl>, N170 <dbl>, N171 <dbl>, N173 <dbl>, N174 <dbl>, N175 <dbl>,
+## #   N179 <dbl>, N180 <dbl>, N181 <dbl>, N183 <dbl>, N187 <dbl>, N191 <dbl>,
+## #   N192 <dbl>, N193 <dbl>, N195 <dbl>, N196 <dbl>, N197 <dbl>, N198 <dbl>

Or to extract the raw sample information:

-sinfo(analysis,type = 'raw')
-
## # A tibble: 120 × 9
-##    injorder pathcdf              filecdf name.org remark name    rep day   class
-##       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
-##  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
-##  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
-##  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
-##  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
-##  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
-##  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
-##  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
-##  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
-##  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
-## 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
-## # … with 110 more rows
+sinfo(analysis,type = 'raw')
+
## # A tibble: 120 × 9
+##    injorder pathcdf              filecdf name.org remark name    rep day   class
+##       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
+##  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
+##  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
+##  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
+##  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
+##  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
+##  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
+##  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
+##  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
+##  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
+## 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
+## # … with 110 more rows

Alternatively the raw or preTreated functions can be used to extract the AnalysisData class objects containing both the metabolomics data and sample information for the raw and pre-treated data respectively.

-raw(analysis)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 200 
-## Info: 9
+raw(analysis)
+
## 
+## AnalysisData object containing:
+## 
+## Samples: 120 
+## Features: 200 
+## Info: 9
-preTreated(analysis)
-
## 
-## AnalysisData object containing:
-## 
-## Samples: 120 
-## Features: 48 
-## Info: 9
+preTreated(analysis) +
## 
+## AnalysisData object containing:
+## 
+## Samples: 120 
+## Features: 48 
+## Info: 9

Lastly the analysisResults function can be used to extract the results of any of the analysis elements. The following will extract the modelling results:

-analysisResults(analysis,element = 'modelling')
-
## $randomForest
-## 
-## Random forest classification 
-## 
-## Samples:  120 
-## Features:     48 
-## Response:     day 
-## # comparisons:    1
+analysisResults(analysis,element = 'modelling') +
## $randomForest
+## 
+## Random forest classification 
+## 
+## Samples:  120 
+## Features:     48 
+## Response:     day 
+## # comparisons:    1
@@ -830,11 +826,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -843,5 +841,7 @@

+ + diff --git a/docs/articles/modelling.html b/docs/articles/modelling.html index c1754606..917f5f6f 100644 --- a/docs/articles/modelling.html +++ b/docs/articles/modelling.html @@ -19,6 +19,8 @@ + +
+
-
-

-Introduction

+
+

Introduction +

Modelling provides the essential data mining step for extracting biological information and explanatory metabolome features from a data set relating to the experimental conditions. metabolyseR provides a number of both univariate and multivariate methods for data mining.

For an introduction to the usage of metabolyseR for both exploratory and routine analyses, see the introduction vignette using:

-vignette('introduction','metabolyseR')
+vignette('introduction','metabolyseR')

To further supplement this document, a quick start example analysis is also available as a vignette:

-vignette('quick_start','metabolyseR')
+vignette('quick_start','metabolyseR')

To begin, the package can be loaded using:

-library(metabolyseR)
+library(metabolyseR)
 #> 
 #> Attaching package: 'metabolyseR'
 #> The following object is masked from 'package:stats':
@@ -122,17 +118,17 @@ 

#> The following objects are masked from 'package:base': #> #> raw, split

-
-

-Example data

-

The examples used here will use the abr1 data set from the metaboData package. This is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The pipe %>% from the magrittr package will also be used. The example data can be loaded using:

+
+

Example data +

+

The examples used here will use the abr1 data set from the metaboData package. This is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The pipe %>% from the magrittr package will also be used. The example data can be loaded using:

+library(metaboData)

Only the negative acquisition mode data (abr1$neg) will be used along with the sample meta-information (abr1$fact). Create an AnalysisData class object, assigned to the variable d, using the following:

 d <- analysisData(abr1$neg[,1:500],abr1$fact)
-print(d)
+print(d)
 #> 
 #> AnalysisData object containing:
 #> 
@@ -141,33 +137,33 @@ 

#> Info: 9

As can be seen above the data set contains a total of 120 samples and 500 features.

-
-

-Parallel processing

-

The package supports parallel processing using the future package.

-

By default, processing by metabolyseR will be done seqentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel implementation using plan(). The following example specifies using the multisession implementation (muliple background R sessions) with two worker processes.

+
+

Parallel processing +

+

The package supports parallel processing using the future package.

+

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel implementation using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

-plan(future::multisession,workers = 2)
-

See the future package documentation for more information on the types of parallel implementations that are available.

+plan(future::multisession,workers = 2)
+

See the future package documentation for more information on the types of parallel implementations that are available.

-
-

-Random Forest

+
+

Random Forest +

Random forest is a versatile ensemble machine learning approach based on forests of decision trees for multivariate data mining. This can include unsupervised analysis, classification of discrete response variables and regression of continuous responses.

-

Random forest can be performed in metabolyseR using the randomForest() method. For further details on the arguments for using this function, see ?randomForest. This implementation of random forest in metabolyseR utilises the randomForest package. See ?randomForest::randomForest for more information about that implementation.

-
-

-Unsupervised

+

Random forest can be performed in metabolyseR using the randomForest() method. For further details on the arguments for using this function, see ?randomForest. This implementation of random forest in metabolyseR utilises the randomForest package. See ?randomForest::randomForest for more information about that implementation.

+
+

Unsupervised +

The unsupervised random forest approach can be a useful starting point for analysis in any experimental context. It can be used to give a general overview of the structure of the data and to identify any possible problems. These could include situations such as the presence of outlier samples or splits in the data caused by the impact of analytical or sample preparation factors. Unsupervised random forest can have advantages in these assessments over other approaches such as Principal Component Analysis (PCA). It is less sensitive to the effect of a single feature that in fact could have little overall impact relative to the other hundreds that could be present in a data set.

The examples below will show the use of unsupervised random forest for assessing the general structure of the example data set and the presence of outlier samples.

Unsupervised random forest can be performed by setting the cls argument of randomForest() to NULL:

-unsupervised_rf <- d %>%
+unsupervised_rf <- d %>%
   randomForest(cls = NULL)

The type of random forest that has been performed can be checked using the type method.

-type(unsupervised_rf)
+type(unsupervised_rf)
 #> [1] "unsupervised"

Or by printing the results object.

@@ -196,75 +192,75 @@ 

This shows that it is indeed the experimental factor of interest that is having the greatest impact on the structure of the data. The progression of the experimental time points are obvious across Dimension 1.

The available feature importance metrics for a random forest analysis can be retrieved by:

-importanceMetrics(unsupervised_rf)
+importanceMetrics(unsupervised_rf)
 #> [1] "1"                    "2"                    "FalsePositiveRate"   
 #> [4] "MeanDecreaseAccuracy" "MeanDecreaseGini"     "SelectionFrequency"

And the importance values of these metrics for each feature can be returned using:

-importance(unsupervised_rf)
-#> # A tibble: 3,000 × 3
+importance(unsupervised_rf)
+#> # A tibble: 3,000 × 3
 #>    Feature Metric                Value
-#>    <chr>   <chr>                 <dbl>
-#>  1 N1      1                    0     
-#>  2 N1      2                    0     
-#>  3 N1      FalsePositiveRate    0.0238
-#>  4 N1      MeanDecreaseAccuracy 0     
-#>  5 N1      MeanDecreaseGini     0     
-#>  6 N1      SelectionFrequency   0     
-#>  7 N10     1                    0     
-#>  8 N10     2                    0     
-#>  9 N10     FalsePositiveRate    0.0238
-#> 10 N10     MeanDecreaseAccuracy 0     
-#> # … with 2,990 more rows
+#> <chr> <chr> <dbl> +#> 1 N1 1 0 +#> 2 N1 2 0 +#> 3 N1 FalsePositiveRate 0.0238 +#> 4 N1 MeanDecreaseAccuracy 0 +#> 5 N1 MeanDecreaseGini 0 +#> 6 N1 SelectionFrequency 0 +#> 7 N10 1 0 +#> 8 N10 2 0 +#> 9 N10 FalsePositiveRate 0.0238 +#> 10 N10 MeanDecreaseAccuracy 0 +#> # … with 2,990 more rows

The explanatory features for a given threshold can be extracted for any of the importance metrics. The following will extract the explanatory features below a threshold of 0.05 based on the false positive rate metric.

-unsupervised_rf %>%
-  explanatoryFeatures(metric = "FalsePositiveRate", 
+unsupervised_rf %>%
+  explanatoryFeatures(metric = "FalsePositiveRate", 
                       threshold = 0.05)
-#> # A tibble: 359 × 3
+#> # A tibble: 359 × 3
 #>    Feature Metric               Value
-#>    <chr>   <chr>                <dbl>
-#>  1 N342    FalsePositiveRate 1.31e-19
-#>  2 N161    FalsePositiveRate 2.34e-16
-#>  3 N341    FalsePositiveRate 6.50e-16
-#>  4 N315    FalsePositiveRate 1.79e-15
-#>  5 N367    FalsePositiveRate 3.47e-14
-#>  6 N173    FalsePositiveRate 9.09e-14
-#>  7 N385    FalsePositiveRate 9.09e-14
-#>  8 N133    FalsePositiveRate 1.52e-12
-#>  9 N439    FalsePositiveRate 1.52e-12
-#> 10 N379    FalsePositiveRate 3.78e-12
-#> # … with 349 more rows
+#> <chr> <chr> <dbl> +#> 1 N342 FalsePositiveRate 1.31e-19 +#> 2 N161 FalsePositiveRate 2.34e-16 +#> 3 N341 FalsePositiveRate 6.50e-16 +#> 4 N315 FalsePositiveRate 1.79e-15 +#> 5 N367 FalsePositiveRate 3.47e-14 +#> 6 N173 FalsePositiveRate 9.09e-14 +#> 7 N385 FalsePositiveRate 9.09e-14 +#> 8 N133 FalsePositiveRate 1.52e-12 +#> 9 N439 FalsePositiveRate 1.52e-12 +#> 10 N379 FalsePositiveRate 3.78e-12 +#> # … with 349 more rows

In this example there are 359 explanatory features.

The trend of the most highly ranked explanatory feature against the day factor can be plotted using the plotFeature() method.

-unsupervised_rf %>%
+unsupervised_rf %>%
   plotFeature(feature = 'N425',
               cls = 'day')

-
-

-Classification

+
+

Classification +

Random forest classification can be used to assess the extent of discrimination (difference) between classes of a discrete response variable. This includes both multinomial (number of classes > 2) and binary (number of classes = 2) comparisons.

In multinomial situations, the suitability of a multinomial comparison versus multiple binary comparisons can depend on the experimental context. For instance, in a treatment/control experiment that includes multiple time points, a multinomial comparison using all available classes could be useful to visualise the general structure of the data. However, it could make any extracted explanatory features difficult to reason about as to how they relate to the individual experimental time point or treatment conditions. An investigator could instead identify the binary comparisons relevant to the biological question and focus the further classification comparisons to better select for explanatory features.

-
-

-Multinomial comparisons

+
+

Multinomial comparisons +

In experiments with more than two classes, multinomial random forest classification can be used to assess the discrimination between the classes and give an overview of the relative structure between classes.

The example data set consists of a total of 6 classes for the day response variable.

-d %>% 
-  clsExtract(cls = 'day') %>% 
-  unique()
+d %>% 
+  clsExtract(cls = 'day') %>% 
+  unique()
 #> [1] 2 3 4 1 H 5
 #> Levels: 1 2 3 4 5 H

Multinomial classification can be performed by:

-multinomial_rf <- d %>%
+multinomial_rf <- d %>%
   randomForest(cls = 'day')
 
-print(multinomial_rf)
+print(multinomial_rf)
 #> 
 #> Random forest classification 
 #> 
@@ -274,117 +270,117 @@ 

#> # comparisons: 1

The performance of this model can be assessed using metrics based on the success of the out of bag (OOB) predictions. The performance metrics can be extracted using:

-multinomial_rf %>%
-  metrics()
-#> # A tibble: 4 × 5
+multinomial_rf %>%
+  metrics()
+#> # A tibble: 4 × 5
 #>   Response Comparison  .metric  .estimator .estimate
-#>   <chr>    <chr>       <chr>    <chr>          <dbl>
-#> 1 day      1~2~3~4~5~H accuracy multiclass     0.8  
-#> 2 day      1~2~3~4~5~H kap      multiclass     0.76 
-#> 3 day      1~2~3~4~5~H roc_auc  hand_till      0.964
-#> 4 day      1~2~3~4~5~H margin   <NA>           0.146
+#> <chr> <chr> <chr> <chr> <dbl> +#> 1 day 1~2~3~4~5~H accuracy multiclass 0.8 +#> 2 day 1~2~3~4~5~H kap multiclass 0.76 +#> 3 day 1~2~3~4~5~H roc_auc hand_till 0.964 +#> 4 day 1~2~3~4~5~H margin NA 0.146

These metrics include accuracy, Cohen’s kappa (kap), area under the receiver operator characteristic curve (roc_auc, ROC-AUC) and margin. Each metric has both strengths and weaknesses that depend on the context of the classification such as the balance of observations between the classes. As shown below, the class frequencies for this example are balanced with 20 observations per class.

-d %>% 
-  clsExtract(cls = 'day') %>% 
-  table()
+d %>% 
+  clsExtract(cls = 'day') %>% 
+  table()
 #> .
 #>  1  2  3  4  5  H 
 #> 20 20 20 20 20 20

In this context, each of these metrics could be used to assess the predictive performance of the model. The margin metric is the difference between the proportion of votes for the correct class and the maximum proportion of votes for the other classes for a given observation which is then averaged across all the observations. A positive margin value indicates correct classification and values greater than 0.2 can be considered as the models having strong predictive power. The margin also allows the extent of discrimination to be discerned even in very distinct cases above where both the accuracy and ROC-AUC would be registering values of 1.

In this example, the values of all the metrics suggest that the model is showing good predictive performance. This can be investigated further by plotting the MDS of observation proximity values.

-multinomial_rf %>% 
+multinomial_rf %>% 
   plotMDS(cls = 'day')

This shows that the model is able to discriminate highly between classes such as 5 and H. It is less able to discriminate more similar classes such as H and 1 or 4 and 5 whose confidence ellipses show a high degree of overlap. This makes sense in the context of this experiment as these are adjacent time points that are more likely to be similar than time points at each end of the experiment.

The ROC curves can also be plotted as shown below.

-multinomial_rf %>% 
+multinomial_rf %>% 
   plotROC()

Classes with their line further from the central dashed line are those that were predicted with the greatest reliability by the model. This plot shows that both the H and 1 classes were least reliably predicted which is a result of their close proximity shown in the MDS plot previously.

Importance metrics can be used to identify the metabolome features that contribute most to the class discrimination in the model. The available importance metrics for this model are shown below.

-importanceMetrics(multinomial_rf)
+importanceMetrics(multinomial_rf)
 #>  [1] "1"                    "2"                    "3"                   
 #>  [4] "4"                    "5"                    "FalsePositiveRate"   
 #>  [7] "H"                    "MeanDecreaseAccuracy" "MeanDecreaseGini"    
 #> [10] "SelectionFrequency"

Here, we will use the false positive rate metric with a threshold of below 0.05 to identify explanatory features for the day response variable.

-multinomial_rf %>%
-  explanatoryFeatures(metric = 'FalsePositiveRate',
+multinomial_rf %>%
+  explanatoryFeatures(metric = 'FalsePositiveRate',
                       threshold = 0.05)
-#> # A tibble: 121 × 5
+#> # A tibble: 121 × 5
 #>    Response Comparison  Feature Metric               Value
-#>    <chr>    <chr>       <chr>   <chr>                <dbl>
-#>  1 day      1~2~3~4~5~H N341    FalsePositiveRate 1.02e-93
-#>  2 day      1~2~3~4~5~H N133    FalsePositiveRate 7.38e-68
-#>  3 day      1~2~3~4~5~H N163    FalsePositiveRate 3.59e-61
-#>  4 day      1~2~3~4~5~H N439    FalsePositiveRate 1.07e-54
-#>  5 day      1~2~3~4~5~H N342    FalsePositiveRate 3.19e-49
-#>  6 day      1~2~3~4~5~H N377    FalsePositiveRate 3.19e-49
-#>  7 day      1~2~3~4~5~H N171    FalsePositiveRate 6.26e-44
-#>  8 day      1~2~3~4~5~H N497    FalsePositiveRate 6.11e-30
-#>  9 day      1~2~3~4~5~H N146    FalsePositiveRate 2.74e-29
-#> 10 day      1~2~3~4~5~H N195    FalsePositiveRate 7.16e-25
-#> # … with 111 more rows
+#> <chr> <chr> <chr> <chr> <dbl> +#> 1 day 1~2~3~4~5~H N341 FalsePositiveRate 1.02e-93 +#> 2 day 1~2~3~4~5~H N133 FalsePositiveRate 7.38e-68 +#> 3 day 1~2~3~4~5~H N163 FalsePositiveRate 3.59e-61 +#> 4 day 1~2~3~4~5~H N439 FalsePositiveRate 1.07e-54 +#> 5 day 1~2~3~4~5~H N342 FalsePositiveRate 3.19e-49 +#> 6 day 1~2~3~4~5~H N377 FalsePositiveRate 3.19e-49 +#> 7 day 1~2~3~4~5~H N171 FalsePositiveRate 6.26e-44 +#> 8 day 1~2~3~4~5~H N497 FalsePositiveRate 6.11e-30 +#> 9 day 1~2~3~4~5~H N146 FalsePositiveRate 2.74e-29 +#> 10 day 1~2~3~4~5~H N195 FalsePositiveRate 7.16e-25 +#> # … with 111 more rows

As shown above there were a total of 121 explanatory features identified.

Within a multinomial experiment, where it might not be suitable to compare all the classes at once, it is also possible to specify the exact class comparisons to include using the comparisons argument. This should be specified as a named list, with the names corresponding to the cls argument. Each named element should then consist of a vector of comparisons, with the classes to compare separated using the ~.

The following specifies two comparisons (H~1~2,H~1~5) for the day response variable and displays the performance metrics.

-d %>%
+d %>%
   randomForest(cls = 'day',
-               comparisons = list(day = c('H~1~2',
-                                          'H~1~5'))) %>%
-  metrics()
-#> # A tibble: 8 × 5
+               comparisons = list(day = c('H~1~2',
+                                          'H~1~5'))) %>%
+  metrics()
+#> # A tibble: 8 × 5
 #>   Response Comparison .metric  .estimator .estimate
-#>   <chr>    <chr>      <chr>    <chr>          <dbl>
-#> 1 day      H~1~2      accuracy multiclass     0.833
-#> 2 day      H~1~2      kap      multiclass     0.75 
-#> 3 day      H~1~5      accuracy multiclass     0.75 
-#> 4 day      H~1~5      kap      multiclass     0.625
-#> 5 day      H~1~2      roc_auc  hand_till      0.906
-#> 6 day      H~1~5      roc_auc  hand_till      0.909
-#> 7 day      H~1~2      margin   <NA>           0.172
-#> 8 day      H~1~5      margin   <NA>           0.320
+#> <chr> <chr> <chr> <chr> <dbl> +#> 1 day H~1~2 accuracy multiclass 0.833 +#> 2 day H~1~2 kap multiclass 0.75 +#> 3 day H~1~5 accuracy multiclass 0.75 +#> 4 day H~1~5 kap multiclass 0.625 +#> 5 day H~1~2 roc_auc hand_till 0.906 +#> 6 day H~1~5 roc_auc hand_till 0.909 +#> 7 day H~1~2 margin NA 0.172 +#> 8 day H~1~5 margin NA 0.320

The MDS and ROC curve plots can also be plotted simultaneously for the two comparisons.

-d %>%
+d %>%
   randomForest(cls = 'day',
-               comparisons = list(day = c('H~1~2',
-                                          'H~1~5'))) %>%
+               comparisons = list(day = c('H~1~2',
+                                          'H~1~5'))) %>%
   {plotMDS(.,cls = 'day') +
       plotROC(.) +
-      patchwork::plot_layout(ncol = 1)}
+ patchwork::plot_layout(ncol = 1)}

Similarly, it is also possible to model multiple response factors with a single random forest call by specifying a vector of response class information column names to the cls argument. In the following, both the name and day response factors will be analysed and the performance metrics returned in a single table.

-d %>%
-  randomForest(cls = c('name','day')) %>%
-  metrics()
+d %>%
+  randomForest(cls = c('name','day')) %>%
+  metrics()
 #> Warning: Classes with < 5 replicates removed: "11_3", "11_4", "11_5", "11_6",
 #> "11_H", "12_1", "12_3", "12_6", "12_H", "13_1", "13_2", "13_3", "13_5", "13_6",
 #> "13_H", "14_2", "14_3", "14_5", "14_6", "14_H", "15_1", "15_2", "15_4", "15_5",
 #> "15_6", "15_H"
 #> Unbalanced classes detected. Stratifying sample size to the smallest class size.
-#> # A tibble: 8 × 5
+#> # A tibble: 8 × 5
 #>   Response Comparison                    .metric  .estimator .estimate
-#>   <chr>    <chr>                         <chr>    <chr>          <dbl>
-#> 1 name     11_2~12_2~12_4~13_4~14_4~15_3 accuracy multiclass    0.35  
-#> 2 name     11_2~12_2~12_4~13_4~14_4~15_3 kap      multiclass    0.212 
-#> 3 name     11_2~12_2~12_4~13_4~14_4~15_3 roc_auc  hand_till     0.753 
-#> 4 name     11_2~12_2~12_4~13_4~14_4~15_3 margin   <NA>         -0.0485
-#> 5 day      1~2~3~4~5~H                   accuracy multiclass    0.8   
-#> 6 day      1~2~3~4~5~H                   kap      multiclass    0.76  
-#> 7 day      1~2~3~4~5~H                   roc_auc  hand_till     0.964 
-#> 8 day      1~2~3~4~5~H                   margin   <NA>          0.146
+#> <chr> <chr> <chr> <chr> <dbl> +#> 1 name 11_2~12_2~12_4~13_4~14_4~15_3 accuracy multiclass 0.35 +#> 2 name 11_2~12_2~12_4~13_4~14_4~15_3 kap multiclass 0.212 +#> 3 name 11_2~12_2~12_4~13_4~14_4~15_3 roc_auc hand_till 0.753 +#> 4 name 11_2~12_2~12_4~13_4~14_4~15_3 margin NA -0.0485 +#> 5 day 1~2~3~4~5~H accuracy multiclass 0.8 +#> 6 day 1~2~3~4~5~H kap multiclass 0.76 +#> 7 day 1~2~3~4~5~H roc_auc hand_till 0.964 +#> 8 day 1~2~3~4~5~H margin NA 0.146

The MDS plots can also be returned for both models simultaneously.

-d %>%
-  randomForest(cls = c('name','day')) %>%
+d %>%
+  randomForest(cls = c('name','day')) %>%
   plotMDS()
 #> Warning: Classes with < 5 replicates removed: "11_3", "11_4", "11_5", "11_6",
 #> "11_H", "12_1", "12_3", "12_6", "12_H", "13_1", "13_2", "13_3", "13_5", "13_6",
@@ -393,26 +389,26 @@ 

#> Unbalanced classes detected. Stratifying sample size to the smallest class size.

-
-

-Binary comparisons

+
+

Binary comparisons +

It may in some cases be preferable to analyse class comparisons as multiple binary comparisons.

-

The possible binary comparisons for a given response variable can be displayed using the binaryComparisons() method. Below shows the 15 comparisons for the day response variable.

+

The possible binary comparisons for a given response variable can be displayed using the binaryComparisons() method. Below shows the 15 comparisons for the day response variable.

-binaryComparisons(d,cls = 'day')
+binaryComparisons(d,cls = 'day')
 #>  [1] "1~2" "1~3" "1~4" "1~5" "1~H" "2~3" "2~4" "2~5" "2~H" "3~4" "3~5" "3~H"
 #> [13] "4~5" "4~H" "5~H"

For this example we will only use the binary comparisons containing the H class.

-binary_comparisons <- binaryComparisons(d,cls = 'day') %>% 
-  .[stringr::str_detect(.,'H')]
+binary_comparisons <- binaryComparisons(d,cls = 'day') %>% + .[stringr::str_detect(.,'H')]

The binary comparisons can then be performed using the following.

-binary_rf <- d %>%
+binary_rf <- d %>%
   randomForest(cls = 'day',
-               comparisons = list(day = binary_comparisons))
+               comparisons = list(day = binary_comparisons))
 
-print(binary_rf)
+print(binary_rf)
 #> 
 #> Random forest classification 
 #> 
@@ -423,59 +419,59 @@ 

To run all possible binary comparisons, the binary = TRUE argument could instead be used.

The MDS plots for each comparison can be visualised to inspect the comparisons.

-binary_rf %>% 
+binary_rf %>% 
   plotMDS(cls = 'day')

These plots show good separation in all the comparisons except H~1 which is also shown by the plot of the performance metrics below. Each of the comparisons are showing perfect performance for the accuracy, Cohen’s kappa and ROC-AUC metrics as well as very high margin values except for the H~1 comparison.

-binary_rf %>% 
+binary_rf %>% 
   plotMetrics()

The explanatory features for these comparisons can be extracted as below using the false positive rate metric and a cut-off threshold of 0.05. This gives a total of 251 explanatory features.

-binary_rf %>% 
-  explanatoryFeatures(metric = 'FalsePositiveRate',
+binary_rf %>% 
+  explanatoryFeatures(metric = 'FalsePositiveRate',
                       threshold = 0.05)
-#> # A tibble: 251 × 5
+#> # A tibble: 251 × 5
 #>    Response Comparison Feature Metric               Value
-#>    <chr>    <chr>      <chr>   <chr>                <dbl>
-#>  1 day      2~H        N341    FalsePositiveRate 7.34e-52
-#>  2 day      2~H        N439    FalsePositiveRate 1.80e-45
-#>  3 day      3~H        N342    FalsePositiveRate 2.71e-39
-#>  4 day      2~H        N327    FalsePositiveRate 1.06e-35
-#>  5 day      3~H        N439    FalsePositiveRate 1.06e-35
-#>  6 day      2~H        N477    FalsePositiveRate 1.60e-34
-#>  7 day      3~H        N377    FalsePositiveRate 1.60e-34
-#>  8 day      4~H        N477    FalsePositiveRate 7.40e-34
-#>  9 day      2~H        N447    FalsePositiveRate 6.48e-30
-#> 10 day      3~H        N163    FalsePositiveRate 6.48e-30
-#> # … with 241 more rows
+#> <chr> <chr> <chr> <chr> <dbl> +#> 1 day 2~H N341 FalsePositiveRate 7.34e-52 +#> 2 day 2~H N439 FalsePositiveRate 1.80e-45 +#> 3 day 3~H N342 FalsePositiveRate 2.71e-39 +#> 4 day 2~H N327 FalsePositiveRate 1.06e-35 +#> 5 day 3~H N439 FalsePositiveRate 1.06e-35 +#> 6 day 2~H N477 FalsePositiveRate 1.60e-34 +#> 7 day 3~H N377 FalsePositiveRate 1.60e-34 +#> 8 day 4~H N477 FalsePositiveRate 7.40e-34 +#> 9 day 2~H N447 FalsePositiveRate 6.48e-30 +#> 10 day 3~H N163 FalsePositiveRate 6.48e-30 +#> # … with 241 more rows

A heatmap of these explanatory features can be plotted to show their mean relative intensities across the experiment time points. Here, the classes are also refactored to customise the order of the classes on the x-axis.

-refactor_cls <- clsExtract(binary_rf,
-                           cls = 'day') %>% 
-  factor(.,levels = c('H','1','2','3','4','5'))
+refactor_cls <- clsExtract(binary_rf,
+                           cls = 'day') %>% 
+  factor(.,levels = c('H','1','2','3','4','5'))
 
-binary_rf <- clsReplace(binary_rf,
+binary_rf <- clsReplace(binary_rf,
                         value = refactor_cls,
                         cls = 'day')
-binary_rf %>% 
+binary_rf %>% 
   plotExplanatoryHeatmap(metric = 'FalsePositiveRate',
                       threshold = 0.05,
                       featureNames = TRUE)

-
-

-Regression

+
+

Regression +

Random forest regression can be used to assess the extent of association of the metabolomic data with continuous response variables.

In this example, the extent of association of injection order with the example data will be assessed.

-regression_rf <- d %>% 
+regression_rf <- d %>% 
   randomForest(cls = 'injorder')
 
-print(regression_rf)
+print(regression_rf)
 #> 
 #> Random forest regression 
 #> 
@@ -484,21 +480,21 @@ 

#> Response: injorder

The regression model performance metrics, based on the OOB prediction error, can be extracted using the following:

-regression_rf %>% 
-  metrics()
-#> # A tibble: 5 × 4
+regression_rf %>% 
+  metrics()
+#> # A tibble: 5 × 4
 #>   Response .metric .estimator .estimate
-#>   <chr>    <chr>   <chr>          <dbl>
-#> 1 injorder rsq     standard       0.476
-#> 2 injorder mae     standard      23.5  
-#> 3 injorder mape    standard     154.   
-#> 4 injorder rmse    standard      26.5  
-#> 5 injorder ccc     standard       0.508
+#> <chr> <chr> <chr> <dbl> +#> 1 injorder rsq standard 0.476 +#> 2 injorder mae standard 23.5 +#> 3 injorder mape standard 154. +#> 4 injorder rmse standard 26.5 +#> 5 injorder ccc standard 0.508

These regression metrics include R2 (rsq), mean absolute error (mae), mean absolute percentage error (mape), root mean squared error (rmse) and the concordance correlation coefficient (ccc).

The R2 and concordance correlation coefficient metrics suggest that there is some association of features with the injection order, although this is weak. This is in agreement with the mean absolute error metric, which shows that, on average, the injection order could only be predicted to an accuracy of 23 injection order positions.

The MDS plot below shows the relative proximities of the samples based on this injection order regression model. This shows that for the most part, there is little correspondence of the sample positions with their injection order. However, there is a small grouping of samples towards the end of the run around sample ~99 to 120. It suggests that there could have been some analytical issues, for certain features, towards the end of the mass spectral analytical run.

-regression_rf %>% 
+regression_rf %>% 
   plotMDS(cls = NULL,
           ellipses = FALSE,
           label = 'injorder',
@@ -508,51 +504,51 @@ 

The available feature importance metrics for this regression model can be listed.

-regression_rf %>% 
-  importanceMetrics()
+regression_rf %>% 
+  importanceMetrics()
 #> [1] "%IncMSE"       "IncNodePurity"

The feature importance metrics can be plotted to give an overview of their distribution. The following will plot the percentage increase in the mean squared error (%IncMSE) importance metric.

-regression_rf %>% 
+regression_rf %>% 
   plotImportance(metric = "%IncMSE", 
                  rank = FALSE)

This shows that there are only a few features that are contributing to the association with injection order. These explanatory features can be extracted with the following, using a threshold of above 5.

-regression_rf %>% 
-  explanatoryFeatures(metric = '%IncMSE',
+regression_rf %>% 
+  explanatoryFeatures(metric = '%IncMSE',
                       threshold = 5)
-#> # A tibble: 7 × 4
+#> # A tibble: 7 × 4
 #>   Response Feature Metric  Value
-#>   <chr>    <chr>   <chr>   <dbl>
-#> 1 injorder N283    %IncMSE 19.9 
-#> 2 injorder N135    %IncMSE  8.71
-#> 3 injorder N451    %IncMSE  5.58
-#> 4 injorder N161    %IncMSE  5.51
-#> 5 injorder N306    %IncMSE  5.49
-#> 6 injorder N118    %IncMSE  5.22
-#> 7 injorder N297    %IncMSE  5.07
+#> <chr> <chr> <chr> <dbl> +#> 1 injorder N283 %IncMSE 19.9 +#> 2 injorder N135 %IncMSE 8.71 +#> 3 injorder N451 %IncMSE 5.58 +#> 4 injorder N161 %IncMSE 5.51 +#> 5 injorder N306 %IncMSE 5.49 +#> 6 injorder N118 %IncMSE 5.22 +#> 7 injorder N297 %IncMSE 5.07

This returned a total of 7 explanatory features above this threshold. The top ranked feature N283 can be plotted to investigate its trend in relation to injection order.

-regression_rf %>% 
+regression_rf %>% 
   plotFeature(feature = 'N283',
               cls = 'injorder')

This shows an increase in the intensity of that feature for samples above 100 in the injection order which corresponds with the cluster that was seen in the MDS plot above.

-
-

-Univariate analyses

+
+

Univariate analyses +

Univariate methods select features, explanatory for response variables, with features tested on an individual basis. These methods offer simplicity and easy interpretation in their use, however they provide no information as to how features may interact.

The univariate methods currently available in metabolyseR include Welch’s t-test, analysis of variance (ANOVA) and linear regression. The following sections will provide brief examples of the use of each of these methods.

-
-

-Welch’s t-test

+
+

Welch’s t-test +

Welch’s t-test can be used to select explanatory metabolome features for binary comparisons of discrete variables. By default, all the possible binary comparisons for the categories of a response variable will be tested.

Below shows the possible binary comparisons for the day response variable for the example data set.

-binaryComparisons(d,
+binaryComparisons(d,
                   cls = 'day')
 #>  [1] "1~2" "1~3" "1~4" "1~5" "1~H" "2~3" "2~4" "2~5" "2~H" "3~4" "3~5" "3~H"
 #> [13] "4~5" "4~H" "5~H"
@@ -560,11 +556,11 @@

 ttest_analysis <- ttest(d,
                         cls = 'day',
-                        comparisons = list(day = c('H~1',
+                        comparisons = list(day = c('H~1',
                                                    'H~2',
                                                    'H~5')))
 
-print(ttest_analysis)
+print(ttest_analysis)
 #> 
 #> Univariate t-test analysis
 #> 
@@ -574,38 +570,38 @@ 

#> # comparisons: 3

The explanatory features that show a significant difference between the response categories can be extracted as shown below.

-explanatoryFeatures(ttest_analysis,
+explanatoryFeatures(ttest_analysis,
                     threshold = 0.05)
-#> # A tibble: 73 × 14
+#> # A tibble: 73 × 14
 #>    Response Comparison Feature estimate estimate1 estimate2 statistic  p.value
-#>    <chr>    <chr>      <chr>      <dbl>     <dbl>     <dbl>     <dbl>    <dbl>
-#>  1 day      H~5        N163      -735.       19.5   755.       -13.8  1.43e-11
-#>  2 day      H~5        N341      2445.     2537.     92.6       13.6  2.88e-11
-#>  3 day      H~5        N133      1055.     1077.     21.9       13.0  5.44e-11
-#>  4 day      H~2        N341       200.      293.     92.6       10.6  1.38e-10
-#>  5 day      H~5        N171        62.6      64.7     2.15      11.9  2.62e-10
-#>  6 day      H~5        N119        17.2      17.9     0.763     11.0  8.54e-10
-#>  7 day      H~5        N342       243.      247.      4.13      10.8  1.42e- 9
-#>  8 day      H~5        N343        27.4      28.3     0.961      9.83 5.99e- 9
-#>  9 day      H~5        N377       152.      157.      5.05       9.81 6.75e- 9
-#> 10 day      H~5        N477       103.      129.     26.1        9.30 1.05e- 8
-#> # … with 63 more rows, and 6 more variables: parameter <dbl>, conf.low <dbl>,
-#> #   conf.high <dbl>, method <chr>, alternative <chr>, adjusted.p.value <dbl>
-

This will threshold the features based on their adjusted p-value, found in the adjusted.p.value column of the table. The results of all of the features can be returned using the importance() method.

+#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 day H~5 N163 -735. 19.5 755. -13.8 1.43e-11 +#> 2 day H~5 N341 2445. 2537. 92.6 13.6 2.88e-11 +#> 3 day H~5 N133 1055. 1077. 21.9 13.0 5.44e-11 +#> 4 day H~2 N341 200. 293. 92.6 10.6 1.38e-10 +#> 5 day H~5 N171 62.6 64.7 2.15 11.9 2.62e-10 +#> 6 day H~5 N119 17.2 17.9 0.763 11.0 8.54e-10 +#> 7 day H~5 N342 243. 247. 4.13 10.8 1.42e- 9 +#> 8 day H~5 N343 27.4 28.3 0.961 9.83 5.99e- 9 +#> 9 day H~5 N377 152. 157. 5.05 9.81 6.75e- 9 +#> 10 day H~5 N477 103. 129. 26.1 9.30 1.05e- 8 +#> # … with 63 more rows, and 6 more variables: parameter <dbl>, conf.low <dbl>, +#> # conf.high <dbl>, method <chr>, alternative <chr>, adjusted.p.value <dbl>

+

This will threshold the features based on their adjusted p-value, found in the adjusted.p.value column of the table. The results of all of the features can be returned using the importance() method.

A heat map of the explanatory features can be plotted to inspect the relative trends of the explanatory features in relation to the response variable.

 plotExplanatoryHeatmap(ttest_analysis)

-
-

-ANOVA

+
+

ANOVA +

ANOVA can be used to select explanatory features for discrete response variables with 3 or more categories. The following example will compare all the categories in the day response variable. However, the comparisons argument can be used to select particular comparisons of interest.

 anova_analysis <- anova(d,
                         cls = 'day')
 
-print(anova_analysis)
+print(anova_analysis)
 #> 
 #> Univariate ANOVA analysis
 #> 
@@ -615,22 +611,22 @@ 

#> # comparisons: 1

The explanatory features that are significantly different between the categories can then be extracted.

-explanatoryFeatures(anova_analysis,
+explanatoryFeatures(anova_analysis,
                     threshold = 0.05)
-#> # A tibble: 110 × 10
+#> # A tibble: 110 × 10
 #>    Response Comparison  Feature term        df   sumsq meansq statistic  p.value
-#>    <chr>    <chr>       <chr>   <chr>    <dbl>   <dbl>  <dbl>     <dbl>    <dbl>
-#>  1 day      1~2~3~4~5~H N341    response     5  1.09e8 2.17e7     124.  1.90e-44
-#>  2 day      1~2~3~4~5~H N163    response     5  1.25e7 2.51e6     113.  1.71e-42
-#>  3 day      1~2~3~4~5~H N133    response     5  1.96e7 3.92e6     108.  1.71e-41
-#>  4 day      1~2~3~4~5~H N171    response     5  6.29e4 1.26e4      88.8 1.16e-37
-#>  5 day      1~2~3~4~5~H N342    response     5  1.04e6 2.07e5      85.1 7.61e-37
-#>  6 day      1~2~3~4~5~H N343    response     5  1.19e4 2.38e3      66.1 4.43e-32
-#>  7 day      1~2~3~4~5~H N119    response     5  4.92e3 9.83e2      53.8 2.07e-28
-#>  8 day      1~2~3~4~5~H N497    response     5  1.10e5 2.20e4      49.6 4.83e-27
-#>  9 day      1~2~3~4~5~H N137    response     5  6.32e3 1.26e3      39.9 1.59e-23
-#> 10 day      1~2~3~4~5~H N277    response     5  6.31e4 1.26e4      39.1 3.14e-23
-#> # … with 100 more rows, and 1 more variable: adjusted.p.value <dbl>
+#> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 day 1~2~3~4~5~H N341 response 5 1.09e8 2.17e7 124. 1.90e-44 +#> 2 day 1~2~3~4~5~H N163 response 5 1.25e7 2.51e6 113. 1.71e-42 +#> 3 day 1~2~3~4~5~H N133 response 5 1.96e7 3.92e6 108. 1.71e-41 +#> 4 day 1~2~3~4~5~H N171 response 5 6.29e4 1.26e4 88.8 1.16e-37 +#> 5 day 1~2~3~4~5~H N342 response 5 1.04e6 2.07e5 85.1 7.61e-37 +#> 6 day 1~2~3~4~5~H N343 response 5 1.19e4 2.38e3 66.1 4.43e-32 +#> 7 day 1~2~3~4~5~H N119 response 5 4.92e3 9.83e2 53.8 2.07e-28 +#> 8 day 1~2~3~4~5~H N497 response 5 1.10e5 2.20e4 49.6 4.83e-27 +#> 9 day 1~2~3~4~5~H N137 response 5 6.32e3 1.26e3 39.9 1.59e-23 +#> 10 day 1~2~3~4~5~H N277 response 5 6.31e4 1.26e4 39.1 3.14e-23 +#> # … with 100 more rows, and 1 more variable: adjusted.p.value <dbl>

The top ranked explanatory feature N341 can be plotted to inspect its trend relative to the day response variable.

 plotFeature(anova_analysis,
@@ -638,15 +634,15 @@ 

cls = 'day')

-
-

-Linear regression

+
+

Linear regression +

Univariate linear regression can be used to associate a continuous response variable with metabolome features. In the example below, the example data will be regressed against injection order to identify any linearly associated metabolome features.

 lr_analysis <- linearRegression(d,
                                 cls = 'injorder')
 
-print(lr_analysis)
+print(lr_analysis)
 #> 
 #> Univariate linear regression analysis
 #> 
@@ -655,20 +651,20 @@ 

#> Responses: injorder

The explanatory features can then be extracted.

-explanatoryFeatures(lr_analysis)
-#> # A tibble: 8 × 15
+explanatoryFeatures(lr_analysis)
+#> # A tibble: 8 × 15
 #>   Response Feature r.squared adj.r.squared sigma statistic  p.value    df logLik
-#>   <chr>    <chr>       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl>
-#> 1 injorder N283        0.310         0.304  4.27      53.0 4.10e-11     1  -343.
-#> 2 injorder N135        0.165         0.157 78.7       23.2 4.31e- 6     1  -693.
-#> 3 injorder N221        0.140         0.133  5.87      19.3 2.50e- 5     1  -382.
-#> 4 injorder N473        0.135         0.127  7.24      18.3 3.78e- 5     1  -407.
-#> 5 injorder N335        0.132         0.124 20.1       17.9 4.59e- 5     1  -529.
-#> 6 injorder N452        0.120         0.112  4.00      16.0 1.10e- 4     1  -335.
-#> 7 injorder N255        0.119         0.111 11.1       15.9 1.17e- 4     1  -458.
-#> 8 injorder N267        0.118         0.111 26.4       15.8 1.22e- 4     1  -562.
-#> # … with 6 more variables: AIC <dbl>, BIC <dbl>, deviance <dbl>,
-#> #   df.residual <int>, nobs <int>, adjusted.p.value <dbl>
+#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 injorder N283 0.310 0.304 4.27 53.0 4.10e-11 1 -343. +#> 2 injorder N135 0.165 0.157 78.7 23.2 4.31e- 6 1 -693. +#> 3 injorder N221 0.140 0.133 5.87 19.3 2.50e- 5 1 -382. +#> 4 injorder N473 0.135 0.127 7.24 18.3 3.78e- 5 1 -407. +#> 5 injorder N335 0.132 0.124 20.1 17.9 4.59e- 5 1 -529. +#> 6 injorder N452 0.120 0.112 4.00 16.0 1.10e- 4 1 -335. +#> 7 injorder N255 0.119 0.111 11.1 15.9 1.17e- 4 1 -458. +#> 8 injorder N267 0.118 0.111 26.4 15.8 1.22e- 4 1 -562. +#> # … with 6 more variables: AIC <dbl>, BIC <dbl>, deviance <dbl>, +#> # df.residual <int>, nobs <int>, adjusted.p.value <dbl>

The top ranked explanatory feature N283 can be plotted to inspect its association with injection order.

 plotFeature(lr_analysis,
@@ -677,36 +673,36 @@ 

-
-

-Routine analyses

+
+

Routine analyses +

For routine analyses, the initial analysis parameters for pre-treatment of the data and then the modelling can be selected.

-p <- analysisParameters(c('pre-treatment','modelling'))
+p <- analysisParameters(c('pre-treatment','modelling'))

More specific parameters for pre-treatment of the example data can be declared using the following.

-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
+parameters(p,'pre-treatment') <- preTreatmentParameters(
+  list(
     keep = 'classes',
     occupancyFilter = 'maximum',
     transform = 'TICnorm' 
   )
 )
-

The modellingMethods() function can be used to list the modelling methods that are currently available in metabolyseR.

+

The modellingMethods() function can be used to list the modelling methods that are currently available in metabolyseR.

-modellingMethods()
+modellingMethods()
 #> [1] "anova"            "ttest"            "linearRegression" "randomForest"
-

The modellingParameters() function can be used to retrieve the default parameters for specific modelling methods. Below, the default modelling parameters for the randomForest and ttest methods are specified.

+

The modellingParameters() function can be used to retrieve the default parameters for specific modelling methods. Below, the default modelling parameters for the randomForest and ttest methods are specified.

-parameters(p,'modelling') <- modellingParameters(c('randomForest','ttest'))
+parameters(p,'modelling') <- modellingParameters(c('randomForest','ttest'))

The class parameters can then be universally specified for both the pre-treatment and modelling elements. For this example, the day response variable will be used with just the H and 2 classes.

 changeParameter(p,'cls') <- 'day'
-changeParameter(p,'classes') <- c('H','2')
+changeParameter(p,'classes') <- c('H','2')

This gives the following parameters for the analysis.

 p
-#> Parameters:
+#> Parameters:
 #> pre-treatment
 #>  keep
 #>      classes
@@ -736,10 +732,10 @@ 

#> returnModels = FALSE

The analysis can then be executed.

analysis <- metabolyse(abr1$neg,abr1$fact,p)
-#> 
-#> metabolyseR  v0.14.6 Wed Nov 17 10:26:39 2021
+#> 
[34m
+#> metabolyseR 
[39m 
[31mv0.14.7
[39m Fri Dec 17 17:57:41 2021
 #> ________________________________________________________________________________
-#> Parameters:
+#> 
[33m
[33mParameters:
[33m
[39m
 #> pre-treatment
 #>  keep
 #>      classes
@@ -768,18 +764,18 @@ 

#> comparisons = list() #> returnModels = FALSE #> ________________________________________________________________________________ -#> Pre-treatment … +#> 
[34mPre-treatment 
[39m… -Pre-treatment ✓ [6.9S] -#> Modelling … - -Modelling ✓ [4.7S] +
[34mPre-treatment 
[39m 
[32m✓
[39m [6.4S] +#> 
[34mModelling 
[39m… +
[34m +Modelling 
[39m 
[32m✓
[39m [4.5S] #> ________________________________________________________________________________ #> -#> Complete! [11.7S]

+#> 
[32mComplete! 
[39m[10.9S]

The results for the modelling can be specifically extracted using the following.

-analysisResults(analysis,'modelling')
+analysisResults(analysis,'modelling')
 #> $randomForest
 #> 
 #> Random forest classification 
@@ -801,30 +797,30 @@ 

This returns the results as a list containing the modelling results objects for each specified method.

Alternatively, the modelling results can be accessed directly from the Analysis object. Below shows the extraction of the explanatory features, using default parameters for each method, with the results returned in a single table.

-explanatory_features <- analysis %>% 
-  explanatoryFeatures()
+explanatory_features <- analysis %>% 
+  explanatoryFeatures()
 
-print(explanatory_features)
-#> # A tibble: 100 × 17
+print(explanatory_features)
+#> # A tibble: 100 × 17
 #>    Method       Response Comparison Feature Metric      Value estimate estimate1
-#>    <chr>        <chr>    <chr>      <chr>   <chr>       <dbl>    <dbl>     <dbl>
-#>  1 randomForest day      2~H        N341    FalsePo… 8.06e-28       NA        NA
-#>  2 randomForest day      2~H        N377    FalsePo… 5.70e-18       NA        NA
-#>  3 randomForest day      2~H        N447    FalsePo… 5.70e-18       NA        NA
-#>  4 randomForest day      2~H        N579    FalsePo… 5.70e-18       NA        NA
-#>  5 randomForest day      2~H        N1084   FalsePo… 1.19e-16       NA        NA
-#>  6 randomForest day      2~H        N327    FalsePo… 2.33e-15       NA        NA
-#>  7 randomForest day      2~H        N580    FalsePo… 4.32e-14       NA        NA
-#>  8 randomForest day      2~H        N1083   FalsePo… 7.49e-13       NA        NA
-#>  9 randomForest day      2~H        N1085   FalsePo… 7.49e-13       NA        NA
-#> 10 randomForest day      2~H        N503    FalsePo… 7.49e-13       NA        NA
-#> # … with 90 more rows, and 9 more variables: estimate2 <dbl>, statistic <dbl>,
-#> #   p.value <dbl>, parameter <dbl>, conf.low <dbl>, conf.high <dbl>,
-#> #   method <chr>, alternative <chr>, adjusted.p.value <dbl>
+#> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> +#> 1 randomForest day 2~H N341 FalsePo… 8.06e-28 NA NA +#> 2 randomForest day 2~H N377 FalsePo… 5.70e-18 NA NA +#> 3 randomForest day 2~H N447 FalsePo… 5.70e-18 NA NA +#> 4 randomForest day 2~H N579 FalsePo… 5.70e-18 NA NA +#> 5 randomForest day 2~H N1084 FalsePo… 1.19e-16 NA NA +#> 6 randomForest day 2~H N327 FalsePo… 2.33e-15 NA NA +#> 7 randomForest day 2~H N580 FalsePo… 4.32e-14 NA NA +#> 8 randomForest day 2~H N1083 FalsePo… 7.49e-13 NA NA +#> 9 randomForest day 2~H N1085 FalsePo… 7.49e-13 NA NA +#> 10 randomForest day 2~H N503 FalsePo… 7.49e-13 NA NA +#> # … with 90 more rows, and 9 more variables: estimate2 <dbl>, statistic <dbl>, +#> # p.value <dbl>, parameter <dbl>, conf.low <dbl>, conf.high <dbl>, +#> # method <chr>, alternative <chr>, adjusted.p.value <dbl>

Heat maps of the explanatory features can also be plotted for both the modelling methods.

-plotExplanatoryHeatmap(analysis) %>% 
-  patchwork::wrap_plots()
+plotExplanatoryHeatmap(analysis) %>% + patchwork::wrap_plots()

@@ -840,11 +836,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -853,5 +851,7 @@

+ + diff --git a/docs/articles/modelling_files/figure-html/outlier-detect-1.png b/docs/articles/modelling_files/figure-html/outlier-detect-1.png index b4fef5f2..c2cfd51f 100644 Binary files a/docs/articles/modelling_files/figure-html/outlier-detect-1.png and b/docs/articles/modelling_files/figure-html/outlier-detect-1.png differ diff --git a/docs/articles/modelling_files/figure-html/regression-mds-1.png b/docs/articles/modelling_files/figure-html/regression-mds-1.png index 7e2e31de..bb6c56bd 100644 Binary files a/docs/articles/modelling_files/figure-html/regression-mds-1.png and b/docs/articles/modelling_files/figure-html/regression-mds-1.png differ diff --git a/docs/articles/pre_treatment.html b/docs/articles/pre_treatment.html index 0d70bf56..fc7b71a7 100644 --- a/docs/articles/pre_treatment.html +++ b/docs/articles/pre_treatment.html @@ -19,6 +19,8 @@ + +
+
-
-

-Introduction

+
+

Introduction +

Metabolomics data from any analytical technique requires various data pre-treatment steps prior to subsequent data mining or other downstream analyses. This aids both the data quality and integrity. It is important that appropriate pre-treatment strategies are used not only for the analytical technique being applied but are also suitable for the statistical or machine learning analyses that are to be utilised. Careful consideration of the pre-treatment steps to be undertaken is required as they can have a substantial influence on the results and inferences taken from metabolomic analyses.

Data pre-treatment is the most faceted aspect of the analysis elements in metabolyseR. It is itself made up of a number of elements, which themselves are made up of methods. The following document will outline the application of each of these pre-treatment elements for use in exploratory analyses then outline how to apply them in routine analyses. For an introduction to the usage of metabolyseR for both exploratory and routine analyses, see the introduction vignette using:

-vignette('introduction','metabolyseR')
+vignette('introduction','metabolyseR')

To further supplement this document, a quick start example analysis is also available as a vignette:

-vignette('quick_start','metabolyseR')
+vignette('quick_start','metabolyseR')

To begin, the package can be loaded using:

-library(metabolyseR)
+library(metabolyseR)
 #> 
 #> Attaching package: 'metabolyseR'
 #> The following object is masked from 'package:stats':
@@ -122,17 +118,17 @@ 

#> The following objects are masked from 'package:base': #> #> raw, split

-
-

-Example data

-

The examples used here will use the abr1 data set from the metaboData package. This is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The pipe %>% from the magrittr package will also be used. The example data can be loaded using:

+
+

Example data +

+

The examples used here will use the abr1 data set from the metaboData package. This is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The pipe %>% from the magrittr package will also be used. The example data can be loaded using:

+library(metaboData)

Only the negative acquisition mode data (abr1$neg) will be used along with the sample meta-information (abr1$fact). Create an AnalysisData class object, assigned to the variable d, using the following:

 d <- analysisData(abr1$neg,abr1$fact)
-print(d)
+print(d)
 #> 
 #> AnalysisData object containing:
 #> 
@@ -141,28 +137,28 @@ 

#> Info: 9

As can be seen above the data set contains a total of 120 samples and 2000 features.

-
-

-Parallel processing

-

The package supports parallel processing using the future package.

-

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel implementation using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

+
+

Parallel processing +

+

The package supports parallel processing using the future package.

+

By default, processing by metabolyseR will be done sequentially. However, parallel processing can be activated, prior to analysis, by specifying a parallel implementation using plan(). The following example specifies using the multisession implementation (multiple background R sessions) with two worker processes.

-plan(future::multisession,workers = 2)
-

See the future package documentation for more information on the types of parallel implementations that are available.

+plan(future::multisession,workers = 2)
+

See the future package documentation for more information on the types of parallel implementations that are available.

-
-

-Pre-treatment elements

+
+

Pre-treatment elements +

The following sections will outline the numerous pre-treatment elements available within metabolyseR. There will be examples of their application during exploratory analyses along with useful visualisations. These can aid interpretation of when particular treatments should be applied as well as their effect once they have been used.

-
-

-Removal of samples, classes or features

+
+

Removal of samples, classes or features +

In many situations, it will be necessary to exclude either individual samples, sample classes or certain features from further analysis.

-

Individual samples can be removed using removeSamples() as below, where the idx argument stipulates the sample information column containing the sample indexes and the samples argument a vector of sample indexes to remove.

+

Individual samples can be removed using removeSamples() as below, where the idx argument stipulates the sample information column containing the sample indexes and the samples argument a vector of sample indexes to remove.

-d %>%
-  removeSamples(idx = 'injorder',samples = 1)
+d %>%
+  removeSamples(idx = 'injorder',samples = 1)
 #> 
 #> AnalysisData object containing:
 #> 
@@ -171,8 +167,8 @@ 

#> Info: 9

The removeClasses function can be used similarly to remove whole classes from further analysis:

-d %>%
-  removeClasses(cls = 'day',classes = 'H')
+d %>%
+  removeClasses(cls = 'day',classes = 'H')
 #> 
 #> AnalysisData object containing:
 #> 
@@ -181,8 +177,8 @@ 

#> Info: 9

The following will enable the removal of specified features as a vector supplied to the features argument:

-d %>%
-  removeFeatures(features = c('N1','N2'))
+d %>%
+  removeFeatures(features = c('N1','N2'))
 #> 
 #> AnalysisData object containing:
 #> 
@@ -191,24 +187,24 @@ 

#> Info: 9

There could be occasions where the numbers of samples, classes or features to remove are greater than the numbers of samples, classes or features that are to be retained. In these situations it will be more convenient to directly specify the samples, classes or features to retain. Keeping samples, classes or features is outlined in the following section.

-
-

-Keeping samples, classes or features

+
+

Keeping samples, classes or features +

Often it will be necessary to retain only particular samples, sample classes or certain features for further analysis.

-

Individual samples can be kept using keepSamples() as below, where the idx argument stipulates the sample information column containing the sample indexes and the samples argument, a vector of sample indexes to keep.

+

Individual samples can be kept using keepSamples() as below, where the idx argument stipulates the sample information column containing the sample indexes and the samples argument, a vector of sample indexes to keep.

-d %>%
-  keepSamples(idx = 'injorder',samples = 1)
+d %>%
+  keepSamples(idx = 'injorder',samples = 1)
 #> 
 #> AnalysisData object containing:
 #> 
 #> Samples: 1 
 #> Features: 2000 
 #> Info: 9
-

The keepClasses() method can be used similarly to keep whole classes for further analysis:

+

The keepClasses() method can be used similarly to keep whole classes for further analysis:

-d %>%
-  keepClasses(cls = 'day',classes = 'H')
+d %>%
+  keepClasses(cls = 'day',classes = 'H')
 #> 
 #> AnalysisData object containing:
 #> 
@@ -217,8 +213,8 @@ 

#> Info: 9

The following will specify features to keep, with a vector of feature names supplied to the features argument:

-d %>%
-  keepFeatures(features = c('N1','N2'))
+d %>%
+  keepFeatures(features = c('N1','N2'))
 #> 
 #> AnalysisData object containing:
 #> 
@@ -227,41 +223,41 @@ 

#> Info: 9

There are likely to be occasions where the numbers of samples, classes or features to keep are greater than the numbers of samples, classes or features that are to be excluded. In these situations it will be more convenient to directly specify the samples, classes or features to remove. Removing samples, classes or features is outlined in the previous section.

-
-

-Feature filtering based on occupancy

+
+

Feature filtering based on occupancy +

Occupancy provides a useful metric by which to filter poorly represented features (features containing a majority zero or missing values). An occupancy threshold provides a means of specifying this majority with variables below the threshold excluded from further analyses. However, this can be complicated by an underlying class structure present within the data where a variable may be well represented within one class but not in another.

The proportional occupancy for each feature within a data set for a given class structure can be calculated using the occupancy() method, specifying the sample information column using the cls argument.

-d %>%
+d %>%
   occupancy(cls = 'day')
-#> # A tibble: 11,914 × 5
+#> # A tibble: 11,914 × 5
 #>    day   Feature     N `Class total` Occupancy
-#>    <fct> <chr>   <dbl>         <int>     <dbl>
-#>  1 1     N1          0            20         0
-#>  2 1     N10         0            20         0
-#>  3 1     N100        0            20         0
-#>  4 1     N1000      20            20         1
-#>  5 1     N1001      20            20         1
-#>  6 1     N1002      20            20         1
-#>  7 1     N1003      20            20         1
-#>  8 1     N1004      20            20         1
-#>  9 1     N1005      20            20         1
-#> 10 1     N1006      20            20         1
-#> # … with 11,904 more rows
+#> <fct> <chr> <dbl> <int> <dbl> +#> 1 1 N1 0 20 0 +#> 2 1 N10 0 20 0 +#> 3 1 N100 0 20 0 +#> 4 1 N1000 20 20 1 +#> 5 1 N1001 20 20 1 +#> 6 1 N1002 20 20 1 +#> 7 1 N1003 20 20 1 +#> 8 1 N1004 20 20 1 +#> 9 1 N1005 20 20 1 +#> 10 1 N1006 20 20 1 +#> # … with 11,904 more rows

Alternatively the occupancy distributions can be plotted providing a useful overview of the data set:

-d %>%
+d %>%
   plotOccupancy(cls = 'day')

It can be seen that there are a number of unoccupied features across all the sample classes with a small rise in the density distribution near 0.

There are two strategies for thresholding occupancy. The first is a maximum threshold; where the maximum occupancy across all classes is above the threshold. Therefore, for a feature to be retained, only a single class needs to have an occupancy above the threshold. It is this strategy that will be appropriate for most applications. A two-thirds maximum occupancy filter can be applied to the day sample information column of our data using:

-maximum_occupancy_filtered <- d %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3)
+maximum_occupancy_filtered <- d %>% + occupancyMaximum(cls = 'day',occupancy = 2/3)

It can be seen below that this removes 240 features.

-print(maximum_occupancy_filtered)
+print(maximum_occupancy_filtered)
 #> 
 #> AnalysisData object containing:
 #> 
@@ -270,16 +266,16 @@ 

#> Info: 9

Plotting the occupancy distributions shows that all the low occupancy features have now been removed.

-maximum_occupancy_filtered %>%
+maximum_occupancy_filtered %>%
   plotOccupancy(cls = 'day')

The alternative strategy is by applying a minimum threshold; where the minimum occupancy across all classes is required to be above the threshold. Therefore, for a feature to be retained, all classes would need to have an occupancy above the threshold. A two-thirds minimum occupancy filter can be applied to the day sample information column of our data using:

-minimum_occupancy_filtered <- d %>%
-  occupancyMinimum(cls = 'day',occupancy = 2/3)
+minimum_occupancy_filtered <- d %>% + occupancyMinimum(cls = 'day',occupancy = 2/3)

It can be seen below that this removes 344 features.

-print(minimum_occupancy_filtered)
+print(minimum_occupancy_filtered)
 #> 
 #> AnalysisData object containing:
 #> 
@@ -287,64 +283,64 @@ 

#> Features: 1656 #> Info: 9

-
-

-Data transformation

+
+

Data transformation +

Prior to downstream analyses, metabolomics data often require transformation to fulfill the assumptions of a particular statistical/data mining technique.

There are a wide range of transformation methods available that are commonly used for the analysis of metabolomics data. These methods are all named with the prefix transform.

The effects of a transformation on a data set can be assessed using a supervised classification approach. The following performs a supervised random forest analysis of the example data and plots the results using both multidimensional scaling (MDS) and receiver operating characteristic (ROC) curves.

-d %>%
+d %>%
   plotSupervisedRF(cls = 'day')

Alternatively a log10 transformation can be applied prior to analysis:

Or a total ion count (TIC) normalisation where each individual sample is corrected by its TIC. This is one method that can be used to account for small variability in sample concentration.

The margin value is a metric that can be used to assess model performance. Positive values indicate a model's ability, on average, to correctly predict the class labels of the analysed data.

As can be seen in the plots above, the transformations have little effect on the overall structure of the data set. However, there are small increases in the margins of the transformed data (model improvement). Note that here, a non-parametric machine learning approach has been applied to assess the effects of the transformations on the data. Using a different approach such as the parametric analysis of variance (ANOVA), which has different underlying assumptions, will likely give different results to the assessment above.

-
-

-Sample aggregation

+
+

Sample aggregation +

Sample aggregation allows the electronic pooling of samples based on a grouping variable. This is useful in situations such as the presence of technical replicates that can be aggregated to reduce the effects of pseudo replication. metabolyseR provides methods for mean, median and sum aggregation and each starts with the aggregate prefix.

Below shows a principal component analysis (PCA) plot of the example data coloured by the classes of the day sample information column. It is first maximum occupancy filtered to remove empty features.

-d %>%
-  occupancyMaximum(cls = 'day') %>%
+d %>%
+  occupancyMaximum(cls = 'day') %>%
   plotPCA(cls = 'day')

The example below shows the mean aggregation of the data using the experimental classes within the day sample information column.

-day_mean <- d %>%
-  occupancyMaximum(cls = 'day') %>%
-  aggregateMean(cls = 'day')
+day_mean <- d %>% + occupancyMaximum(cls = 'day') %>% + aggregateMean(cls = 'day')

The PCA plot below shows these class averages of the data.

 plotPCA(day_mean,cls = 'day',ellipses = FALSE)

-
-

-Batch/block correction

+
+

Batch/block correction +

There can sometimes be artificial batch related variability introduced into metabolomics analyses as a result of analytical instrumentation or sample preparation. With appropriate sample randomisation (see section on feature filtering based on QC samples), batch related variability can be corrected for using an average centring correction method, applied to the individual features.

The plot below shows differences in the TIC distributions for each of the classes in the day sample information column.

-d %>%
+d %>%
   plotTIC(by = 'day',colour = 'day')

The data can then be corrected by class average centring as shown below.

-corrected_data <- d %>%
-  correctionCenter(block = 'day',type = 'median')
+corrected_data <- d %>% + correctionCenter(block = 'day',type = 'median')

The plot of the TICs below shows that the inter-class variability has been removed but the intra-class variability has been retained.

 plotTIC(corrected_data,
@@ -352,85 +348,85 @@ 

colour = 'day')

-
-

-Imputation of missing data

-

Missing values can have an important influence on downstream analyses with zero values heavily influencing the outcomes of parametric tests. Where and how they are imputed are important considerations and this is highly related to variable occupancy. The methods provided here allow both these aspects to be taken into account and utilise Random Forest imputation using the missForest package.

+
+

Imputation of missing data +

+

Missing values can have an important influence on downstream analyses with zero values heavily influencing the outcomes of parametric tests. Where and how they are imputed are important considerations and this is highly related to variable occupancy. The methods provided here allow both these aspects to be taken into account and utilise Random Forest imputation using the missForest package.

Below shows a Linear Discriminant Analysis (LDA) plot of the example data. The eigenvalue (Tw) gives a comparable indication of the separation between the sample classes.

-d %>%
-  keepClasses(cls = 'day',classes = c('H','5')) %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
+d %>%
+  keepClasses(cls = 'day',classes = c('H','5')) %>%
+  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
   plotLDA(cls = 'day')

The following shows the same, except there is an application of imputation prior to the LDA. The imputed data is based on the data of all the samples present in the data set. It shows a very slight drop in the eigenvalue and therefore reduced separation between the sample classes.

-d %>%
-  keepClasses(cls = 'day',classes = c('H','5')) %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
-  imputeAll(parallel = 'variables') %>%
+d %>%
+  keepClasses(cls = 'day',classes = c('H','5')) %>%
+  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
+  imputeAll(parallel = 'variables') %>%
   plotLDA(cls = 'day')

Imputation accuracy is likely to be reduced if data is sparse or there is underlying class structure where there is significant discrimination. Below shows the application of imputation prior to the LDA, except this time the imputation is class-wise. The imputed data is based only on the values of other samples within the class.

-d %>%
-  keepClasses(cls = 'day',classes = c('H','5')) %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
-  imputeClass(cls = 'day') %>%
+d %>%
+  keepClasses(cls = 'day',classes = c('H','5')) %>%
+  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
+  imputeClass(cls = 'day') %>%
   plotLDA(cls = 'day')

This shows a slight increase in the eigenvalue with the classes showing greater separation. This is likely due to the increased accuracy of the imputed data relative to the class structure.

-
-

-Feature filtering based on quality control (QC) samples

+
+

Feature filtering based on quality control (QC) samples +

A QC sample is an average pooled sample, equally representative in composition of all the samples present within an experimental set. Within an analytical run, the QC sample is analysed at equal intervals throughout the run. If there is class structure within the run, this should be randomised within a block fashion so that the classes are equally represented in each block throughout the run. A QC sample can then be injected and analysed between these randomised blocks. This provides a set of technical injections that allows the variability in instrument performance over the run to be accounted for and the robustness of the acquired variables to be assessed.

The technical reproducibility of an acquired variable can be assessed using its relative standard deviation (RSD) within the QC samples. The variable RSDs can then be filtered below a threshold value to remove metabolome features that are poorly reproducible across the analytical runs. This variable filtering strategy has an advantage over that of occupancy alone as it is not dependent on underlying class structure. Therefore, the variables and variable numbers will not alter if a new class structure is imposed upon the data.

The example data set does not include QC samples. For this example, the H class will be used.

Firstly, the RSD distribution will be assessed for only the H class. The following retains only the H class samples to aid visualisation.

-QC <- d %>%
-  keepClasses(cls = 'day',classes = 'H')
+QC <- d %>% + keepClasses(cls = 'day',classes = 'H')

The table of RSD values for each of the features can be computed as below.

-QC %>%
+QC %>%
   rsd(cls = 'day')
-#> # A tibble: 2,000 × 5
+#> # A tibble: 2,000 × 5
 #>    day   Feature  Mean    SD   RSD
-#>    <fct> <chr>   <dbl> <dbl> <dbl>
-#>  1 H     N1        0     0   NaN  
-#>  2 H     N10       0     0   NaN  
-#>  3 H     N100      0     0   NaN  
-#>  4 H     N1000   114.   19.4  17.0
-#>  5 H     N1001    99.2  21.6  21.7
-#>  6 H     N1002    86.7  23.9  27.6
-#>  7 H     N1003    82.3  18.0  21.9
-#>  8 H     N1004    91.6  18.8  20.5
-#>  9 H     N1005    78.2  14.0  17.9
-#> 10 H     N1006    78.6  21.3  27.1
-#> # … with 1,990 more rows
+#> <fct> <chr> <dbl> <dbl> <dbl> +#> 1 H N1 0 0 NaN +#> 2 H N10 0 0 NaN +#> 3 H N100 0 0 NaN +#> 4 H N1000 114. 19.4 17.0 +#> 5 H N1001 99.2 21.6 21.7 +#> 6 H N1002 86.7 23.9 27.6 +#> 7 H N1003 82.3 18.0 21.9 +#> 8 H N1004 91.6 18.8 20.5 +#> 9 H N1005 78.2 14.0 17.9 +#> 10 H N1006 78.6 21.3 27.1 +#> # … with 1,990 more rows

The distributions of the feature RSD values can be plotted for the H class.

-QC %>%
+QC %>%
   plotRSD(cls = 'day')
 #> Warning: Removed 123 rows containing non-finite values (stat_density).
 #> Warning: Removed 1 row(s) containing missing values (geom_path).

This shows that there are a number of features with very high RSD values and therefore poor analytical robustness. Many of these are likely to be as a result of poor occupancy and zero values. Applying an occupancy filter prior to plotting does indeed show a reduction in the upper range of RSD values retained.

-QC %>%
-  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
+QC %>%
+  occupancyMaximum(cls = 'day',occupancy = 2/3) %>%
   plotRSD(cls = 'day')

metabolyseR contains a number of methods for applying pre-treatment routines specifically on QC samples and are all prefixed with QC. These include methods for feature filtering of a data set based on the occupancy of the QC class, imputation of the QC class only, feature filtering based on the RSD values of the QC class and removal of only the QC class.

Below shows an example of applying some of these QC methods. This will first filter the features in the data set based on the occupancy of the QC class. Then the features are filtered based on the RSD values of the QC class using an RSD threshold of 50%. The class index of the QC samples is specified using the QCidx argument.

-QC_filtered <- d %>%
-  QCoccupancy(cls = 'day',QCidx = 'H',occupancy = 2/3) %>%
-  QCrsdFilter(cls = 'day',QCidx = 'H',RSDthresh = 50)
+QC_filtered <- d %>% + QCoccupancy(cls = 'day',QCidx = 'H',occupancy = 2/3) %>% + QCrsdFilter(cls = 'day',QCidx = 'H',RSDthresh = 50)

This removes a total of 637 features.

-print(QC_filtered)
+print(QC_filtered)
 #> 
 #> AnalysisData object containing:
 #> 
@@ -439,25 +435,25 @@ 

#> Info: 9

-
-

-Routine analyses

+
+

Routine analyses +

For routine analyses, the available pre-treatment elements can be retrieved using:

-preTreatmentElements()
+preTreatmentElements()
 #> [1] "aggregate"       "correction"      "impute"          "keep"           
 #> [5] "occupancyFilter" "QC"              "remove"          "transform"

The available methods for a specified pre-treatment element can be viewed using:

-preTreatmentMethods('remove')
+preTreatmentMethods('remove')
 #> [1] "classes"  "features" "samples"

The default pre-treatment parameters can first be assigned to the variable p.

 p <- analysisParameters('pre-treatment')
-

The preTreatmentParameters() function allows the parameters for particular pre-treatment elements to be specified. The following specifies the pre-treatment elements that will be used for this data set. These will include the keeping of certain sample classes, the filtering of features based on class occupancy and the application of a TIC normalisation. These will be assigned to the p variable using the parameters() method.

+

The preTreatmentParameters() function allows the parameters for particular pre-treatment elements to be specified. The following specifies the pre-treatment elements that will be used for this data set. These will include the keeping of certain sample classes, the filtering of features based on class occupancy and the application of a TIC normalisation. These will be assigned to the p variable using the parameters() method.

-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
+parameters(p,'pre-treatment') <- preTreatmentParameters(
+  list(
     keep = 'classes',
     occupancyFilter = 'maximum',
     transform = 'TICnorm' 
@@ -465,8 +461,8 @@ 

)

Printing p shows these pre-treatment steps.

-print(p)
-#> Parameters:
+print(p)
+#> Parameters:
 #> pre-treatment
 #>  keep
 #>      classes
@@ -481,11 +477,11 @@ 

Next, the day sample information column can be specified, along with the classes to be kept which will be the H, the 1 and the 2 classes.

 changeParameter(p,'cls') <- 'day'
-changeParameter(p,'classes') <- c('H','1','2')
+changeParameter(p,'classes') <- c('H','1','2')

Printing p shows the final pre-treatment parameters that will be used for this analysis.

-print(p)
-#> Parameters:
+print(p)
+#> Parameters:
 #> pre-treatment
 #>  keep
 #>      classes
@@ -499,10 +495,10 @@ 

#> TICnorm

The pre-treatment routine can then be executed.

analysis <- metabolyse(abr1$neg,abr1$fact,p)
-#> 
-#> metabolyseR  v0.14.6 Wed Nov 17 10:30:55 2021
+#> 
[34m
+#> metabolyseR 
[39m 
[31mv0.14.7
[39m Fri Dec 17 18:01:39 2021
 #> ________________________________________________________________________________
-#> Parameters:
+#> 
[33m
[33mParameters:
[33m
[39m
 #> pre-treatment
 #>  keep
 #>      classes
@@ -515,41 +511,41 @@ 

#> transform #> TICnorm #> ________________________________________________________________________________ -#> Pre-treatment … +#> 
[34mPre-treatment 
[39m… -Pre-treatment ✓ [9.9S] +
[34mPre-treatment 
[39m 
[32m✓
[39m [9.5S] #> ________________________________________________________________________________ #> -#> Complete! [10S]

+#> 
[32mComplete! 
[39m[9.5S]

Printing the analysis object shows the resulting data from the pre-treatment routine.

-print(analysis)
+print(analysis)
 #> 
-#> metabolyseR v0.14.6
-#> Analysis:
-#>  Wed Nov 17 10:30:55 2021
+#> metabolyseR v0.14.7
+#> Analysis:
+#>     Fri Dec 17 18:01:39 2021
 #> 
 #>  Raw Data:
 #>      No. samples = 120
 #>      No. features = 2000
 #> 
 #>  Pre-treated Data:
-#>      Wed Nov 17 10:31:05 2021
+#>      Fri Dec 17 18:01:48 2021
 #>      No. samples = 60
 #>      No. features = 1723

The pre-treated data can be extracted from the Analysis object using several methods.

-

Firstly the analysisResults() method.

+

Firstly the analysisResults() method.

-analysisResults(analysis,'pre-treatment')
+analysisResults(analysis,'pre-treatment')
 #> 
 #> AnalysisData object containing:
 #> 
 #> Samples: 60 
 #> Features: 1723 
 #> Info: 9
-

And secondly the preTreated() method.

+

And secondly the preTreated() method.

-preTreated(analysis)
+preTreated(analysis)
 #> 
 #> AnalysisData object containing:
 #> 
@@ -558,7 +554,7 @@ 

#> Info: 9

A supervised random forest analysis can be used to visualise the structure of the resulting pre-treated data.

-analysis %>%
+analysis %>%
   plotSupervisedRF(cls = 'day',type = 'pre-treated')

@@ -575,11 +571,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -588,5 +586,7 @@

+ + diff --git a/docs/articles/pre_treatment_files/figure-html/impute_all_lda-1.png b/docs/articles/pre_treatment_files/figure-html/impute_all_lda-1.png index 6a8a21ff..b5bf7242 100644 Binary files a/docs/articles/pre_treatment_files/figure-html/impute_all_lda-1.png and b/docs/articles/pre_treatment_files/figure-html/impute_all_lda-1.png differ diff --git a/docs/articles/pre_treatment_files/figure-html/imputed_class_lda-1.png b/docs/articles/pre_treatment_files/figure-html/imputed_class_lda-1.png index 114faf7d..45fc8fd7 100644 Binary files a/docs/articles/pre_treatment_files/figure-html/imputed_class_lda-1.png and b/docs/articles/pre_treatment_files/figure-html/imputed_class_lda-1.png differ diff --git a/docs/articles/pre_treatment_files/figure-html/lda-1.png b/docs/articles/pre_treatment_files/figure-html/lda-1.png index 421b362d..47d29d86 100644 Binary files a/docs/articles/pre_treatment_files/figure-html/lda-1.png and b/docs/articles/pre_treatment_files/figure-html/lda-1.png differ diff --git a/docs/articles/quick_start.html b/docs/articles/quick_start.html index fb4034a4..b1d46039 100644 --- a/docs/articles/quick_start.html +++ b/docs/articles/quick_start.html @@ -19,6 +19,8 @@ + +
+
-

This example analysis will use the abr1 data set from the metaboData package. It is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The analysis will also include use of the pipe %>% from the magrittr package. First load the necessary packages.

+

This example analysis will use the abr1 data set from the metaboData package. It is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The analysis will also include use of the pipe %>% from the magrittr package. First load the necessary packages.

+library(metabolyseR) +library(metaboData)

For this example we will use only the negative acquisition mode data (abr1$neg) and sample meta-information (abr1$fact). Create an AnalysisData class object using the following:

 d <- analysisData(abr1$neg,abr1$fact)
@@ -117,25 +113,25 @@

17 November, 2021

#> Samples: 120 #> Features: 2000 #> Info: 9
-

The clsAvailable() function can be used to identify the columns available in our meta-information table.

+

The clsAvailable() function can be used to identify the columns available in our meta-information table.

-clsAvailable(d)
+clsAvailable(d)
 #> [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
 #> [8] "day"      "class"

For this analysis, we will be using the infection time course class information contained in the day column. This can be extracted and the class frequencies tabulated using the following:

-d %>%
-  clsExtract(cls = 'day') %>%
-  table()
+d %>%
+  clsExtract(cls = 'day') %>%
+  table()
 #> .
 #>  1  2  3  4  5  H 
 #> 20 20 20 20 20 20

As can be seen above, the experiment is made up of six infection time point classes that include a healthy control class (H) and five day infection time points (1-5), each with 20 replicates.

For data pre-treatment prior to statistical analysis, a two-thirds maximum class occupancy filter can be applied. Features where the maximum proportion of non-missing data per class is above two-thirds are retained. A total ion count normalisation will also be applied.

-d <- d %>%
-  occupancyMaximum(cls = 'day', occupancy = 2/3) %>%
-  transformTICnorm()
+d <- d %>% + occupancyMaximum(cls = 'day', occupancy = 2/3) %>% + transformTICnorm()
 d
 #> 
@@ -156,27 +152,27 @@ 

17 November, 2021

A progression can clearly be seen from the earliest to latest infected time points.

For feature selection, one-way analysis of variance (ANOVA) can be performed for each feature to identify features significantly explanatory for the infection time point.

-anova_results <- d %>%
+anova_results <- d %>%
   anova(cls = 'day')

A table of the features that are significantly explanatory, with a Bonferroni-adjusted p value < 0.05, can be extracted using:

-explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05)
+explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05)
 explan_feat
-#> # A tibble: 379 × 10
+#> # A tibble: 379 × 10
 #>    Response Comparison  Feature term      df    sumsq  meansq statistic  p.value
-#>    <chr>    <chr>       <chr>   <chr>  <dbl>    <dbl>   <dbl>     <dbl>    <dbl>
-#>  1 day      1~2~3~4~5~H N341    respo…     5  3.88e-4 7.76e-5     137.  1.55e-46
-#>  2 day      1~2~3~4~5~H N133    respo…     5  7.00e-5 1.40e-5     126.  8.63e-45
-#>  3 day      1~2~3~4~5~H N163    respo…     5  6.01e-5 1.20e-5     117.  2.95e-43
-#>  4 day      1~2~3~4~5~H N1087   respo…     5  2.42e-6 4.84e-7      99.8 5.61e-40
-#>  5 day      1~2~3~4~5~H N171    respo…     5  2.25e-7 4.50e-8      95.7 3.84e-39
-#>  6 day      1~2~3~4~5~H N513    respo…     5  3.38e-6 6.76e-7      95.3 4.78e-39
-#>  7 day      1~2~3~4~5~H N1025   respo…     5  2.78e-6 5.56e-7      91.0 3.91e-38
-#>  8 day      1~2~3~4~5~H N342    respo…     5  3.71e-6 7.41e-7      90.3 5.32e-38
-#>  9 day      1~2~3~4~5~H N1083   respo…     5  5.11e-5 1.02e-5      89.0 1.06e-37
-#> 10 day      1~2~3~4~5~H N1085   respo…     5  1.10e-5 2.19e-6      83.4 1.92e-36
-#> # … with 369 more rows, and 1 more variable: adjusted.p.value <dbl>
+#> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 day 1~2~3~4~5~H N341 respo… 5 3.88e-4 7.76e-5 137. 1.55e-46 +#> 2 day 1~2~3~4~5~H N133 respo… 5 7.00e-5 1.40e-5 126. 8.63e-45 +#> 3 day 1~2~3~4~5~H N163 respo… 5 6.01e-5 1.20e-5 117. 2.95e-43 +#> 4 day 1~2~3~4~5~H N1087 respo… 5 2.42e-6 4.84e-7 99.8 5.61e-40 +#> 5 day 1~2~3~4~5~H N171 respo… 5 2.25e-7 4.50e-8 95.7 3.84e-39 +#> 6 day 1~2~3~4~5~H N513 respo… 5 3.38e-6 6.76e-7 95.3 4.78e-39 +#> 7 day 1~2~3~4~5~H N1025 respo… 5 2.78e-6 5.56e-7 91.0 3.91e-38 +#> 8 day 1~2~3~4~5~H N342 respo… 5 3.71e-6 7.41e-7 90.3 5.32e-38 +#> 9 day 1~2~3~4~5~H N1083 respo… 5 5.11e-5 1.02e-5 89.0 1.06e-37 +#> 10 day 1~2~3~4~5~H N1085 respo… 5 1.10e-5 2.19e-6 83.4 1.92e-36 +#> # … with 369 more rows, and 1 more variable: adjusted.p.value <dbl>

The ANOVA has identified 379 features significantly explanatory over the infection time course. A heat map of the mean relative intensity for each class of these explanatory features can be plotted to visualise their trends between the infection time point classes.

 plotExplanatoryHeatmap(anova_results,
@@ -199,11 +195,13 @@ 

17 November, 2021

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -212,5 +210,7 @@

17 November, 2021

+ + diff --git a/docs/authors.html b/docs/authors.html index beb96ac6..a0e13dc2 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,66 +1,12 @@ - - - - - - - -Authors • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Authors and Citation • metabolyseR - - + + - - - -
-
-
- -
+
- @@ -144,22 +95,20 @@

Authors

-
- +
- - + + diff --git a/docs/index.html b/docs/index.html index 5a72de62..f6c248f8 100644 --- a/docs/index.html +++ b/docs/index.html @@ -19,6 +19,8 @@ + +
-
- +
+

A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data.

-
-

-Overview

+
+

Overview +

This package provides a tool kit of methods for metabolomics analyses that includes:

  • data pre-treatment
  • @@ -105,27 +101,27 @@

  • correlation analysis
-
-

-Installation

+
+

Installation +

The metabolyseR package can be installed from GitHub using the following:

-devtools::install_github('jasenfinch/metabolyseR',build_vignettes = TRUE)
+devtools::install_github('jasenfinch/metabolyseR',build_vignettes = TRUE)

-
-

-Learn more

+
+

Learn more +

The package documentation can be browsed online at https://jasenfinch.github.io/metabolyseR/.

If this is your first time using metabolyseR see the Introduction vignette or the quick start analysis below for information on how to get started.

-

If you believe you’ve found a bug in metabolyseR, please file a bug (and, if possible, a reproducible example) at https://github.com/jasenfinch/metabolyseR/issues.

+

If you believe you’ve found a bug in metabolyseR, please file a bug (and, if possible, a reproducible example) at https://github.com/jasenfinch/metabolyseR/issues.

-
-

-Quick start example analysis

-

This example analysis will use the abr1 data set from the metaboData package. It is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The analysis will also include use of the pipe %>% from the magrittr package. First load the necessary packages.

+
+

Quick start example analysis +

+

This example analysis will use the abr1 data set from the metaboData package. It is nominal mass flow-injection mass spectrometry (FI-MS) fingerprinting data from a plant-pathogen infection time course experiment. The analysis will also include use of the pipe %>% from the magrittr package. First load the necessary packages.

+library(metabolyseR) +library(metaboData)

For this example we will use only the negative acquisition mode data (abr1$neg) and sample meta-information (abr1$fact). Create an AnalysisData class object using the following:

 d <- analysisData(abr1$neg,abr1$fact)
@@ -138,25 +134,25 @@

#> Samples: 120 #> Features: 2000 #> Info: 9

-

The clsAvailable() function can be used to identify the columns available in our meta-information table.

+

The clsAvailable() function can be used to identify the columns available in our meta-information table.

-clsAvailable(d)
+clsAvailable(d)
 #> [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
 #> [8] "day"      "class"

For this analysis, we will be using the infection time course class information contained in the day column. This can be extracted and the class frequencies tabulated using the following:

-d %>%
-  clsExtract(cls = 'day') %>%
-  table()
+d %>%
+  clsExtract(cls = 'day') %>%
+  table()
 #> .
 #>  1  2  3  4  5  H 
 #> 20 20 20 20 20 20

As can be seen above, the experiment is made up of six infection time point classes that include a healthy control class (H) and five day infection time points (1-5), each with 20 replicates.

For data pre-treatment prior to statistical analysis, a two-thirds maximum class occupancy filter can be applied. Features where the maximum proportion of non-missing data per class is above two-thirds are retained. A total ion count normalisation will also be applied.

-d <- d %>%
-  occupancyMaximum(cls = 'day', occupancy = 2/3) %>%
-  transformTICnorm()
+d <- d %>% + occupancyMaximum(cls = 'day', occupancy = 2/3) %>% + transformTICnorm()
 d
 #> 
@@ -177,11 +173,11 @@ 

A progression can clearly be seen from the earliest to latest infected time points.

For feature selection, one-way analysis of variance (ANOVA) can be performed for each feature to identify features significantly explanatory for the infection time point.

-anova_results <- d %>%
+anova_results <- d %>%
   anova(cls = 'day')

A table of the features that are significantly explanatory, with a Bonferroni-adjusted p value < 0.05, can be extracted using:

-explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05)
+explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05)

 explan_feat
 #> # A tibble: 379 × 10
@@ -215,48 +211,59 @@ 

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -265,5 +272,7 @@

Dev status

+ + diff --git a/docs/news/index.html b/docs/news/index.html index 57fd42e0..05861051 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -1,66 +1,12 @@ - - - - - - - -Changelog • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Changelog • metabolyseR - + + - - - -
-
- -
- -
+
-
-

-metabolyseR 0.14.6

-
    -
  • plotExplanatoryHeatmap method for the Analysis class now returns the plot only if the number of plots is equal to 1.

  • +
    + +
    • Single replicate classes now automatically removed by plotLDA().
    • +
    +
    + + -
    -
    -

    -metabolyseR 0.14.5

    -
      -
    • Correlation analysis results now include an absolute correlation coefficient column by which the results are also arranged in descending order.
    • -
    -
    -
    -

    -metabolyseR 0.14.4

    - -
    -
    -

    -metabolyseR 0.14.3

    -
    +
    + +
    • Correlation analysis results now include an absolute correlation coefficient column by which the results are also arranged in descending order.
    • +
    +
    + +
    +
    + + -
    -
    -

    -metabolyseR 0.14.2

    -
      -
    • Package version, creation date and verbose argument added to prototype of Analysis class.

    • +
    +
    + +
    • Package version, creation date and verbose argument added to prototype of Analysis class.

    • All generics are now defined as standard generics.

    • Added metrics method for Analysis class.

    • metrics method for lists now ignores list elements that are not of class RandomForest.

    • -
    -
    -
    -

    -metabolyseR 0.14.1

    -
      -
    • Changed the RSDthresh argument default to 50% instead of 0.5% in QCrsdFilter generic.
    • -
    -
    -
    -

    -metabolyseR 0.14.0

    -
      -
    • Added a NEWS.md file to track changes to the package.

    • +
    +
    + +
    • Changed the RSDthresh argument default to 50% instead of 0.5% in QCrsdFilter generic.
    • +
    +
    + + -
    +
+
-
- +
- - + + diff --git a/docs/pkgdown.css b/docs/pkgdown.css index 1273238d..80ea5b83 100644 --- a/docs/pkgdown.css +++ b/docs/pkgdown.css @@ -56,8 +56,10 @@ img.icon { float: right; } -img { +/* Ensure in-page images don't run outside their container */ +.contents img { max-width: 100%; + height: auto; } /* Fix bug in bootstrap (only seen in firefox) */ @@ -78,11 +80,10 @@ dd { /* Section anchors ---------------------------------*/ a.anchor { - margin-left: -30px; - display:inline-block; - width: 30px; - height: 30px; - visibility: hidden; + display: none; + margin-left: 5px; + width: 20px; + height: 20px; background-image: url(./link.svg); background-repeat: no-repeat; @@ -90,17 +91,15 @@ a.anchor { background-position: center center; } -.hasAnchor:hover a.anchor { - visibility: visible; -} - -@media (max-width: 767px) { - .hasAnchor:hover a.anchor { - visibility: hidden; - } +h1:hover .anchor, +h2:hover .anchor, +h3:hover .anchor, +h4:hover .anchor, +h5:hover .anchor, +h6:hover .anchor { + display: inline-block; } - /* Fixes for fixed navbar --------------------------*/ .contents h1, .contents h2, .contents h3, .contents h4 { @@ -264,31 +263,26 @@ table { /* Syntax highlighting ---------------------------------------------------- */ -pre { - word-wrap: normal; - word-break: normal; - border: 1px solid #eee; -} - -pre, code { +pre, code, pre code { background-color: #f8f8f8; color: #333; } +pre, pre code { + white-space: pre-wrap; + word-break: break-all; + overflow-wrap: break-word; +} -pre code { - overflow: auto; - word-wrap: normal; - white-space: pre; +pre { + border: 1px solid #eee; } -pre .img { +pre .img, pre .r-plt { margin: 5px 0; } -pre .img img { +pre .img img, pre .r-plt img { background-color: #fff; - display: block; - height: auto; } code a, pre a { @@ -305,9 +299,8 @@ a.sourceLine:hover { .kw {color: #264D66;} /* keyword */ .co {color: #888888;} /* comment */ -.message { color: black; font-weight: bolder;} -.error { color: orange; font-weight: 
bolder;} -.warning { color: #6A0366; font-weight: bolder;} +.error {font-weight: bolder;} +.warning {font-weight: bolder;} /* Clipboard --------------------------*/ @@ -365,3 +358,27 @@ mark { content: ""; } } + +/* Section anchors --------------------------------- + Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 +*/ + +div.csl-bib-body { } +div.csl-entry { + clear: both; +} +.hanging-indent div.csl-entry { + margin-left:2em; + text-indent:-2em; +} +div.csl-left-margin { + min-width:2em; + float:left; +} +div.csl-right-inline { + margin-left:2em; + padding-left:1em; +} +div.csl-indent { + margin-left: 2em; +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js index 7e7048fa..6f0eee40 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -80,7 +80,7 @@ $(document).ready(function() { var copyButton = ""; - $(".examples, div.sourceCode").addClass("hasCopyButton"); + $("div.sourceCode").addClass("hasCopyButton"); // Insert copy buttons: $(copyButton).prependTo(".hasCopyButton"); @@ -91,7 +91,7 @@ // Initialize clipboard: var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { text: function(trigger) { - return trigger.parentNode.textContent; + return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); } }); diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 00200bda..7e5acf98 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,13 +1,13 @@ pandoc: 2.11.4 -pkgdown: 1.6.1 +pkgdown: 2.0.1 pkgdown_sha: ~ articles: metabolyseR: metabolyseR.html modelling: modelling.html pre_treatment: pre_treatment.html quick_start: quick_start.html -last_built: 2021-11-17T10:23Z +last_built: 2021-12-17T17:54Z urls: - reference: https://jasenfinch.github.io/metabolyseR//reference - article: https://jasenfinch.github.io/metabolyseR//articles + reference: https://jasenfinch.github.io/metabolyseR/reference + article: https://jasenfinch.github.io/metabolyseR/articles diff --git a/docs/reference/Analysis-class.html 
b/docs/reference/Analysis-class.html index b918b86e..02889105 100644 --- a/docs/reference/Analysis-class.html +++ b/docs/reference/Analysis-class.html @@ -1,67 +1,12 @@ - - - - - - - -Analysis S4 class — Analysis-class • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Analysis S4 class — Analysis-class • metabolyseR - - - - + + -
-
- -
- -
+
@@ -139,51 +69,57 @@

Analysis S4 class

+
+

Slots

+ -

Slots

+
log
+

list containing analysis dates and time

+ + +
parameters
+

class AnalysisParameters containing the analysis parameters

- -
-
log

list containing analysis dates and time

+
raw
+

list containing info and raw data

-
parameters

class AnalysisParameters containing the analysis parameters

-
raw

list containing info and raw data

+
pre-treated
+

list containing preTreated info and raw data

-
pre-treated

list containing preTreated info and raw data

-
modelling

list containing modelling results

+
modelling
+

list containing modelling results

-
correlations

tibble containing weighted edgelist of correlations

-
+
correlations
+

tibble containing weighted edgelist of correlations

+ + +
+
-
- +
- - + + diff --git a/docs/reference/AnalysisData-class.html b/docs/reference/AnalysisData-class.html index c25ccca7..8ba630b1 100644 --- a/docs/reference/AnalysisData-class.html +++ b/docs/reference/AnalysisData-class.html @@ -1,67 +1,12 @@ - - - - - - - -AnalysisData S4 class — AnalysisData-class • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -AnalysisData S4 class — AnalysisData-class • metabolyseR - - - - + + -
-
- -
- -
+
@@ -139,43 +69,41 @@

AnalysisData S4 class

+
+

Slots

+ -

Slots

+
data
+

sample metabolomic data

- -
-
data

sample metabolomic data

+
info
+

sample meta information

-
info

sample meta information

-
+
+
-
- +
- - + + diff --git a/docs/reference/AnalysisParameters-class.html b/docs/reference/AnalysisParameters-class.html index 5d79ed41..f29fed68 100644 --- a/docs/reference/AnalysisParameters-class.html +++ b/docs/reference/AnalysisParameters-class.html @@ -1,67 +1,12 @@ - - - - - - - -AnalysisParameters S4 class — AnalysisParameters-class • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -AnalysisParameters S4 class — AnalysisParameters-class • metabolyseR - - - - + + -
-
- -
- -
+
@@ -139,45 +69,45 @@

AnalysisParameters S4 class

+
+

Slots

+ -

Slots

+
pre-treatment
+

list containing parameters for data pre-treatment

- -
-
pre-treatment

list containing parameters for data pre-treatment

+
modelling
+

list containing parameters for modelling

+ -
modelling

list containing parameters for modelling

+
correlations
+

list containing parameters for correlations

-
correlations

list containing parameters for correlations

-
+
+
-
- +
- - + + diff --git a/docs/reference/QC.html b/docs/reference/QC.html index 842365c0..581ee1d3 100644 --- a/docs/reference/QC.html +++ b/docs/reference/QC.html @@ -1,67 +1,12 @@ - - - - - - - -Quality control (QC) sample treatments — QCimpute • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Quality control (QC) sample treatments — QCimpute • metabolyseR - - - - + + -
-
- -
- -
+
@@ -138,78 +68,65 @@

Quality control (QC) sample treatments

Quality control (QC) sample pre-treatment methods.

-
QCimpute(
-  d,
-  cls = "class",
-  QCidx = "QC",
-  occupancy = 2/3,
-  parallel = "variables",
-  seed = 1234
-)
-
-# S4 method for AnalysisData
-QCimpute(
-  d,
-  cls = "class",
-  QCidx = "QC",
-  occupancy = 2/3,
-  parallel = "variables",
-  seed = 1234
-)
-
-QCoccupancy(d, cls = "class", QCidx = "QC", occupancy = 2/3)
+    
+
QCimpute(
+  d,
+  cls = "class",
+  QCidx = "QC",
+  occupancy = 2/3,
+  parallel = "variables",
+  seed = 1234
+)
 
-# S4 method for AnalysisData
-QCoccupancy(d, cls = "class", QCidx = "QC", occupancy = 2/3)
+# S4 method for AnalysisData
+QCimpute(
+  d,
+  cls = "class",
+  QCidx = "QC",
+  occupancy = 2/3,
+  parallel = "variables",
+  seed = 1234
+)
 
-QCremove(d, cls = "class", QCidx = "QC")
+QCoccupancy(d, cls = "class", QCidx = "QC", occupancy = 2/3)
 
-# S4 method for AnalysisData
-QCremove(d, cls = "class", QCidx = "QC")
+# S4 method for AnalysisData
+QCoccupancy(d, cls = "class", QCidx = "QC", occupancy = 2/3)
 
-QCrsdFilter(d, cls = "class", QCidx = "QC", RSDthresh = 50)
+QCremove(d, cls = "class", QCidx = "QC")
 
-# S4 method for AnalysisData
-QCrsdFilter(d, cls = "class", QCidx = "QC", RSDthresh = 50)
+# S4 method for AnalysisData +QCremove(d, cls = "class", QCidx = "QC") -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
d

S4 object of class AnalysisData

cls

info column to use for class labels

QCidx

QC sample label

occupancy

occupancy threshold for filtering

parallel

parallel type to use. See ?missForest for details

seed

random number seed

RSDthresh

RSD (%) threshold for filtering

+QCrsdFilter(d, cls = "class", QCidx = "QC", RSDthresh = 50) -

Value

+# S4 method for AnalysisData +QCrsdFilter(d, cls = "class", QCidx = "QC", RSDthresh = 50)
+
+
+

Arguments

+
d
+

S4 object of class AnalysisData

+
cls
+

info column to use for class labels

+
QCidx
+

QC sample label

+
occupancy
+

occupancy threshold for filtering

+
parallel
+

parallel type to use. See ?missForest for details

+
seed
+

random number seed

+
RSDthresh
+

RSD (%) threshold for filtering

+
+
+

Value

An S4 object of class AnalysisData containing QC treated data.

-

Details

- +
+
+

Details

A QC sample is an average pooled sample, equally representative in composition of all the samples present within an experimental set. Within an analytical run, the QC sample is analysed at equal intervals throughout the run. If there is class structure within the run, this should be randomised in a block fashion so that the classes are equally represented in each block throughout the run. @@ -219,77 +136,74 @@

Details The variable RSDs can then be filtered below a threshold value to remove metabolome features that are poorly reproducible across the analytical runs. This variable filtering strategy has an advantage over that of occupancy alone as it is not dependent on underlying class structure. Therefore, the variables and variable numbers will not alter if a new class structure is imposed upon the data.

-

Methods

- +
+
+

Methods

-
    -
  • QCimpute: Missing value imputation of QC samples.

  • +
    • QCimpute: Missing value imputation of QC samples.

    • QCoccupancy: Feature maximum occupancy filtering based on QC samples.

    • QCremove: Remove QC samples.

    • QCrsdFilter: Feature filtering based on the RSD of QC sample features.

    • -
    - - -

    Examples

    -
    
    -## Initial example data preparation
    -library(metaboData)
    -d <- analysisData(abr1$neg[,1:1000],abr1$fact)
    -
    -## Plot the feature RSD distributions of the H class only
    -d %>% 
    - keepClasses(cls = 'day',classes = 'H') %>% 
    - plotRSD(cls = 'day')
    -#> Warning: Removed 119 rows containing non-finite values (stat_density).
    -#> Warning: Removed 1 row(s) containing missing values (geom_path).
    -
    -
    -## Apply QC feature occupancy filtering and QC feature RSD filtering
    -QC_treated <- d %>% 
    - QCoccupancy(cls = 'day',QCidx = 'H',occupancy = 2/3) %>%
    - QCrsdFilter(cls = 'day',QCidx = 'H',RSDthresh = 50)
    -
    -print(QC_treated)
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 120 
    -#> Features: 404 
    -#> Info: 9 
    -#> 
    -
    -## Plot the feature RSD distributions of the H class after QC treatments
    -QC_treated %>% 
    - keepClasses(cls = 'day',classes = 'H') %>% 
    - plotRSD(cls = 'day')
    -
    -
    +
+ +
+

Examples

+

+## Initial example data preparation
+library(metaboData)
+d <- analysisData(abr1$neg[,1:1000],abr1$fact)
+
+## Plot the feature RSD distributions of the H class only
+d %>% 
+ keepClasses(cls = 'day',classes = 'H') %>% 
+ plotRSD(cls = 'day')
+#> Warning: Removed 119 rows containing non-finite values (stat_density).
+#> Warning: Removed 1 row(s) containing missing values (geom_path).
+
+
+## Apply QC feature occupancy filtering and QC feature RSD filtering
+QC_treated <- d %>% 
+ QCoccupancy(cls = 'day',QCidx = 'H',occupancy = 2/3) %>%
+ QCrsdFilter(cls = 'day',QCidx = 'H',RSDthresh = 50)
+
+print(QC_treated)
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 120 
+#> Features: 404 
+#> Info: 9 
+#> 
+
+## Plot the feature RSD distributions of the H class after QC treatments
+QC_treated %>% 
+ keepClasses(cls = 'day',classes = 'H') %>% 
+ plotRSD(cls = 'day')
+
+
+
+ -
- +
- - + + diff --git a/docs/reference/RandomForest-class.html b/docs/reference/RandomForest-class.html index 5703377d..d0ae9276 100644 --- a/docs/reference/RandomForest-class.html +++ b/docs/reference/RandomForest-class.html @@ -1,67 +1,12 @@ - - - - - - - -RandomForest S4 class — RandomForest-class • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -RandomForest S4 class — RandomForest-class • metabolyseR - - - - + + -
-
- -
- -
+
@@ -139,55 +69,65 @@

RandomForest S4 class

+
+

Slots

+ -

Slots

+
type
+

random forest type

- -
-
type

random forest type

+
response
+

response variable name

+ + +
results
+

list of measure and importance results tables

+ -
response

response variable name

+
predictions
+

tibble of model observation predictions

-
results

list of measure and importance results tables

-
predictions

tibble of model observation predictions

+
permutations
+

list of permutations measure and importance results tables

-
permutations

list of permutations measure and importance results tables

-
importances

tibble of model feature importances

+
importances
+

tibble of model feature importances

-
proximities

tibble of model observation proximities

-
models

list of random forest models

+
proximities
+

tibble of model observation proximities

-
+ +
models
+

list of random forest models

+ + +
+
-
- +
- - + + diff --git a/docs/reference/Rplot002.png b/docs/reference/Rplot002.png index 68a51218..52603ab3 100644 Binary files a/docs/reference/Rplot002.png and b/docs/reference/Rplot002.png differ diff --git a/docs/reference/Rplot003.png b/docs/reference/Rplot003.png index f3909bb2..e0a455f3 100644 Binary files a/docs/reference/Rplot003.png and b/docs/reference/Rplot003.png differ diff --git a/docs/reference/Rplot004.png b/docs/reference/Rplot004.png index e05106b6..889f8af7 100644 Binary files a/docs/reference/Rplot004.png and b/docs/reference/Rplot004.png differ diff --git a/docs/reference/Rplot005.png b/docs/reference/Rplot005.png index 670d579c..3e1c1f6d 100644 Binary files a/docs/reference/Rplot005.png and b/docs/reference/Rplot005.png differ diff --git a/docs/reference/Rplot006.png b/docs/reference/Rplot006.png index 5c76ba44..eeecf104 100644 Binary files a/docs/reference/Rplot006.png and b/docs/reference/Rplot006.png differ diff --git a/docs/reference/Rplot007.png b/docs/reference/Rplot007.png index 2db04422..93b57c67 100644 Binary files a/docs/reference/Rplot007.png and b/docs/reference/Rplot007.png differ diff --git a/docs/reference/Rplot008.png b/docs/reference/Rplot008.png index 07b274c2..b9aaf427 100644 Binary files a/docs/reference/Rplot008.png and b/docs/reference/Rplot008.png differ diff --git a/docs/reference/Rplot009.png b/docs/reference/Rplot009.png index ebca94ee..9dfda762 100644 Binary files a/docs/reference/Rplot009.png and b/docs/reference/Rplot009.png differ diff --git a/docs/reference/Rplot010.png b/docs/reference/Rplot010.png index e1f6f8ff..be994793 100644 Binary files a/docs/reference/Rplot010.png and b/docs/reference/Rplot010.png differ diff --git a/docs/reference/Rplot011.png b/docs/reference/Rplot011.png index a7ac9860..289f937e 100644 Binary files a/docs/reference/Rplot011.png and b/docs/reference/Rplot011.png differ diff --git a/docs/reference/Rplot012.png b/docs/reference/Rplot012.png index d866bc2e..3a2038bf 100644 Binary files 
a/docs/reference/Rplot012.png and b/docs/reference/Rplot012.png differ diff --git a/docs/reference/Univariate-class.html b/docs/reference/Univariate-class.html index b668f5e7..0d63c4a0 100644 --- a/docs/reference/Univariate-class.html +++ b/docs/reference/Univariate-class.html @@ -1,67 +1,12 @@ - - - - - - - -Univariate S4 class — Univariate-class • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Univariate S4 class — Univariate-class • metabolyseR - - - - + + -
-
- -
- -
+
@@ -139,45 +69,45 @@

Univariate S4 class

+
+

Slots

+ -

Slots

+
type
+

univariate test type

- -
-
type

univariate test type

+
models
+

list of model objects

+ -
models

list of model objects

+
results
+

tibble containing test results

-
results

tibble containing test results

-
+
+
-
- +
- - + + diff --git a/docs/reference/aggregate.html b/docs/reference/aggregate.html index a520531d..3ffa2159 100644 --- a/docs/reference/aggregate.html +++ b/docs/reference/aggregate.html @@ -1,67 +1,12 @@ - - - - - - - -Sample aggregation — aggregateMean • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Sample aggregation — aggregateMean • metabolyseR - - - - + + -
-
- -
- -
+
@@ -138,109 +68,103 @@

Sample aggregation

Aggregation of sample features based on a grouping variable.

-
aggregateMean(d, cls = "class")
+    
+
aggregateMean(d, cls = "class")
 
-# S4 method for AnalysisData
-aggregateMean(d, cls = "class")
+# S4 method for AnalysisData
+aggregateMean(d, cls = "class")
 
-aggregateMedian(d, cls = "class")
+aggregateMedian(d, cls = "class")
 
-# S4 method for AnalysisData
-aggregateMedian(d, cls = "class")
+# S4 method for AnalysisData
+aggregateMedian(d, cls = "class")
 
-aggregateSum(d, cls = "class")
+aggregateSum(d, cls = "class")
 
-# S4 method for AnalysisData
-aggregateSum(d, cls = "class")
- -

Arguments

- - - - - - - - - - -
d

S4 object of class AnalysisData

cls

info column to use for class data

- -

Value

+# S4 method for AnalysisData +aggregateSum(d, cls = "class")
+
+
+

Arguments

+
d
+

S4 object of class AnalysisData

+
cls
+

info column to use for class data

+
+
+

Value

An S4 object of class AnalysisData containing the aggregated data.

-

Details

- +
+
+

Details

Sample aggregation allows the electronic pooling of sample features based on a grouping variable. This is useful in situations such as the presence of technical replicates that can be aggregated to reduce the effects of pseudo replication.

-

Methods

- +
+
+

Methods

-
    -
  • aggregateMean: Aggregate sample features to the group mean.

  • +
    • aggregateMean: Aggregate sample features to the group mean.

    • aggregateMedian: Aggregate sample features to the group median.

    • aggregateSum: Aggregate sample features to the group total.

    • -
    - - -

    Examples

    -
    ## Each of the following examples shows the application of the aggregation method and then 
    -## a Principle Component Analysis is plotted to show it's effect on the data structure.
    -
    -## Initial example data preparation
    -library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
    - occupancyMaximum(occupancy = 2/3)
    -
    -d %>% 
    - plotPCA(cls = 'day')
    -
    - 
    -## Mean aggregation
    -d %>% 
    - aggregateMean(cls = 'day') %>% 
    - plotPCA(cls = 'day',ellipses = FALSE)
    -
    - 
    -## Median aggregation
    -d %>% 
    - aggregateMedian(cls = 'day') %>% 
    - plotPCA(cls = 'day',ellipses = FALSE)
    -
    - 
    -## Sum aggregation
    -d %>% 
    - aggregateSum(cls = 'day') %>% 
    - plotPCA(cls = 'day',ellipses = FALSE)
    -
    -
    +
+ +
+

Examples

+
## Each of the following examples shows the application of the aggregation method and then 
+## a Principle Component Analysis is plotted to show it's effect on the data structure.
+
+## Initial example data preparation
+library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
+ occupancyMaximum(occupancy = 2/3)
+
+d %>% 
+ plotPCA(cls = 'day')
+
+ 
+## Mean aggregation
+d %>% 
+ aggregateMean(cls = 'day') %>% 
+ plotPCA(cls = 'day',ellipses = FALSE)
+
+ 
+## Median aggregation
+d %>% 
+ aggregateMedian(cls = 'day') %>% 
+ plotPCA(cls = 'day',ellipses = FALSE)
+
+ 
+## Sum aggregation
+d %>% 
+ aggregateSum(cls = 'day') %>% 
+ plotPCA(cls = 'day',ellipses = FALSE)
+
+
+
+ -
- +
- - + + diff --git a/docs/reference/analysis-accessors.html b/docs/reference/analysis-accessors.html index bd0ab301..94353752 100644 --- a/docs/reference/analysis-accessors.html +++ b/docs/reference/analysis-accessors.html @@ -1,67 +1,12 @@ - - - - - - - -AnalysisData and Analysis class accessors — dat • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -AnalysisData and Analysis class accessors — dat • metabolyseR - - - - + + -
-
- -
- -
+
@@ -138,118 +68,107 @@

AnalysisData and Analysis class accessors

Accessor methods for the AnalysisData and Analysis S4 classes.

-
dat(x, ...)
-
-# S4 method for AnalysisData
-dat(x)
-
-# S4 method for Analysis
-dat(x, type = c("raw", "pre-treated"))
+    
+
dat(x, ...)
 
-dat(x, ...) <- value
+# S4 method for AnalysisData
+dat(x)
 
-# S4 method for AnalysisData
-dat(x) <- value
+# S4 method for Analysis
+dat(x, type = c("raw", "pre-treated"))
 
-# S4 method for Analysis
-dat(x, type = c("raw", "pre-treated")) <- value
+dat(x, ...) <- value
 
-sinfo(x, ...)
+# S4 method for AnalysisData
+dat(x) <- value
 
-# S4 method for AnalysisData
-sinfo(x)
+# S4 method for Analysis
+dat(x, type = c("raw", "pre-treated")) <- value
 
-# S4 method for Analysis
-sinfo(x, type = c("raw", "pre-treated"), value)
+sinfo(x, ...)
 
-sinfo(x, ...) <- value
+# S4 method for AnalysisData
+sinfo(x)
 
-# S4 method for AnalysisData
-sinfo(x) <- value
+# S4 method for Analysis
+sinfo(x, type = c("raw", "pre-treated"), value)
 
-# S4 method for Analysis
-sinfo(x, type = c("raw", "pre-treated")) <- value
+sinfo(x, ...) <- value
 
-raw(x)
+# S4 method for AnalysisData
+sinfo(x) <- value
 
-# S4 method for Analysis
-raw(x)
+# S4 method for Analysis
+sinfo(x, type = c("raw", "pre-treated")) <- value
 
-raw(x) <- value
+raw(x)
 
-# S4 method for Analysis
-raw(x) <- value
+# S4 method for Analysis
+raw(x)
 
-preTreated(x)
+raw(x) <- value
 
-# S4 method for Analysis
-preTreated(x)
+# S4 method for Analysis
+raw(x) <- value
 
-preTreated(x) <- value
+preTreated(x)
 
-# S4 method for Analysis
-preTreated(x) <- value
+# S4 method for Analysis
+preTreated(x)
 
-features(x, ...)
+preTreated(x) <- value
 
-# S4 method for AnalysisData
-features(x)
+# S4 method for Analysis
+preTreated(x) <- value
 
-# S4 method for Analysis
-features(x, type = c("raw", "pre-treated"))
+features(x, ...)
 
-nSamples(x, ...)
+# S4 method for AnalysisData
+features(x)
 
-# S4 method for AnalysisData
-nSamples(x)
+# S4 method for Analysis
+features(x, type = c("raw", "pre-treated"))
 
-# S4 method for Analysis
-nSamples(x, type = c("raw", "pre-treated"))
+nSamples(x, ...)
 
-nFeatures(x, ...)
+# S4 method for AnalysisData
+nSamples(x)
 
-# S4 method for AnalysisData
-nFeatures(x)
+# S4 method for Analysis
+nSamples(x, type = c("raw", "pre-treated"))
 
-# S4 method for Analysis
-nFeatures(x, type = c("raw", "pre-treated"))
+nFeatures(x, ...)
 
-analysisResults(x, element)
+# S4 method for AnalysisData
+nFeatures(x)
 
-# S4 method for Analysis
-analysisResults(x, element)
+# S4 method for Analysis +nFeatures(x, type = c("raw", "pre-treated")) -

Arguments

- - - - - - - - - - - - - - - - - - - - - - -
x

S4 object of class AnalysisData or Analysis

...

arguments to pass to the appropriate method

type

get or set raw or pre-treated data

value

value to set

element

analysis element results to return

+analysisResults(x, element) -

Methods

+# S4 method for Analysis +analysisResults(x, element)
+
+
+

Arguments

+
x
+

S4 object of class AnalysisData or Analysis

+
...
+

arguments to pass to the appropriate method

+
type
+

get or set raw or pre-treated data

+
value
+

value to set

+
element
+

analysis element results to return

+
+
+

Methods

-
    -
  • dat: Return a metabolomic data table.

  • +
    • dat: Return a metabolomic data table.

    • dat<-: Set a metabolomic data table.

    • sinfo: Return a sample information data table.

    • sinfo<-: Set a sample information data table.

    • @@ -261,107 +180,104 @@

      Methods
    • nSamples: Return the number of samples.

    • nFeatures: Return the number of features.

    • analysisResults: Return results from an Analysis object of an analysis element.

    • -

    - - -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    -
    -## Return the metabolomic data
    -dat(d)
    -#> # A tibble: 120 × 101
    -#>     N200  N201  N202  N203  N204  N205   N206  N207  N208  N209  N210   N211
    -#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
    -#>  1     0  0    0     2.98  0     0     0      0.468 0     1.43  0     0.170 
    -#>  2     0  0    0     1.30  0     1.15  0      0     0     0.492 0     0     
    -#>  3     0  0    0     6.08  0.214 2.53  0      1.85  0     1.06  0.184 0.0827
    -#>  4     0  4.24 0     1.48  0     0     0.147  0     0     0.929 0     0.286 
    -#>  5     0  0    0     0.530 0     0.233 0.376  1.41  0     0.274 0     0.139 
    -#>  6     0  0    0     0     0     0.438 0      0     0.219 0.325 0     0     
    -#>  7     0  0    0     0.547 0     0     0      0     0     0     0     0     
    -#>  8     0  0    0.195 1.37  0.594 1.11  0.0902 0     0     0     0.162 0     
    -#>  9     0  0    0     1.24  0     0.196 0.675  0.528 0.128 2.61  0.294 2.66  
    -#> 10     0  0    0     0.113 0     1.06  0      0     0     1.76  2.96  0     
    -#> # … with 110 more rows, and 89 more variables: N212 <dbl>, N213 <dbl>,
    -#> #   N214 <dbl>, N215 <dbl>, N216 <dbl>, N217 <dbl>, N218 <dbl>, N219 <dbl>,
    -#> #   N220 <dbl>, N221 <dbl>, N222 <dbl>, N223 <dbl>, N224 <dbl>, N225 <dbl>,
    -#> #   N226 <dbl>, N227 <dbl>, N228 <dbl>, N229 <dbl>, N230 <dbl>, N231 <dbl>,
    -#> #   N232 <dbl>, N233 <dbl>, N234 <dbl>, N235 <dbl>, N236 <dbl>, N237 <dbl>,
    -#> #   N238 <dbl>, N239 <dbl>, N240 <dbl>, N241 <dbl>, N242 <dbl>, N243 <dbl>,
    -#> #   N244 <dbl>, N245 <dbl>, N246 <dbl>, N247 <dbl>, N248 <dbl>, N249 <dbl>, …
    -
    -## Set the metabolomic data
    -dat(d) <- abr1$neg[,300:400]
    -
    -## Return the sample information
    -sinfo(d)
    -#> # A tibble: 120 × 9
    -#>    injorder pathcdf              filecdf name.org remark name    rep day   class
    -#>       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
    -#>  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
    -#>  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
    -#>  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
    -#>  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
    -#>  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
    -#>  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
    -#>  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
    -#>  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
    -#>  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
    -#> 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
    -#> # … with 110 more rows
    -
    -## Set the sample information
    -sinfo(d) <- abr1$fact
    -
    -## Return the feature names
    -features(d)
    -#>   [1] "N300" "N301" "N302" "N303" "N304" "N305" "N306" "N307" "N308" "N309"
    -#>  [11] "N310" "N311" "N312" "N313" "N314" "N315" "N316" "N317" "N318" "N319"
    -#>  [21] "N320" "N321" "N322" "N323" "N324" "N325" "N326" "N327" "N328" "N329"
    -#>  [31] "N330" "N331" "N332" "N333" "N334" "N335" "N336" "N337" "N338" "N339"
    -#>  [41] "N340" "N341" "N342" "N343" "N344" "N345" "N346" "N347" "N348" "N349"
    -#>  [51] "N350" "N351" "N352" "N353" "N354" "N355" "N356" "N357" "N358" "N359"
    -#>  [61] "N360" "N361" "N362" "N363" "N364" "N365" "N366" "N367" "N368" "N369"
    -#>  [71] "N370" "N371" "N372" "N373" "N374" "N375" "N376" "N377" "N378" "N379"
    -#>  [81] "N380" "N381" "N382" "N383" "N384" "N385" "N386" "N387" "N388" "N389"
    -#>  [91] "N390" "N391" "N392" "N393" "N394" "N395" "N396" "N397" "N398" "N399"
    -#> [101] "N400"
    -
    -## Return the number of samples
    -nSamples(d)
    -#> [1] 120
    -
    -## Return the number of features
    -nFeatures(d)
    -#> [1] 101
    -
    +
+ +
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact)
+
+## Return the metabolomic data
+dat(d)
+#> # A tibble: 120 × 101
+#>     N200  N201  N202  N203  N204  N205   N206  N207  N208  N209  N210   N211
+#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
+#>  1     0  0    0     2.98  0     0     0      0.468 0     1.43  0     0.170 
+#>  2     0  0    0     1.30  0     1.15  0      0     0     0.492 0     0     
+#>  3     0  0    0     6.08  0.214 2.53  0      1.85  0     1.06  0.184 0.0827
+#>  4     0  4.24 0     1.48  0     0     0.147  0     0     0.929 0     0.286 
+#>  5     0  0    0     0.530 0     0.233 0.376  1.41  0     0.274 0     0.139 
+#>  6     0  0    0     0     0     0.438 0      0     0.219 0.325 0     0     
+#>  7     0  0    0     0.547 0     0     0      0     0     0     0     0     
+#>  8     0  0    0.195 1.37  0.594 1.11  0.0902 0     0     0     0.162 0     
+#>  9     0  0    0     1.24  0     0.196 0.675  0.528 0.128 2.61  0.294 2.66  
+#> 10     0  0    0     0.113 0     1.06  0      0     0     1.76  2.96  0     
+#> # … with 110 more rows, and 89 more variables: N212 <dbl>, N213 <dbl>,
+#> #   N214 <dbl>, N215 <dbl>, N216 <dbl>, N217 <dbl>, N218 <dbl>, N219 <dbl>,
+#> #   N220 <dbl>, N221 <dbl>, N222 <dbl>, N223 <dbl>, N224 <dbl>, N225 <dbl>,
+#> #   N226 <dbl>, N227 <dbl>, N228 <dbl>, N229 <dbl>, N230 <dbl>, N231 <dbl>,
+#> #   N232 <dbl>, N233 <dbl>, N234 <dbl>, N235 <dbl>, N236 <dbl>, N237 <dbl>,
+#> #   N238 <dbl>, N239 <dbl>, N240 <dbl>, N241 <dbl>, N242 <dbl>, N243 <dbl>,
+#> #   N244 <dbl>, N245 <dbl>, N246 <dbl>, N247 <dbl>, N248 <dbl>, N249 <dbl>, …
+
+## Set the metabolomic data
+dat(d) <- abr1$neg[,300:400]
+
+## Return the sample information
+sinfo(d)
+#> # A tibble: 120 × 9
+#>    injorder pathcdf              filecdf name.org remark name    rep day   class
+#>       <int> <fct>                <fct>   <fct>    <fct>  <fct> <int> <fct> <int>
+#>  1        1 C:/Xcalibur/ANDI-LT… 01.cdf  12_2     ok     12_2      2 2         2
+#>  2        2 C:/Xcalibur/ANDI-LT… 02.cdf  13_3     ok     13_4      3 3         3
+#>  3        3 C:/Xcalibur/ANDI-LT… 03.cdf  15_4     ok     15_5      5 4         4
+#>  4        4 C:/Xcalibur/ANDI-LT… 04.cdf  12_1     ok     12_2      2 1         1
+#>  5        5 C:/Xcalibur/ANDI-LT… 05.cdf  12_2     ok     12_2      2 2         2
+#>  6        6 C:/Xcalibur/ANDI-LT… 06.cdf  11_1     ok     11_2      1 1         1
+#>  7        7 C:/Xcalibur/ANDI-LT… 07.cdf  14_2     ok     14_3      4 2         2
+#>  8        8 C:/Xcalibur/ANDI-LT… 08.cdf  11_4     ok     11_5      1 4         4
+#>  9        9 C:/Xcalibur/ANDI-LT… 09.cdf  13_H     ok     13_H      3 H         6
+#> 10       10 C:/Xcalibur/ANDI-LT… 10.cdf  15_H     ok     15_H      5 H         6
+#> # … with 110 more rows
+
+## Set the sample information
+sinfo(d) <- abr1$fact
+
+## Return the feature names
+features(d)
+#>   [1] "N300" "N301" "N302" "N303" "N304" "N305" "N306" "N307" "N308" "N309"
+#>  [11] "N310" "N311" "N312" "N313" "N314" "N315" "N316" "N317" "N318" "N319"
+#>  [21] "N320" "N321" "N322" "N323" "N324" "N325" "N326" "N327" "N328" "N329"
+#>  [31] "N330" "N331" "N332" "N333" "N334" "N335" "N336" "N337" "N338" "N339"
+#>  [41] "N340" "N341" "N342" "N343" "N344" "N345" "N346" "N347" "N348" "N349"
+#>  [51] "N350" "N351" "N352" "N353" "N354" "N355" "N356" "N357" "N358" "N359"
+#>  [61] "N360" "N361" "N362" "N363" "N364" "N365" "N366" "N367" "N368" "N369"
+#>  [71] "N370" "N371" "N372" "N373" "N374" "N375" "N376" "N377" "N378" "N379"
+#>  [81] "N380" "N381" "N382" "N383" "N384" "N385" "N386" "N387" "N388" "N389"
+#>  [91] "N390" "N391" "N392" "N393" "N394" "N395" "N396" "N397" "N398" "N399"
+#> [101] "N400"
+
+## Return the number of samples
+nSamples(d)
+#> [1] 120
+
+## Return the number of features
+nFeatures(d)
+#> [1] 101
+
+
+ -
- +
- - + + diff --git a/docs/reference/analysisData.html b/docs/reference/analysisData.html index 9bf0997b..9da42eb9 100644 --- a/docs/reference/analysisData.html +++ b/docs/reference/analysisData.html @@ -1,67 +1,12 @@ - - - - - - - -AnalysisData class constructor — analysisData • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -AnalysisData class constructor — analysisData • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,63 +68,58 @@

AnalysisData class constructor

Create an AnalysisData S4 object.

-
analysisData(data, info)
- -

Arguments

- - - - - - - - - - -
data

table containing sample metabolomic data

info

table containing sample meta information

- -

Value

+
+
analysisData(data, info)
+
+
+

Arguments

+
data
+

table containing sample metabolomic data

+
info
+

table containing sample meta information

+
+
+

Value

An S4 object of class Analysis.

+
-

Examples

-
library(metaboData)
-d <- analysisData(data = abr1$neg,info = abr1$fact)
-
-print(d)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 120 
-#> Features: 2000 
-#> Info: 9 
-#> 
-
+
+

Examples

+
library(metaboData)
+d <- analysisData(data = abr1$neg,info = abr1$fact)
+
+print(d)
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 120 
+#> Features: 2000 
+#> Info: 9 
+#> 
+
+
+
-
- +
- - + + diff --git a/docs/reference/analysisElements.html b/docs/reference/analysisElements.html index ef5b2527..a61039ed 100644 --- a/docs/reference/analysisElements.html +++ b/docs/reference/analysisElements.html @@ -1,67 +1,12 @@ - - - - - - - -Analysis elements — analysisElements • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Analysis elements — analysisElements • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,42 +68,42 @@

Analysis elements

Return the analysis elements available in metabolyseR.

-
analysisElements()
- - -

Value

+
+
analysisElements()
+
+
+

Value

A character vector of analysis elements.

+
-

Examples

-
analysisElements()
-#> [1] "pre-treatment" "modelling"     "correlations" 
-
+
+

Examples

+
analysisElements()
+#> [1] "pre-treatment" "modelling"     "correlations" 
+
+
+
-
- +
- - + + diff --git a/docs/reference/analysisParameters.html b/docs/reference/analysisParameters.html index 11af90de..6c5bdec7 100644 --- a/docs/reference/analysisParameters.html +++ b/docs/reference/analysisParameters.html @@ -1,67 +1,12 @@ - - - - - - - -Create an AnalysisParameters S4 class object — analysisParameters • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create an AnalysisParameters S4 class object — analysisParameters • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,98 +68,95 @@

Create an AnalysisParameters S4 class object

Initiate an AnalysisParameters object with the default analysis parameters for each of the analysis elements.

-
analysisParameters(elements = analysisElements())
- -

Arguments

- - - - - - -
elements

character vector containing elements for analysis.

- -

Value

+
+
analysisParameters(elements = analysisElements())
+
+
+

Arguments

+
elements
+

character vector containing elements for analysis.

+
+
+

Value

An S4 object of class AnalysisParameters containing the default analysis parameters.

+
-

Examples

-
p <- analysisParameters()
-
-print(p)
-#> Parameters:
-#> pre-treatment
-#> 	QC
-#> 		occupancyFilter
-#> 			cls = class
-#> 			QCidx = QC
-#> 			occupancy = 2/3
-#> 		impute
-#> 			cls = class
-#> 			QCidx = QC
-#> 			occupancy = 2/3
-#> 			parallel = variables
-#> 			seed = 1234
-#> 		RSDfilter
-#> 			cls = class
-#> 			QCidx = QC
-#> 			RSDthresh = 50
-#> 		removeQC
-#> 			cls = class
-#> 			QCidx = QC
-#> 	occupancyFilter
-#> 		maximum
-#> 			cls = class
-#> 			occupancy = 2/3
-#> 	impute
-#> 		class
-#> 			cls = class
-#> 			occupancy = 2/3
-#> 			seed = 1234
-#> 	transform
-#> 		TICnorm
-#> 
-#> modelling
-#> 	randomForest
-#> 		cls = class
-#> 		rf = list()
-#> 		reps = 1
-#> 		binary = FALSE
-#> 		comparisons = list()
-#> 		perm = 0
-#> 		returnModels = FALSE
-#> 		seed = 1234
-#> 
-#> correlations
-#> 	method = pearson
-#> 	pAdjustMethod = bonferroni
-#> 	corPvalue = 0.05
-
+
+

Examples

+
p <- analysisParameters()
+
+print(p)
+#> Parameters:
+#> pre-treatment
+#> 	QC
+#> 		occupancyFilter
+#> 			cls = class
+#> 			QCidx = QC
+#> 			occupancy = 2/3
+#> 		impute
+#> 			cls = class
+#> 			QCidx = QC
+#> 			occupancy = 2/3
+#> 			parallel = variables
+#> 			seed = 1234
+#> 		RSDfilter
+#> 			cls = class
+#> 			QCidx = QC
+#> 			RSDthresh = 50
+#> 		removeQC
+#> 			cls = class
+#> 			QCidx = QC
+#> 	occupancyFilter
+#> 		maximum
+#> 			cls = class
+#> 			occupancy = 2/3
+#> 	impute
+#> 		class
+#> 			cls = class
+#> 			occupancy = 2/3
+#> 			seed = 1234
+#> 	transform
+#> 		TICnorm
+#> 
+#> modelling
+#> 	randomForest
+#> 		cls = class
+#> 		rf = list()
+#> 		reps = 1
+#> 		binary = FALSE
+#> 		comparisons = list()
+#> 		perm = 0
+#> 		returnModels = FALSE
+#> 		seed = 1234
+#> 
+#> correlations
+#> 	method = pearson
+#> 	pAdjustMethod = bonferroni
+#> 	corPvalue = 0.05
+
+
+
-
- +
- - + + diff --git a/docs/reference/anova.html b/docs/reference/anova.html index 16a9a9a1..8e6ea814 100644 --- a/docs/reference/anova.html +++ b/docs/reference/anova.html @@ -1,67 +1,12 @@ - - - - - - - -ANOVA — anova • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -ANOVA — anova • metabolyseR - - + + - - -
-
- -
- -
+
@@ -138,99 +68,87 @@

ANOVA

One-way analysis of variance (ANOVA).

-
anova(
-  x,
-  cls = "class",
-  pAdjust = "bonferroni",
-  comparisons = list(),
-  returnModels = FALSE
-)
-
-# S4 method for AnalysisData
-anova(
-  x,
-  cls = "class",
-  pAdjust = "bonferroni",
-  comparisons = list(),
-  returnModels = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - -
x

S4 object of class AnalysisData

cls

a vector of sample info column names to analyse

pAdjust

p value adjustment method

comparisons

list of comparisons to perform

returnModels

should models be returned

- - -

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg[,200:300],abr1$fact)
-
-## Perform ANOVA
-anova_analysis <- anova(d,cls = 'day')
-
-## Extract significant features
-explanatoryFeatures(anova_analysis)
-#> # A tibble: 21 × 10
-#>    Response Comparison  Feature term        df  sumsq  meansq statistic  p.value
-#>    <chr>    <chr>       <chr>   <chr>    <dbl>  <dbl>   <dbl>     <dbl>    <dbl>
-#>  1 day      1~2~3~4~5~H N277    response     5 63072. 12614.       39.1 3.14e-23
-#>  2 day      1~2~3~4~5~H N229    response     5 43549.  8710.       18.1 3.54e-13
-#>  3 day      1~2~3~4~5~H N299    response     5  1211.   242.       16.4 3.87e-12
-#>  4 day      1~2~3~4~5~H N295    response     5   271.    54.2      13.6 2.02e-10
-#>  5 day      1~2~3~4~5~H N281    response     5   192.    38.5      12.5 1.16e- 9
-#>  6 day      1~2~3~4~5~H N245    response     5  6268.  1254.       11.6 4.38e- 9
-#>  7 day      1~2~3~4~5~H N255    response     5  5363.  1073.       11.0 1.14e- 8
-#>  8 day      1~2~3~4~5~H N278    response     5   277.    55.4      10.9 1.48e- 8
-#>  9 day      1~2~3~4~5~H N259    response     5  1236.   247.       10.8 1.72e- 8
-#> 10 day      1~2~3~4~5~H N279    response     5   810.   162.       10.5 2.77e- 8
-#> # … with 11 more rows, and 1 more variable: adjusted.p.value <dbl>
-
+
+
anova(
+  x,
+  cls = "class",
+  pAdjust = "bonferroni",
+  comparisons = list(),
+  returnModels = FALSE
+)
+
+# S4 method for AnalysisData
+anova(
+  x,
+  cls = "class",
+  pAdjust = "bonferroni",
+  comparisons = list(),
+  returnModels = FALSE
+)
+
+ +
+

Arguments

+
x
+

S4 object of class AnalysisData

+
cls
+

a vector of sample info column names to analyse

+
pAdjust
+

p value adjustment method

+
comparisons
+

list of comparisons to perform

+
returnModels
+

should models be returned

+
+ +
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact)
+
+## Perform ANOVA
+anova_analysis <- anova(d,cls = 'day')
+
+## Extract significant features
+explanatoryFeatures(anova_analysis)
+#> # A tibble: 21 × 10
+#>    Response Comparison  Feature term        df  sumsq  meansq statistic  p.value
+#>    <chr>    <chr>       <chr>   <chr>    <dbl>  <dbl>   <dbl>     <dbl>    <dbl>
+#>  1 day      1~2~3~4~5~H N277    response     5 63072. 12614.       39.1 3.14e-23
+#>  2 day      1~2~3~4~5~H N229    response     5 43549.  8710.       18.1 3.54e-13
+#>  3 day      1~2~3~4~5~H N299    response     5  1211.   242.       16.4 3.87e-12
+#>  4 day      1~2~3~4~5~H N295    response     5   271.    54.2      13.6 2.02e-10
+#>  5 day      1~2~3~4~5~H N281    response     5   192.    38.5      12.5 1.16e- 9
+#>  6 day      1~2~3~4~5~H N245    response     5  6268.  1254.       11.6 4.38e- 9
+#>  7 day      1~2~3~4~5~H N255    response     5  5363.  1073.       11.0 1.14e- 8
+#>  8 day      1~2~3~4~5~H N278    response     5   277.    55.4      10.9 1.48e- 8
+#>  9 day      1~2~3~4~5~H N259    response     5  1236.   247.       10.8 1.72e- 8
+#> 10 day      1~2~3~4~5~H N279    response     5   810.   162.       10.5 2.77e- 8
+#> # … with 11 more rows, and 1 more variable: adjusted.p.value <dbl>
+
+
+
-
- +
- - + + diff --git a/docs/reference/bind.html b/docs/reference/bind.html index 6ff98dbb..da7ea650 100644 --- a/docs/reference/bind.html +++ b/docs/reference/bind.html @@ -1,67 +1,12 @@ - - - - - - - -Bind AnalysisData objects by row — bindRows • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Bind AnalysisData objects by row — bindRows • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,65 +68,62 @@

Bind AnalysisData objects by row

Bind the rows of AnalysisData objects contained within a list.

-
bindRows(d)
-
-# S4 method for list
-bindRows(d)
+
+
bindRows(d)
 
-    

Arguments

- - - - - - -
d

list object containing S4 objects of class AnalysisData to be bound

- -

Value

+# S4 method for list +bindRows(d)
+
+
+

Arguments

+
d
+

list object containing S4 objects of class AnalysisData to be bound

+
+
+

Value

An S4 object of class AnalysisData containg the bound data sets.

+
-

Examples

-
library(metaboData)
-d <- list(
- negative = analysisData(abr1$neg,abr1$fact),
- positive = analysisData(abr1$pos,abr1$fact)
- )
-
-bindRows(d)
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 240 
-#> Features: 4000 
-#> Info: 9 
-#> 
-
+
+

Examples

+
library(metaboData)
+d <- list(
+ negative = analysisData(abr1$neg,abr1$fact),
+ positive = analysisData(abr1$pos,abr1$fact)
+ )
+
+bindRows(d)
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 240 
+#> Features: 4000 
+#> Info: 9 
+#> 
+
+
+
-
- +
- - + + diff --git a/docs/reference/changeParameter.html b/docs/reference/changeParameter.html index 7c28aeec..27ad36e5 100644 --- a/docs/reference/changeParameter.html +++ b/docs/reference/changeParameter.html @@ -1,67 +1,12 @@ - - - - - - - -Change analysis parameters — changeParameter<- • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Change analysis parameters — changeParameter<- • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,105 +68,97 @@

Change analysis parameters

Change analysis parameters.

-
changeParameter(x, parameterName, elements = analysisElements()) <- value
-
-# S4 method for AnalysisParameters
-changeParameter(x, parameterName, elements = analysisElements()) <- value
- -

Arguments

- - - - - - - - - - - - - - - - - - -
x

S4 object of class AnalysisParameters

parameterName

name of the parameter to change

elements

character vector of analysis elements to target parameter -change. Can be any returned by analysisElements().

value

New value of the parameter

- -

Value

+
+
changeParameter(x, parameterName, elements = analysisElements()) <- value
 
-    

An S4 object of class AnalysisParameters.

-

Details

+# S4 method for AnalysisParameters +changeParameter(x, parameterName, elements = analysisElements()) <- value
+
+
+

Arguments

+
x
+

S4 object of class AnalysisParameters

+
parameterName
+

name of the parameter to change

+
elements
+

character vector of analysis elements to target parameter +change. Can be any returned by analysisElements().

+
value
+

New value of the parameter

+
+
+

Value

+

An S4 object of class AnalysisParameters.

+
+
+

Details

For the parameter name selected, all parameters with that name will be altered.

+
-

Examples

-
p <- analysisParameters('pre-treatment')
-
-changeParameter(p,'cls') <- 'day'
-
-print(p)
-#> Parameters:
-#> pre-treatment
-#> 	QC
-#> 		occupancyFilter
-#> 			cls = day
-#> 			QCidx = QC
-#> 			occupancy = 2/3
-#> 		impute
-#> 			cls = day
-#> 			QCidx = QC
-#> 			occupancy = 2/3
-#> 			parallel = variables
-#> 			seed = 1234
-#> 		RSDfilter
-#> 			cls = day
-#> 			QCidx = QC
-#> 			RSDthresh = 50
-#> 		removeQC
-#> 			cls = day
-#> 			QCidx = QC
-#> 	occupancyFilter
-#> 		maximum
-#> 			cls = day
-#> 			occupancy = 2/3
-#> 	impute
-#> 		class
-#> 			cls = day
-#> 			occupancy = 2/3
-#> 			seed = 1234
-#> 	transform
-#> 		TICnorm
-#> 
-
+
+

Examples

+
p <- analysisParameters('pre-treatment')
+
+changeParameter(p,'cls') <- 'day'
+
+print(p)
+#> Parameters:
+#> pre-treatment
+#> 	QC
+#> 		occupancyFilter
+#> 			cls = day
+#> 			QCidx = QC
+#> 			occupancy = 2/3
+#> 		impute
+#> 			cls = day
+#> 			QCidx = QC
+#> 			occupancy = 2/3
+#> 			parallel = variables
+#> 			seed = 1234
+#> 		RSDfilter
+#> 			cls = day
+#> 			QCidx = QC
+#> 			RSDthresh = 50
+#> 		removeQC
+#> 			cls = day
+#> 			QCidx = QC
+#> 	occupancyFilter
+#> 		maximum
+#> 			cls = day
+#> 			occupancy = 2/3
+#> 	impute
+#> 		class
+#> 			cls = day
+#> 			occupancy = 2/3
+#> 			seed = 1234
+#> 	transform
+#> 		TICnorm
+#> 
+
+
+
-
- +
- - + + diff --git a/docs/reference/cls.html b/docs/reference/cls.html index 85a8806f..b7f0d4cb 100644 --- a/docs/reference/cls.html +++ b/docs/reference/cls.html @@ -1,69 +1,14 @@ - - - - - - - -Sample meta information wrangling — clsAdd • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Sample meta information wrangling — clsAdd • metabolyseR - - - - - - - - - - - - - + + -
-
- -
- -
+
@@ -142,202 +72,184 @@

Sample meta information wrangling

AnalysisData object.

-
clsAdd(d, cls, value, ...)
-
-# S4 method for AnalysisData
-clsAdd(d, cls, value)
-
-# S4 method for Analysis
-clsAdd(d, cls, value, type = c("raw", "pre-treated"))
+    
+
clsAdd(d, cls, value, ...)
 
-clsArrange(d, cls = "class", descending = FALSE, ...)
+# S4 method for AnalysisData
+clsAdd(d, cls, value)
 
-# S4 method for AnalysisData
-clsArrange(d, cls = "class", descending = FALSE)
+# S4 method for Analysis
+clsAdd(d, cls, value, type = c("raw", "pre-treated"))
 
-# S4 method for Analysis
-clsArrange(
-  d,
-  cls = "class",
-  descending = FALSE,
-  type = c("raw", "pre-treated")
-)
+clsArrange(d, cls = "class", descending = FALSE, ...)
 
-clsAvailable(d, ...)
+# S4 method for AnalysisData
+clsArrange(d, cls = "class", descending = FALSE)
 
-# S4 method for AnalysisData
-clsAvailable(d)
+# S4 method for Analysis
+clsArrange(
+  d,
+  cls = "class",
+  descending = FALSE,
+  type = c("raw", "pre-treated")
+)
 
-# S4 method for Analysis
-clsAvailable(d, type = c("raw", "pre-treated"))
+clsAvailable(d, ...)
 
-clsExtract(d, cls = "class", ...)
+# S4 method for AnalysisData
+clsAvailable(d)
 
-# S4 method for AnalysisData
-clsExtract(d, cls = "class")
+# S4 method for Analysis
+clsAvailable(d, type = c("raw", "pre-treated"))
 
-# S4 method for Analysis
-clsExtract(d, cls = "class", type = c("raw", "pre-treated"))
+clsExtract(d, cls = "class", ...)
 
-clsRemove(d, cls, ...)
+# S4 method for AnalysisData
+clsExtract(d, cls = "class")
 
-# S4 method for AnalysisData
-clsRemove(d, cls)
+# S4 method for Analysis
+clsExtract(d, cls = "class", type = c("raw", "pre-treated"))
 
-# S4 method for Analysis
-clsRemove(d, cls, type = c("raw", "pre-treated"))
+clsRemove(d, cls, ...)
 
-clsRename(d, cls, newName, ...)
+# S4 method for AnalysisData
+clsRemove(d, cls)
 
-# S4 method for AnalysisData
-clsRename(d, cls, newName)
+# S4 method for Analysis
+clsRemove(d, cls, type = c("raw", "pre-treated"))
 
-# S4 method for Analysis
-clsRename(d, cls, newName, type = c("raw", "pre-treated"))
+clsRename(d, cls, newName, ...)
 
-clsReplace(d, value, cls = "class", ...)
+# S4 method for AnalysisData
+clsRename(d, cls, newName)
 
-# S4 method for AnalysisData
-clsReplace(d, value, cls = "class")
+# S4 method for Analysis
+clsRename(d, cls, newName, type = c("raw", "pre-treated"))
 
-# S4 method for Analysis
-clsReplace(d, value, cls = "class", type = c("raw", "pre-treated"))
+clsReplace(d, value, cls = "class", ...) -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
d

S4 object of class Analysis or AnalysisData

cls

sample info column to extract

value

vactor of new sample information for replacement

...

arguments to pass to specific method

type

raw or pre-treated sample information

descending

TRUE/FALSE, arrange samples in descending order

newName

new column name

+# S4 method for AnalysisData +clsReplace(d, value, cls = "class") -

Methods

+# S4 method for Analysis +clsReplace(d, value, cls = "class", type = c("raw", "pre-treated"))
+
+
+

Arguments

+
d
+

S4 object of class Analysis or AnalysisData

+
cls
+

sample info column to extract

+
value
+

vector of new sample information for replacement

+
...
+

arguments to pass to specific method

+
type
+

raw or pre-treated sample information

+
descending
+

TRUE/FALSE, arrange samples in descending order

+
newName
+

new column name

+
+
+

Methods

-
    -
  • clsAdd: Add a sample information column.

  • +
    • clsAdd: Add a sample information column.

    • clsArrange: Arrange sample row order by a specified sample information column.

    • clsAvailable: Retrieve the names of the available sample information columns.

    • clsExtract: Extract the values of a specified sample information column.

    • clsRemove: Remove a sample information column.

    • clsRename: Rename a sample information column.

    • clsReplace: Replace a sample information column.

    • -
    - - -

    Examples

    -
    library(metaboData)
    -d <- analysisData(abr1$neg,abr1$fact)
    -
    -## Add a sample information column named 'new'
    -d <- clsAdd(d,'new',1:nSamples(d))
    -
    -print(d)
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 120 
    -#> Features: 2000 
    -#> Info: 10 
    -#> 
    -
    -## Arrange the row orders by the 'day' column
    -d <- clsArrange(d,'day')
    -
    -clsExtract(d,'day')
    -#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
    -#>  [38] 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
    -#>  [75] 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 H H H H H H H H H H H
    -#> [112] H H H H H H H H H
    -#> Levels: 1 2 3 4 5 H
    -
    -## Retreive the available sample information column names
    -clsAvailable(d)
    -#>  [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"    
    -#>  [7] "rep"      "day"      "class"    "new"     
    -
    -## Extract the values of the 'day' column
    -clsExtract(d,'day')
    -#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
    -#>  [38] 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
    -#>  [75] 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 H H H H H H H H H H H
    -#> [112] H H H H H H H H H
    -#> Levels: 1 2 3 4 5 H
    -
    -## Remove the 'class' column
    -d <- clsRemove(d,'class')
    -
    -clsAvailable(d)
    -#> [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
    -#> [8] "day"      "new"     
    -
    -## Rename the 'day' column to 'treatment'
    -d <- clsRename(d,'day','treatment')
    -
    -clsAvailable(d)
    -#> [1] "injorder"  "pathcdf"   "filecdf"   "name.org"  "remark"    "name"     
    -#> [7] "rep"       "treatment" "new"      
    -
    -## Replace the values of the 'treatment' column
    -d <- clsReplace(d,rep(1,nSamples(d)),'treatment')
    -
    -clsExtract(d,'treatment')
    -#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    -#>  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    -#>  [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    -#> [112] 1 1 1 1 1 1 1 1 1
    -
    +
+ +
+

Examples

+
library(metaboData)
+d <- analysisData(abr1$neg,abr1$fact)
+
+## Add a sample information column named 'new'
+d <- clsAdd(d,'new',1:nSamples(d))
+
+print(d)
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 120 
+#> Features: 2000 
+#> Info: 10 
+#> 
+
+## Arrange the row orders by the 'day' column
+d <- clsArrange(d,'day')
+
+clsExtract(d,'day')
+#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
+#>  [38] 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
+#>  [75] 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 H H H H H H H H H H H
+#> [112] H H H H H H H H H
+#> Levels: 1 2 3 4 5 H
+
+## Retrieve the available sample information column names
+clsAvailable(d)
+#>  [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"    
+#>  [7] "rep"      "day"      "class"    "new"     
+
+## Extract the values of the 'day' column
+clsExtract(d,'day')
+#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
+#>  [38] 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
+#>  [75] 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 H H H H H H H H H H H
+#> [112] H H H H H H H H H
+#> Levels: 1 2 3 4 5 H
+
+## Remove the 'class' column
+d <- clsRemove(d,'class')
+
+clsAvailable(d)
+#> [1] "injorder" "pathcdf"  "filecdf"  "name.org" "remark"   "name"     "rep"     
+#> [8] "day"      "new"     
+
+## Rename the 'day' column to 'treatment'
+d <- clsRename(d,'day','treatment')
+
+clsAvailable(d)
+#> [1] "injorder"  "pathcdf"   "filecdf"   "name.org"  "remark"    "name"     
+#> [7] "rep"       "treatment" "new"      
+
+## Replace the values of the 'treatment' column
+d <- clsReplace(d,rep(1,nSamples(d)),'treatment')
+
+clsExtract(d,'treatment')
+#>   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+#>  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+#>  [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+#> [112] 1 1 1 1 1 1 1 1 1
+
+
+ -
- +
- - + + diff --git a/docs/reference/correction-1.png b/docs/reference/correction-1.png index 949f19a9..2fa75dcc 100644 Binary files a/docs/reference/correction-1.png and b/docs/reference/correction-1.png differ diff --git a/docs/reference/correction-2.png b/docs/reference/correction-2.png index 5c9a5a69..ac5789e2 100644 Binary files a/docs/reference/correction-2.png and b/docs/reference/correction-2.png differ diff --git a/docs/reference/correction.html b/docs/reference/correction.html index c4f7af25..8446427f 100644 --- a/docs/reference/correction.html +++ b/docs/reference/correction.html @@ -1,67 +1,12 @@ - - - - - - - -Batch/block correction — correctionCenter • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Batch/block correction — correctionCenter • metabolyseR - - - - + + -
-
- -
- -
+
@@ -138,89 +68,81 @@

Batch/block correction

Correction of batch/block differences.

-
correctionCenter(d, block = "block", type = c("mean", "median"))
-
-# S4 method for AnalysisData
-correctionCenter(d, block = "block", type = c("mean", "median"))
- -

Arguments

- - - - - - - - - - - - - - -
d

S4 object of class AnalysisData

block

sample information column name to use containing sample block -groupings

type

type of average to use

- -

Value

+
+
correctionCenter(d, block = "block", type = c("mean", "median"))
 
-    

An S4 object of class AnalysisData containing the corrected data.

-

Details

+# S4 method for AnalysisData +correctionCenter(d, block = "block", type = c("mean", "median"))
+
+
+

Arguments

+
d
+

S4 object of class AnalysisData

+
block
+

sample information column name to use containing sample block +groupings

+
type
+

type of average to use

+
+
+

Value

+

An S4 object of class AnalysisData containing the corrected data.

+
+
+

Details

There can sometimes be artificial batch related variability introduced into metabolomics analyses as a result of analytical instrumentation or sample preparation. With an appropriate randomised block design of sample injection order, batch related variability can be corrected using an average centring correction method of the individual features.

-

Methods

- +
+
+

Methods

-
    -
  • correctionCenter: Correction using group average centring.

  • -
- - -

Examples

-

-## Initial example data preparation
-library(metaboData)
-
-d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
- occupancyMaximum(occupancy = 2/3)
- 
-## Group total ion count distributions prior to correction
-d %>% 
- plotTIC(by = 'day',colour = 'day')
-
- 
-## Group total ion count distributions after group median correction
-d %>% 
- correctionCenter(block = 'day',type = 'median') %>% 
- plotTIC(by = 'day',colour = 'day')
-
-
+
  • correctionCenter: Correction using group average centring.

  • +
+ +
+

Examples

+

+## Initial example data preparation
+library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
+ occupancyMaximum(occupancy = 2/3)
+ 
+## Group total ion count distributions prior to correction
+d %>% 
+ plotTIC(by = 'day',colour = 'day')
+
+ 
+## Group total ion count distributions after group median correction
+d %>% 
+ correctionCenter(block = 'day',type = 'median') %>% 
+ plotTIC(by = 'day',colour = 'day')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/correlations.html b/docs/reference/correlations.html index a5c79cc3..956b823f 100644 --- a/docs/reference/correlations.html +++ b/docs/reference/correlations.html @@ -1,67 +1,12 @@ - - - - - - - -Feature correlation analysis — correlations • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Feature correlation analysis — correlations • metabolyseR - + + - - - -
-
- -
- -
+
@@ -138,99 +68,89 @@

Feature correlation analysis

Feature correlation analysis.

-
correlations(d, ...)
-
-# S4 method for AnalysisData
-correlations(
-  d,
-  method = "pearson",
-  pAdjustMethod = "bonferroni",
-  corPvalue = 0.05
-)
-
-# S4 method for Analysis
-correlations(d)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - -
d

S4 object of class AnalysisData

...

arguments to pass to specific method

method

correlation method. One of pearson or spearman.

pAdjustMethod

p-value adjustment method. See ?p.adjust for available methods.

corPvalue

p-value cut-off threshold for significance

- -

Value

+
+
correlations(d, ...)
 
-    

A tibble containing results of significantly correlated features.

-

Details

+# S4 method for AnalysisData +correlations( + d, + method = "pearson", + pAdjustMethod = "bonferroni", + corPvalue = 0.05 +) + +# S4 method for Analysis +correlations(d)
+
+
+

Arguments

+
d
+

S4 object of class AnalysisData

+
...
+

arguments to pass to specific method

+
method
+

correlation method. One of pearson or spearman.

+
pAdjustMethod
+

p-value adjustment method. See ?p.adjust for available methods.

+
corPvalue
+

p-value cut-off threshold for significance

+
+
+

Value

+

A tibble containing results of significantly correlated features.

+
+
+

Details

Correlation analyses can be used to identify associated features within data sets. This can be useful for identifying clusters of related features that can be used to annotate metabolites within data sets. All features are compared and the returned table of correlations is p-value thresholded using the specified cut-off.

+
-

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg[,200:300],abr1$fact)
-
-correlations(d)
-#> # A tibble: 130 × 7
-#>    Feature1 Feature2 log2IntensityRatio     r `|r|`           p     n
-#>    <chr>    <chr>                 <dbl> <dbl> <dbl>       <dbl> <int>
-#>  1 N212     N227                 -0.884 0.980 0.980 0.0107          7
-#>  2 N224     N286                  1.85  0.971 0.971 0.00612         8
-#>  3 N215     N276                  0.227 0.965 0.965 0.0419          7
-#>  4 N224     N265                  0.576 0.943 0.943 0.00138        11
-#>  5 N201     N275                 -1.59  0.909 0.909 0.0264         10
-#>  6 N213     N231                 -1.63  0.883 0.883 0             108
-#>  7 N224     N225                 -0.792 0.863 0.863 0.000000176    29
-#>  8 N258     N263                 -2.89  0.857 0.857 0.0181         13
-#>  9 N267     N297                 -0.671 0.853 0.853 0             120
-#> 10 N211     N291                 -1.55  0.831 0.831 0.00106        19
-#> # … with 120 more rows
-
+
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact)
+
+correlations(d)
+#> # A tibble: 130 × 7
+#>    Feature1 Feature2 log2IntensityRatio     r `|r|`           p     n
+#>    <chr>    <chr>                 <dbl> <dbl> <dbl>       <dbl> <int>
+#>  1 N212     N227                 -0.884 0.980 0.980 0.0107          7
+#>  2 N224     N286                  1.85  0.971 0.971 0.00612         8
+#>  3 N215     N276                  0.227 0.965 0.965 0.0419          7
+#>  4 N224     N265                  0.576 0.943 0.943 0.00138        11
+#>  5 N201     N275                 -1.59  0.909 0.909 0.0264         10
+#>  6 N213     N231                 -1.63  0.883 0.883 0             108
+#>  7 N224     N225                 -0.792 0.863 0.863 0.000000176    29
+#>  8 N258     N263                 -2.89  0.857 0.857 0.0181         13
+#>  9 N267     N297                 -0.671 0.853 0.853 0             120
+#> 10 N211     N291                 -1.55  0.831 0.831 0.00106        19
+#> # … with 120 more rows
+
+
+
-
- +
- - + + diff --git a/docs/reference/correlationsParameters.html b/docs/reference/correlationsParameters.html index 5c649a6e..8e6077fc 100644 --- a/docs/reference/correlationsParameters.html +++ b/docs/reference/correlationsParameters.html @@ -1,67 +1,12 @@ - - - - - - - -Correlations parameters — correlationsParameters • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Correlations parameters — correlationsParameters • metabolyseR - - + + - - -
-
- -
- -
+
@@ -138,50 +68,49 @@

Correlations parameters

Retrieve the default parameters for correlation analysis.

-
correlationsParameters()
- - - -

Examples

-
## Retrieve the default correlation parameters
-p <- correlationsParameters()
-
-## Assign the correlation parameters to analysis parameters
-cp <- analysisParameters('correlations')
-parameters(cp,'correlations') <- p
-
-print(cp)
-#> Parameters:
-#> correlations
-#> 	method = pearson
-#> 	pAdjustMethod = bonferroni
-#> 	corPvalue = 0.05
-
+
+
correlationsParameters()
+
+ + +
+

Examples

+
## Retrieve the default correlation parameters
+p <- correlationsParameters()
+
+## Assign the correlation parameters to analysis parameters
+cp <- analysisParameters('correlations')
+parameters(cp,'correlations') <- p
+
+print(cp)
+#> Parameters:
+#> correlations
+#> 	method = pearson
+#> 	pAdjustMethod = bonferroni
+#> 	corPvalue = 0.05
+
+
+
-
- +
- - + + diff --git a/docs/reference/impute-1.png b/docs/reference/impute-1.png index 831565bb..0cb7225d 100644 Binary files a/docs/reference/impute-1.png and b/docs/reference/impute-1.png differ diff --git a/docs/reference/impute-2.png b/docs/reference/impute-2.png index 9c32632f..feb285e4 100644 Binary files a/docs/reference/impute-2.png and b/docs/reference/impute-2.png differ diff --git a/docs/reference/impute-3.png b/docs/reference/impute-3.png index f595ffe2..8b551ec3 100644 Binary files a/docs/reference/impute-3.png and b/docs/reference/impute-3.png differ diff --git a/docs/reference/impute.html b/docs/reference/impute.html index 1730762e..04509b29 100644 --- a/docs/reference/impute.html +++ b/docs/reference/impute.html @@ -1,67 +1,12 @@ - - - - - - - -Missing data imputation — imputeAll • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Missing data imputation — imputeAll • metabolyseR - - - - + + -
-
- -
- -
+
@@ -138,110 +68,98 @@

Missing data imputation

Impute missing values using random forest imputation.

-
imputeAll(d, occupancy = 2/3, parallel = "variables", seed = 1234)
-
-# S4 method for AnalysisData
-imputeAll(d, occupancy = 2/3, parallel = "variables", seed = 1234)
-
-imputeClass(d, cls = "class", occupancy = 2/3, seed = 1234)
-
-# S4 method for AnalysisData
-imputeClass(d, cls = "class", occupancy = 2/3, seed = 1234)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - -
d

S4 object of class AnalysisData

occupancy

occupancy threshold above which missing values of a feature will be imputed

parallel

parallel type to use. See ?missForest for details

seed

random number seed

cls

info column to use for class labels

- -

Value

+
+
imputeAll(d, occupancy = 2/3, parallel = "variables", seed = 1234)
 
-    

An S4 object of class AnalysisData containing the data after imputation.

-

Details

+# S4 method for AnalysisData +imputeAll(d, occupancy = 2/3, parallel = "variables", seed = 1234) +imputeClass(d, cls = "class", occupancy = 2/3, seed = 1234) + +# S4 method for AnalysisData +imputeClass(d, cls = "class", occupancy = 2/3, seed = 1234)
+
+ +
+

Arguments

+
d
+

S4 object of class AnalysisData

+
occupancy
+

occupancy threshold above which missing values of a feature will be imputed

+
parallel
+

parallel type to use. See ?missForest for details

+
seed
+

random number seed

+
cls
+

info column to use for class labels

+
+
+

Value

+

An S4 object of class AnalysisData containing the data after imputation.

+
+
+

Details

Missing values can have an important influence on downstream analyses with zero values heavily influencing the outcomes of parametric tests. Where and how they are imputed are important considerations and are highly related to variable occupancy. The methods provided here allow both these aspects to be taken into account and utilise random forest imputation using the missForest package.

-

Methods

- +
+
+

Methods

-
    -
  • imputeAll: Impute missing values across all sample features.

  • +
    • imputeAll: Impute missing values across all sample features.

    • imputeClass: Impute missing values class-wise.

    • -
    - - -

    Examples

    -
    ## Each of the following examples shows the application of each imputation method and then 
    -## a Linear Discriminant Analysis is plotted to show it's effect on the data structure.
    -
    -## Initial example data preparation
    -library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:250],abr1$fact) %>% 
    - occupancyMaximum(occupancy = 2/3)
    -
    -d %>% 
    - plotLDA(cls = 'day')
    -
    - 
    -## Missing value imputation across all samples
    -d %>% 
    - imputeAll(parallel = 'no') %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Missing value imputation class-wise
    -d %>% 
    - imputeClass(cls = 'day') %>% 
    - plotLDA(cls = 'day')
    -
    -
    +
+ +
+

Examples

+
## Each of the following examples shows the application of each imputation method and then 
+## a Linear Discriminant Analysis is plotted to show its effect on the data structure.
+
+## Initial example data preparation
+library(metaboData)
+
+d <- analysisData(abr1$neg[,200:250],abr1$fact) %>% 
+ occupancyMaximum(occupancy = 2/3)
+
+d %>% 
+ plotLDA(cls = 'day')
+
+ 
+## Missing value imputation across all samples
+d %>% 
+ imputeAll(parallel = 'no') %>% 
+ plotLDA(cls = 'day')
+
+
+## Missing value imputation class-wise
+d %>% 
+ imputeClass(cls = 'day') %>% 
+ plotLDA(cls = 'day')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/index.html b/docs/reference/index.html index ed2a1cb7..5898e7da 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,66 +1,12 @@ - - - - - - - -Function reference • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Function reference • metabolyseR - + + - - - -
-
- -
- -
+
- - - - - - - - - - -
-

Analysis S4 classes

+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+

Analysis S4 classes

+

Analysis-class

Analysis S4 class

+

AnalysisData-class

AnalysisData S4 class

+

metabolyse() reAnalyse()

Perform an analysis

+

analysisData()

AnalysisData class constructor

+

dat() `dat<-`() sinfo() `sinfo<-`() raw() `raw<-`() preTreated() `preTreated<-`() features() nSamples() nFeatures() analysisResults()

AnalysisData and Analysis class accessors

+

clsAdd() clsArrange() clsAvailable() clsExtract() clsRemove() clsRename() clsReplace()

Sample meta information wrangling

-

Analysis parameters

+
+

Analysis parameters

+

AnalysisParameters-class

AnalysisParameters S4 class

+

analysisElements()

Analysis elements

+

analysisParameters()

Create an AnalysisParameters S4 class object

+

parameters() `parameters<-`()

Get or set analysis parameters

+

`changeParameter<-`()

Change analysis parameters

+

parseParameters() exportParameters()

Parse/export analysis parameters

+

preTreatmentElements() preTreatmentMethods() preTreatmentParameters()

Pre-treatment parameters

+

modellingMethods() modellingParameters()

Modelling parameters

+

correlationsParameters()

Correlations parameters

-

Pre-treatment

+
+

Pre-treatment

+

aggregateMean() aggregateMedian() aggregateSum()

Sample aggregation

+

correctionCenter()

Batch/block correction

+

imputeAll() imputeClass()

Missing data imputation

+

keepClasses() keepFeatures() keepSamples()

Keep samples, classes or features

+

occupancyMaximum() occupancyMinimum()

Feature occupancy filtering

+

QCimpute() QCoccupancy() QCremove() QCrsdFilter()

Quality control (QC) sample treatments

+

removeClasses() removeFeatures() removeSamples()

Remove samples, classes or features

+

transformArcSine() transformAuto() transformCenter() transformLevel() transformLn() transformLog10() transformPareto() transformRange() transformSQRT() transformTICnorm() transformVast()

Scaling, transformation and normalisation methods

-

Modelling

+
+

Modelling

+

RandomForest-class

RandomForest S4 class

+

Univariate-class

Univariate S4 class

+

randomForest()

Random forest analysis

+

anova()

ANOVA

+

ttest()

Welch's t-test

+

linearRegression()

Linear regression

+

binaryComparisons() type() response() metrics() importanceMetrics() importance() explanatoryFeatures()

Modelling accessor methods

-

Correlations

+
+

Correlations

+

correlations()

Feature correlation analysis

-

Plotting

+
+

Plotting

+

plotFeature()

Plot a feature

+

plotOccupancy()

Plot class occupancy distributions

+

plotRSD()

Plot RSD distributions

+

plotTIC()

Plot sample total ion counts

+

plotPCA()

Principal Component Analysis plot

+

plotLDA()

Principal Component - Linear Discriminant Analysis plot

+

plotUnsupervisedRF()

Unsupervised random forest MDS plot

+

plotSupervisedRF()

Supervised random forest MDS plot

+

plotMDS()

Multidimensional scaling (MDS) plot

+

plotROC()

Plot receiver operator characteristic (ROC) curves

+

plotMetrics()

Plot model performance metrics

+

plotImportance()

Plot feature importance

+

plotExplanatoryHeatmap()

Heatmap plot of explanatory features

-

Miscellaneous

+
+

Miscellaneous

+

bindRows()

Bind AnalysisData objects by row

+

split()

Split an AnalysisData object

+

rsd()

Calculate feature relative standard deviations

+

occupancy()

Calculate feature class occupancies

- +
+
-
- +
- - + + diff --git a/docs/reference/io-parameters.html b/docs/reference/io-parameters.html index f5e1a4bf..87af50f8 100644 --- a/docs/reference/io-parameters.html +++ b/docs/reference/io-parameters.html @@ -1,67 +1,12 @@ - - - - - - - -Parse/export analysis parameters — parseParameters • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Parse/export analysis parameters — parseParameters • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,103 +68,95 @@

Parse/export analysis parameters

Import analysis parameters from a .yaml format file or export an AnalysisParameters object to .yaml format.

-
parseParameters(path)
-
-exportParameters(d, file = "analysis_parameters.yaml")
-
-# S4 method for AnalysisParameters
-exportParameters(d, file = "analysis_parameters.yaml")
-
-# S4 method for Analysis
-exportParameters(d, file = "analysis_parameters.yaml")
- -

Arguments

- - - - - - - - - - - - - - -
path

file path of .yaml file to parse

d

S4 object of class AnalysisParameters or Analysis

file

File name and path to export to

- - -

Examples

-
## Import analysis parameters
-paramFile <- system.file('defaultParameters.yaml',package = 'metabolyseR')
-p <- parseParameters(paramFile)
-p
-#> Parameters:
-#> pre-treatment
-#> 	QC
-#> 		occupancyFilter
-#> 			cls = class
-#> 			QCidx = QC
-#> 			occupancy = 0.667
-#> 		impute
-#> 			cls = class
-#> 			QCidx = QC
-#> 			occupancy = 0.667
-#> 		RSDfilter
-#> 			cls = class
-#> 			QCidx = QC
-#> 			RSDthresh = 0.5
-#> 		removeQC
-#> 			cls = class
-#> 			QCidx = QC
-#> 	occupancyFilter
-#> 		maximum
-#> 			cls = class
-#> 			occupancy = 0.667
-#> 	impute
-#> 		class
-#> 			cls = class
-#> 			occupancy = 0.667
-#> 	transform
-#> 		TICnorm
-#> 
-#> correlations
-#> 	method = pearson
-#> 	pAdjustMethod = bonferroni
-#> 	corPvalue = 0.05
-
-if (FALSE) {
-## Export analysis parameters
-exportParameters(p,file = 'analysis_parameters.yaml')
-}
-
+
+
parseParameters(path)
+
+exportParameters(d, file = "analysis_parameters.yaml")
+
+# S4 method for AnalysisParameters
+exportParameters(d, file = "analysis_parameters.yaml")
+
+# S4 method for Analysis
+exportParameters(d, file = "analysis_parameters.yaml")
+
+ +
+

Arguments

+
path
+

file path of .yaml file to parse

+
d
+

S4 object of class AnalysisParameters or Analysis

+
file
+

File name and path to export to

+
+ +
+

Examples

+
## Import analysis parameters
+paramFile <- system.file('defaultParameters.yaml',package = 'metabolyseR')
+p <- parseParameters(paramFile)
+p
+#> Parameters:
+#> pre-treatment
+#> 	QC
+#> 		occupancyFilter
+#> 			cls = class
+#> 			QCidx = QC
+#> 			occupancy = 0.667
+#> 		impute
+#> 			cls = class
+#> 			QCidx = QC
+#> 			occupancy = 0.667
+#> 		RSDfilter
+#> 			cls = class
+#> 			QCidx = QC
+#> 			RSDthresh = 0.5
+#> 		removeQC
+#> 			cls = class
+#> 			QCidx = QC
+#> 	occupancyFilter
+#> 		maximum
+#> 			cls = class
+#> 			occupancy = 0.667
+#> 	impute
+#> 		class
+#> 			cls = class
+#> 			occupancy = 0.667
+#> 	transform
+#> 		TICnorm
+#> 
+#> correlations
+#> 	method = pearson
+#> 	pAdjustMethod = bonferroni
+#> 	corPvalue = 0.05
+
+if (FALSE) {
+## Export analysis parameters
+exportParameters(p,file = 'analysis_parameters.yaml')
+}
+
+
+
-
- +
- - + + diff --git a/docs/reference/keep.html b/docs/reference/keep.html index b41ef80a..6b886cca 100644 --- a/docs/reference/keep.html +++ b/docs/reference/keep.html @@ -1,67 +1,12 @@ - - - - - - - -Keep samples, classes or features — keepClasses • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Keep samples, classes or features — keepClasses • metabolyseR - - - - + + -
-
- -
- -
+
@@ -138,126 +68,111 @@

Keep samples, classes or features

Retain samples, classes or features in an AnalysisData object.

-
keepClasses(d, cls = "class", classes = c())
+    
+
keepClasses(d, cls = "class", classes = c())
 
-# S4 method for AnalysisData
-keepClasses(d, cls = "class", classes = c())
+# S4 method for AnalysisData
+keepClasses(d, cls = "class", classes = c())
 
-keepFeatures(d, features = character())
+keepFeatures(d, features = character())
 
-# S4 method for AnalysisData
-keepFeatures(d, features = character())
+# S4 method for AnalysisData
+keepFeatures(d, features = character())
 
-keepSamples(d, idx = "fileOrder", samples = c())
+keepSamples(d, idx = "fileOrder", samples = c())
 
-# S4 method for AnalysisData
-keepSamples(d, idx = "fileOrder", samples = c())
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - -
d

S4 object of class AnalysisData

cls

info column to use for class information

classes

classes to keep

features

features to remove

idx

info column containing sample indexes

samples

sample indexes to keep

- -

Value

+# S4 method for AnalysisData +keepSamples(d, idx = "fileOrder", samples = c())
+
+
+

Arguments

+
d
+

S4 object of class AnalysisData

+
cls
+

info column to use for class information

+
classes
+

classes to keep

+
features
+

features to keep

+
idx
+

info column containing sample indexes

+
samples
+

sample indexes to keep

+
+
+

Value

An S4 object of class AnalysisData with specified samples, classes or features retained.

-

Methods

- +
+
+

Methods

-
    -
  • keepClasses: Keep classes.

  • +
    • keepClasses: Keep classes.

    • keepFeatures: Keep features.

    • keepSamples: Keep samples.

    • -
    - - -

    Examples

    -
    library(metaboData)
    - d <- analysisData(abr1$neg[,200:300],abr1$fact)
    - 
    - ## Keep classes
    - d %>% 
    -  keepClasses(cls = 'day',classes = 'H')
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 20 
    -#> Features: 101 
    -#> Info: 9 
    -#> 
    - 
    - ## Keep features
    - d %>% 
    -  keepFeatures(features = c('N200','N201'))
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 120 
    -#> Features: 2 
    -#> Info: 9 
    -#> 
    - 
    - ## Keep samples
    - d %>% 
    -  keepSamples(idx = 'injorder',samples = c(1,10))
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 2 
    -#> Features: 101 
    -#> Info: 9 
    -#> 
    -
    +
+ +
+

Examples

+
library(metaboData)
+ d <- analysisData(abr1$neg[,200:300],abr1$fact)
+ 
+ ## Keep classes
+ d %>% 
+  keepClasses(cls = 'day',classes = 'H')
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 20 
+#> Features: 101 
+#> Info: 9 
+#> 
+ 
+ ## Keep features
+ d %>% 
+  keepFeatures(features = c('N200','N201'))
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 120 
+#> Features: 2 
+#> Info: 9 
+#> 
+ 
+ ## Keep samples
+ d %>% 
+  keepSamples(idx = 'injorder',samples = c(1,10))
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 2 
+#> Features: 101 
+#> Info: 9 
+#> 
+
+
+ -
- +
- - + + diff --git a/docs/reference/linearRegression.html b/docs/reference/linearRegression.html index 6becc828..5ee96f09 100644 --- a/docs/reference/linearRegression.html +++ b/docs/reference/linearRegression.html @@ -1,67 +1,12 @@ - - - - - - - -Linear regression — linearRegression • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Linear regression — linearRegression • metabolyseR - - + + - - -
-
- -
- -
+
@@ -138,92 +68,83 @@

Linear regression

Linear regression

-
linearRegression(
-  x,
-  cls = "class",
-  pAdjust = "bonferroni",
-  returnModels = FALSE
-)
-
-# S4 method for AnalysisData
-linearRegression(
-  x,
-  cls = "class",
-  pAdjust = "bonferroni",
-  returnModels = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
x

S4 object of class AnalysisData

cls

vector of sample information column names to regress

pAdjust

p value adjustment method

returnModels

should models be returned

- -

Value

+
+
linearRegression(
+  x,
+  cls = "class",
+  pAdjust = "bonferroni",
+  returnModels = FALSE
+)
+
+# S4 method for AnalysisData
+linearRegression(
+  x,
+  cls = "class",
+  pAdjust = "bonferroni",
+  returnModels = FALSE
+)
+
+
+

Arguments

+
x
+

S4 object of class AnalysisData

+
cls
+

vector of sample information column names to regress

+
pAdjust
+

p value adjustment method

+
returnModels
+

should models be returned

+
+
+

Value

An S4 object of class Univariate.

+
-

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg[,200:300],abr1$fact)
-
-## Perform linear regression
-lr_analysis <- linearRegression(d,cls = 'injorder')
-
-## Extract significant features
-explanatoryFeatures(lr_analysis)
-#> # A tibble: 5 × 15
-#>   Response Feature r.squared adj.r.squared sigma statistic  p.value    df logLik
-#>   <chr>    <chr>       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl>
-#> 1 injorder N283        0.310        0.304   4.27      53.0 4.10e-11     1  -343.
-#> 2 injorder N221        0.140        0.133   5.87      19.3 2.50e- 5     1  -382.
-#> 3 injorder N255        0.119        0.111  11.1       15.9 1.17e- 4     1  -458.
-#> 4 injorder N267        0.118        0.111  26.4       15.8 1.22e- 4     1  -562.
-#> 5 injorder N297        0.107        0.0995 44.7       14.1 2.65e- 4     1  -625.
-#> # … with 6 more variables: AIC <dbl>, BIC <dbl>, deviance <dbl>,
-#> #   df.residual <int>, nobs <int>, adjusted.p.value <dbl>
-
+
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact)
+
+## Perform linear regression
+lr_analysis <- linearRegression(d,cls = 'injorder')
+
+## Extract significant features
+explanatoryFeatures(lr_analysis)
+#> # A tibble: 5 × 15
+#>   Response Feature r.squared adj.r.squared sigma statistic  p.value    df logLik
+#>   <chr>    <chr>       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl>
+#> 1 injorder N283        0.310        0.304   4.27      53.0 4.10e-11     1  -343.
+#> 2 injorder N221        0.140        0.133   5.87      19.3 2.50e- 5     1  -382.
+#> 3 injorder N255        0.119        0.111  11.1       15.9 1.17e- 4     1  -458.
+#> 4 injorder N267        0.118        0.111  26.4       15.8 1.22e- 4     1  -562.
+#> 5 injorder N297        0.107        0.0995 44.7       14.1 2.65e- 4     1  -625.
+#> # … with 6 more variables: AIC <dbl>, BIC <dbl>, deviance <dbl>,
+#> #   df.residual <int>, nobs <int>, adjusted.p.value <dbl>
+
+
+
-
- +
- - + + diff --git a/docs/reference/metabolyse.html b/docs/reference/metabolyse.html index 88ce0442..d007df47 100644 --- a/docs/reference/metabolyse.html +++ b/docs/reference/metabolyse.html @@ -1,67 +1,12 @@ - - - - - - - -Perform an analysis — metabolyse • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Perform an analysis — metabolyse • metabolyseR - + + - - - -
-
- -
- -
+
@@ -138,167 +68,157 @@

Perform an analysis

Perform analyses containing multiple analysis element steps.

-
metabolyse(data, info, parameters = analysisParameters(), verbose = TRUE)
+    
+
metabolyse(data, info, parameters = analysisParameters(), verbose = TRUE)
 
-reAnalyse(analysis, parameters = analysisParameters(), verbose = TRUE)
+reAnalyse(analysis, parameters = analysisParameters(), verbose = TRUE)
 
-# S4 method for Analysis
-reAnalyse(analysis, parameters = analysisParameters(), verbose = TRUE)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - -
data

tibble or data.frame containing data to analyse

info

tibble or data.frame containing data info or meta data

parameters

an object of AnalysisParameters class containing -parameters for analysis. Default calls analysisParameters()

verbose

should output be printed to the console

analysis

an object of class Analysis containing previous -analysis results

- -

Value

+# S4 method for Analysis +reAnalyse(analysis, parameters = analysisParameters(), verbose = TRUE)
+
+
+

Arguments

+
data
+

tibble or data.frame containing data to analyse

+
info
+

tibble or data.frame containing data info or meta data

+
parameters
+

an object of AnalysisParameters class containing +parameters for analysis. Default calls analysisParameters()

+
verbose
+

should output be printed to the console

+
analysis
+

an object of class Analysis containing previous +analysis results

+
+
+

Value

An S4 object of class Analysis.

-

Details

- +
+
+

Details

Routine analyses are those that are often made up of numerous steps where parameters have likely already been previously established. The emphasis here is on convenience with as little code as possible required. In these analyses, the necessary analysis elements, order and parameters are first prepared and then the analysis routine subsequently performed in a single step. The metabolyse function provides this utility, where the metabolome data, sample meta information and analysis parameters are provided. The reAnalyse method can be used to perform further analyses on the results.

+
-

Examples

-
library(metaboData)
-
-## Generate analysis parameters
-p <- analysisParameters(c('pre-treatment','modelling'))
-
-## Alter pre-treatment and modelling parameters to use different methods
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(occupancyFilter = 'maximum',
-       transform = 'TICnorm')
-)
-parameters(p,'modelling') <- modellingParameters('anova')
-
-## Change "cls" parameters 
-changeParameter(p,'cls') <- 'day'
-
-## Run analysis using a subset of the abr1 negative mode data set
-analysis <- metabolyse(abr1$neg[,1:200],
-                       abr1$fact,
-                       p)
-#> 
-#> metabolyseR  v0.14.6 Wed Nov 17 10:24:15 2021
-#> ________________________________________________________________________________
-#> Parameters:
-#> pre-treatment
-#> 	occupancyFilter
-#> 		maximum
-#> 			cls = day
-#> 			occupancy = 2/3
-#> 	transform
-#> 		TICnorm
-#> 
-#> modelling
-#> 	anova
-#> 		cls = day
-#> 		pAdjust = bonferroni
-#> 		comparisons = list()
-#> 		returnModels = FALSE
-#> ________________________________________________________________________________
-#> Pre-treatment 
-#> 
Pre-treatment 	 [0.9S]
-#> Modelling 
-#> 
Modelling 	 [0.6S]
-#> ________________________________________________________________________________
-#> 
-#> Complete! [1.5S]
-                       
-## Re-analyse to include correlation analysis
-analysis <- reAnalyse(analysis,
-                      parameters = analysisParameters('correlations'))
-#> 
-#> metabolyseR v0.14.6 Wed Nov 17 10:24:16 2021
-#> ________________________________________________________________________________
-#> Parameters:
-#> correlations
-#> 	method = pearson
-#> 	pAdjustMethod = bonferroni
-#> 	corPvalue = 0.05
-#> ________________________________________________________________________________
-#> 
-#> Correlations 
-#> 
Correlations 	 [0.1S]
-#> ________________________________________________________________________________
-#> 
-#> Complete! [0.1S]
-#> 
-
-print(analysis)
-#> 
-#> metabolyseR v0.14.6
-#> Analysis:
-#> 	Wed Nov 17 10:24:15 2021
-#> 
-#> 	Raw Data:
-#> 		No. samples = 120
-#> 		No. features = 200
-#> 
-#> 	Pre-treated Data:
-#> 		Wed Nov 17 10:24:16 2021
-#> 		No. samples = 120
-#> 		No. features = 48
-#> 
-#> 	Modelling:
-#> 		Wed Nov 17 10:24:16 2021
-#> 		Methods: anova
-#> 
-#> 	Correlations:
-#> 		Wed Nov 17 10:24:17 2021
-#> 		No. correlations = 140
-
-
+
+

Examples

+
library(metaboData)
+
+## Generate analysis parameters
+p <- analysisParameters(c('pre-treatment','modelling'))
+
+## Alter pre-treatment and modelling parameters to use different methods
+parameters(p,'pre-treatment') <- preTreatmentParameters(
+  list(occupancyFilter = 'maximum',
+       transform = 'TICnorm')
+)
+parameters(p,'modelling') <- modellingParameters('anova')
+
+## Change "cls" parameters 
+changeParameter(p,'cls') <- 'day'
+
+## Run analysis using a subset of the abr1 negative mode data set
+analysis <- metabolyse(abr1$neg[,1:200],
+                       abr1$fact,
+                       p)
+#> 
+#> metabolyseR  v0.14.7 Fri Dec 17 17:55:29 2021
+#> ________________________________________________________________________________
+#> Parameters:
+#> pre-treatment
+#> 	occupancyFilter
+#> 		maximum
+#> 			cls = day
+#> 			occupancy = 2/3
+#> 	transform
+#> 		TICnorm
+#> 
+#> modelling
+#> 	anova
+#> 		cls = day
+#> 		pAdjust = bonferroni
+#> 		comparisons = list()
+#> 		returnModels = FALSE
+#> ________________________________________________________________________________
+#> Pre-treatment 
+#> 
Pre-treatment 	 [0.8S]
+#> Modelling 
+#> 
Modelling 	 [0.6S]
+#> ________________________________________________________________________________
+#> 
+#> Complete! [1.4S]
+                       
+## Re-analyse to include correlation analysis
+analysis <- reAnalyse(analysis,
+                      parameters = analysisParameters('correlations'))
+#> 
+#> metabolyseR v0.14.7 Fri Dec 17 17:55:30 2021
+#> ________________________________________________________________________________
+#> Parameters:
+#> correlations
+#> 	method = pearson
+#> 	pAdjustMethod = bonferroni
+#> 	corPvalue = 0.05
+#> ________________________________________________________________________________
+#> 
+#> Correlations 
+#> 
Correlations 	 [0.1S]
+#> ________________________________________________________________________________
+#> 
+#> Complete! [0.1S]
+#> 
+
+print(analysis)
+#> 
+#> metabolyseR v0.14.7
+#> Analysis:
+#> 	Fri Dec 17 17:55:29 2021
+#> 
+#> 	Raw Data:
+#> 		No. samples = 120
+#> 		No. features = 200
+#> 
+#> 	Pre-treated Data:
+#> 		Fri Dec 17 17:55:30 2021
+#> 		No. samples = 120
+#> 		No. features = 48
+#> 
+#> 	Modelling:
+#> 		Fri Dec 17 17:55:30 2021
+#> 		Methods: anova
+#> 
+#> 	Correlations:
+#> 		Fri Dec 17 17:55:30 2021
+#> 		No. correlations = 140
+
+
+
+ -
- +
- - + + diff --git a/docs/reference/modelling-accessors.html b/docs/reference/modelling-accessors.html index 582afd84..b2f464b6 100644 --- a/docs/reference/modelling-accessors.html +++ b/docs/reference/modelling-accessors.html @@ -1,67 +1,12 @@ - - - - - - - -Modelling accessor methods — binaryComparisons • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Modelling accessor methods — binaryComparisons • metabolyseR - - - - + + -
-
- -
- -
+
@@ -138,202 +68,188 @@

Modelling accessor methods

Methods for accessing modelling results.

-
binaryComparisons(x, cls = "class")
-
-# S4 method for AnalysisData
-binaryComparisons(x, cls = "class")
-
-type(x)
+    
+
binaryComparisons(x, cls = "class")
 
-# S4 method for RandomForest
-type(x)
+# S4 method for AnalysisData
+binaryComparisons(x, cls = "class")
 
-response(x)
+type(x)
 
-# S4 method for RandomForest
-response(x)
+# S4 method for RandomForest
+type(x)
 
-metrics(x)
+response(x)
 
-# S4 method for RandomForest
-metrics(x)
+# S4 method for RandomForest
+response(x)
 
-# S4 method for list
-metrics(x)
+metrics(x)
 
-# S4 method for Analysis
-metrics(x)
+# S4 method for RandomForest
+metrics(x)
 
-importanceMetrics(x)
+# S4 method for list
+metrics(x)
 
-# S4 method for RandomForest
-importanceMetrics(x)
+# S4 method for Analysis
+metrics(x)
 
-importance(x)
+importanceMetrics(x)
 
-# S4 method for RandomForest
-importance(x)
+# S4 method for RandomForest
+importanceMetrics(x)
 
-# S4 method for Univariate
-importance(x)
+importance(x)
 
-# S4 method for list
-importance(x)
+# S4 method for RandomForest
+importance(x)
 
-# S4 method for Analysis
-importance(x)
+# S4 method for Univariate
+importance(x)
 
-explanatoryFeatures(x, ...)
+# S4 method for list
+importance(x)
 
-# S4 method for Univariate
-explanatoryFeatures(x, threshold = 0.05)
+# S4 method for Analysis
+importance(x)
 
-# S4 method for RandomForest
-explanatoryFeatures(x, metric = "FalsePositiveRate", threshold = 0.05)
+explanatoryFeatures(x, ...)
 
-# S4 method for list
-explanatoryFeatures(x, ...)
+# S4 method for Univariate
+explanatoryFeatures(x, threshold = 0.05)
 
-# S4 method for Analysis
-explanatoryFeatures(x, ...)
+# S4 method for RandomForest +explanatoryFeatures(x, metric = "FalsePositiveRate", threshold = 0.05) -

Arguments

- - - - - - - - - - - - - - - - - - - - - - -
x

S4 object of class AnalysisData,RandomForest, Univariate, Analysis or a list.

cls

sample information column to use

...

arguments to parse to method for specific class

threshold

threshold below which explanatory features are extracted

metric

importance metric for which to retrieve explanatory features

+# S4 method for list +explanatoryFeatures(x, ...) -

Methods

+# S4 method for Analysis +explanatoryFeatures(x, ...)
+
+
+

Arguments

+
x
+

S4 object of class AnalysisData, RandomForest, Univariate, Analysis or a list.

+
cls
+

sample information column to use

+
...
+

arguments to pass to the method for the specific class

+
threshold
+

threshold below which explanatory features are extracted

+
metric
+

importance metric for which to retrieve explanatory features

+
+
+

Methods

-
    -
  • binaryComparisons: Return a vector of all possible binary comparisons for a given sample information column.

  • +
    • binaryComparisons: Return a vector of all possible binary comparisons for a given sample information column.

    • type: Return the type of random forest analysis.

    • response: Return the response variable name used for a random forest analysis.

    • metrics: Retrieve the model performance metrics for a random forest analysis

    • importanceMetrics: Retrieve the available feature importance metrics for a random forest analysis.

    • importance: Retrieve feature importance results.

    • explanatoryFeatures: Retrieve explanatory features.

    • -
    - - -

    Examples

    -
    library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    -
    -## Return possible binary comparisons for the 'day' column
    -binaryComparisons(d,cls = 'day')
    -#>  [1] "1~2" "1~3" "1~4" "1~5" "1~H" "2~3" "2~4" "2~5" "2~H" "3~4" "3~5" "3~H"
    -#> [13] "4~5" "4~H" "5~H"
    -
    -## Perform random forest analysis
    -rf_analysis <- randomForest(d,cls = 'day')
    -
    -## Return the type of random forest
    -type(rf_analysis)
    -#> [1] "classification"
    -
    -## Return the response variable name used
    -response(rf_analysis)
    -#> [1] "day"
    -
    -## Retrieve the model performance metrics
    -metrics(rf_analysis)
    -#> # A tibble: 4 × 5
    -#>   Response Comparison  .metric  .estimator .estimate
    -#>   <chr>    <chr>       <chr>    <chr>          <dbl>
    -#> 1 day      1~2~3~4~5~H accuracy multiclass    0.567 
    -#> 2 day      1~2~3~4~5~H kap      multiclass    0.48  
    -#> 3 day      1~2~3~4~5~H roc_auc  hand_till     0.886 
    -#> 4 day      1~2~3~4~5~H margin   NA            0.0424
    -
    -## Show the available feature importance metrics
    -importanceMetrics(rf_analysis)
    -#>  [1] "1"                    "2"                    "3"                   
    -#>  [4] "4"                    "5"                    "FalsePositiveRate"   
    -#>  [7] "H"                    "MeanDecreaseAccuracy" "MeanDecreaseGini"    
    -#> [10] "SelectionFrequency"  
    -
    -## Retrieve the feature importance results
    -importance(rf_analysis)
    -#> # A tibble: 1,010 × 5
    -#>    Response Comparison  Feature Metric                  Value
    -#>    <chr>    <chr>       <chr>   <chr>                   <dbl>
    -#>  1 day      1~2~3~4~5~H N200    1                    0       
    -#>  2 day      1~2~3~4~5~H N200    2                    0       
    -#>  3 day      1~2~3~4~5~H N200    3                    0       
    -#>  4 day      1~2~3~4~5~H N200    4                    0       
    -#>  5 day      1~2~3~4~5~H N200    5                    0       
    -#>  6 day      1~2~3~4~5~H N200    FalsePositiveRate    2.35e-40
    -#>  7 day      1~2~3~4~5~H N200    H                    0       
    -#>  8 day      1~2~3~4~5~H N200    MeanDecreaseAccuracy 0       
    -#>  9 day      1~2~3~4~5~H N200    MeanDecreaseGini     6.00e- 2
    -#> 10 day      1~2~3~4~5~H N200    SelectionFrequency   1.6 e+ 1
    -#> # … with 1,000 more rows
    -
    -## Retrieve the explanatory features
    -explanatoryFeatures(rf_analysis,metric = 'FalsePositiveRate',threshold = 0.05)
    -#> # A tibble: 35 × 5
    -#>    Response Comparison  Feature Metric                Value
    -#>    <chr>    <chr>       <chr>   <chr>                 <dbl>
    -#>  1 day      1~2~3~4~5~H N229    FalsePositiveRate 5.75e-129
    -#>  2 day      1~2~3~4~5~H N259    FalsePositiveRate 4.88e- 72
    -#>  3 day      1~2~3~4~5~H N277    FalsePositiveRate 3.98e- 67
    -#>  4 day      1~2~3~4~5~H N255    FalsePositiveRate 3.27e- 53
    -#>  5 day      1~2~3~4~5~H N213    FalsePositiveRate 4.92e- 45
    -#>  6 day      1~2~3~4~5~H N200    FalsePositiveRate 2.35e- 40
    -#>  7 day      1~2~3~4~5~H N221    FalsePositiveRate 1.80e- 38
    -#>  8 day      1~2~3~4~5~H N299    FalsePositiveRate 4.91e- 36
    -#>  9 day      1~2~3~4~5~H N245    FalsePositiveRate 9.75e- 27
    -#> 10 day      1~2~3~4~5~H N279    FalsePositiveRate 2.38e- 20
    -#> # … with 25 more rows
    -
    +
+ +
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact)
+
+## Return possible binary comparisons for the 'day' column
+binaryComparisons(d,cls = 'day')
+#>  [1] "1~2" "1~3" "1~4" "1~5" "1~H" "2~3" "2~4" "2~5" "2~H" "3~4" "3~5" "3~H"
+#> [13] "4~5" "4~H" "5~H"
+
+## Perform random forest analysis
+rf_analysis <- randomForest(d,cls = 'day')
+
+## Return the type of random forest
+type(rf_analysis)
+#> [1] "classification"
+
+## Return the response variable name used
+response(rf_analysis)
+#> [1] "day"
+
+## Retrieve the model performance metrics
+metrics(rf_analysis)
+#> # A tibble: 4 × 5
+#>   Response Comparison  .metric  .estimator .estimate
+#>   <chr>    <chr>       <chr>    <chr>          <dbl>
+#> 1 day      1~2~3~4~5~H accuracy multiclass    0.567 
+#> 2 day      1~2~3~4~5~H kap      multiclass    0.48  
+#> 3 day      1~2~3~4~5~H roc_auc  hand_till     0.886 
+#> 4 day      1~2~3~4~5~H margin   NA            0.0424
+
+## Show the available feature importance metrics
+importanceMetrics(rf_analysis)
+#>  [1] "1"                    "2"                    "3"                   
+#>  [4] "4"                    "5"                    "FalsePositiveRate"   
+#>  [7] "H"                    "MeanDecreaseAccuracy" "MeanDecreaseGini"    
+#> [10] "SelectionFrequency"  
+
+## Retrieve the feature importance results
+importance(rf_analysis)
+#> # A tibble: 1,010 × 5
+#>    Response Comparison  Feature Metric                  Value
+#>    <chr>    <chr>       <chr>   <chr>                   <dbl>
+#>  1 day      1~2~3~4~5~H N200    1                    0       
+#>  2 day      1~2~3~4~5~H N200    2                    0       
+#>  3 day      1~2~3~4~5~H N200    3                    0       
+#>  4 day      1~2~3~4~5~H N200    4                    0       
+#>  5 day      1~2~3~4~5~H N200    5                    0       
+#>  6 day      1~2~3~4~5~H N200    FalsePositiveRate    2.35e-40
+#>  7 day      1~2~3~4~5~H N200    H                    0       
+#>  8 day      1~2~3~4~5~H N200    MeanDecreaseAccuracy 0       
+#>  9 day      1~2~3~4~5~H N200    MeanDecreaseGini     6.00e- 2
+#> 10 day      1~2~3~4~5~H N200    SelectionFrequency   1.6 e+ 1
+#> # … with 1,000 more rows
+
+## Retrieve the explanatory features
+explanatoryFeatures(rf_analysis,metric = 'FalsePositiveRate',threshold = 0.05)
+#> # A tibble: 35 × 5
+#>    Response Comparison  Feature Metric                Value
+#>    <chr>    <chr>       <chr>   <chr>                 <dbl>
+#>  1 day      1~2~3~4~5~H N229    FalsePositiveRate 5.75e-129
+#>  2 day      1~2~3~4~5~H N259    FalsePositiveRate 4.88e- 72
+#>  3 day      1~2~3~4~5~H N277    FalsePositiveRate 3.98e- 67
+#>  4 day      1~2~3~4~5~H N255    FalsePositiveRate 3.27e- 53
+#>  5 day      1~2~3~4~5~H N213    FalsePositiveRate 4.92e- 45
+#>  6 day      1~2~3~4~5~H N200    FalsePositiveRate 2.35e- 40
+#>  7 day      1~2~3~4~5~H N221    FalsePositiveRate 1.80e- 38
+#>  8 day      1~2~3~4~5~H N299    FalsePositiveRate 4.91e- 36
+#>  9 day      1~2~3~4~5~H N245    FalsePositiveRate 9.75e- 27
+#> 10 day      1~2~3~4~5~H N279    FalsePositiveRate 2.38e- 20
+#> # … with 25 more rows
+
+
+ -
- +
- - + + diff --git a/docs/reference/modelling-parameters.html b/docs/reference/modelling-parameters.html index 86d2b8a4..8a364e9f 100644 --- a/docs/reference/modelling-parameters.html +++ b/docs/reference/modelling-parameters.html @@ -1,67 +1,12 @@ - - - - - - - -Modelling parameters — modellingMethods • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Modelling parameters — modellingMethods • metabolyseR - - + + - - -
-
- -
- -
+
@@ -138,68 +68,64 @@

Modelling parameters

Retrieve the available modelling methods and parameters.

-
modellingMethods()
-
-modellingParameters(methods)
- -

Arguments

- - - - - - -
methods

character vector of available modelling methods

- - -

Examples

-
## Retrieve the available modelling methods
-modellingMethods()
-#> [1] "anova"            "ttest"            "linearRegression" "randomForest"    
-
-## Retrieve the modelling parameters for the anova method
-p <- modellingParameters('anova')
-
-## Assign the modelling parameters to analysis parameters
-mp <- analysisParameters('modelling')
-
-parameters(mp,'modelling') <- p
-
-print(mp)
-#> Parameters:
-#> modelling
-#> 	anova
-#> 		cls = class
-#> 		pAdjust = bonferroni
-#> 		comparisons = list()
-#> 		returnModels = FALSE
-#> 
-
+
+
modellingMethods()
+
+modellingParameters(methods)
+
+ +
+

Arguments

+
methods
+

character vector of available modelling methods

+
+ +
+

Examples

+
## Retrieve the available modelling methods
+modellingMethods()
+#> [1] "anova"            "ttest"            "linearRegression" "randomForest"    
+
+## Retrieve the modelling parameters for the anova method
+p <- modellingParameters('anova')
+
+## Assign the modelling parameters to analysis parameters
+mp <- analysisParameters('modelling')
+
+parameters(mp,'modelling') <- p
+
+print(mp)
+#> Parameters:
+#> modelling
+#> 	anova
+#> 		cls = class
+#> 		pAdjust = bonferroni
+#> 		comparisons = list()
+#> 		returnModels = FALSE
+#> 
+
+
+
-
- +
- - + + diff --git a/docs/reference/occupancy.html b/docs/reference/occupancy.html index 11d12072..5785ed4c 100644 --- a/docs/reference/occupancy.html +++ b/docs/reference/occupancy.html @@ -1,67 +1,12 @@ - - - - - - - -Calculate feature class occupancies — occupancy • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Calculate feature class occupancies — occupancy • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,74 +68,69 @@

Calculate feature class occupancies

Calculate the class occupancies of all features in an AnalysisData object.

-
occupancy(d, cls = "class")
-
-# S4 method for AnalysisData
-occupancy(d, cls = "class")
+
+
occupancy(d, cls = "class")
 
-    

Arguments

- - - - - - - - - - -
d

S4 object of class AnalysisData

cls

sample information column to use for which to compute class occupancies

- -

Value

+# S4 method for AnalysisData +occupancy(d, cls = "class")
+
+
+

Arguments

+
d
+

S4 object of class AnalysisData

+
cls
+

sample information column for which to compute class occupancies

+
+
+

Value

A tibble containing feature class proportional occupancies.

+
-

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg[,200:300],abr1$fact)
-
-occupancy(d,cls = 'day')
-#> # A tibble: 596 × 5
-#>    day   Feature     N `Class total` Occupancy
-#>    <fct> <chr>   <dbl>         <int>     <dbl>
-#>  1 1     N200        1            20      0.05
-#>  2 1     N201        3            20      0.15
-#>  3 1     N202        3            20      0.15
-#>  4 1     N203       19            20      0.95
-#>  5 1     N204        4            20      0.2 
-#>  6 1     N205       17            20      0.85
-#>  7 1     N206        4            20      0.2 
-#>  8 1     N207        8            20      0.4 
-#>  9 1     N208        7            20      0.35
-#> 10 1     N209       16            20      0.8 
-#> # … with 586 more rows
-
+
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact)
+
+occupancy(d,cls = 'day')
+#> # A tibble: 596 × 5
+#>    day   Feature     N `Class total` Occupancy
+#>    <fct> <chr>   <dbl>         <int>     <dbl>
+#>  1 1     N200        1            20      0.05
+#>  2 1     N201        3            20      0.15
+#>  3 1     N202        3            20      0.15
+#>  4 1     N203       19            20      0.95
+#>  5 1     N204        4            20      0.2 
+#>  6 1     N205       17            20      0.85
+#>  7 1     N206        4            20      0.2 
+#>  8 1     N207        8            20      0.4 
+#>  9 1     N208        7            20      0.35
+#> 10 1     N209       16            20      0.8 
+#> # … with 586 more rows
+
+
+
-
- +
- - + + diff --git a/docs/reference/occupancyFilter.html b/docs/reference/occupancyFilter.html index 0e920a5d..56e2316a 100644 --- a/docs/reference/occupancyFilter.html +++ b/docs/reference/occupancyFilter.html @@ -1,67 +1,12 @@ - - - - - - - -Feature occupancy filtering — occupancyMaximum • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Feature occupancy filtering — occupancyMaximum • metabolyseR - - - - + + -
-
- -
- -
+
@@ -138,99 +68,91 @@

Feature occupancy filtering

Feature filtering based on class occupancy.

-
occupancyMaximum(d, cls = "class", occupancy = 2/3)
+    
+
occupancyMaximum(d, cls = "class", occupancy = 2/3)
 
-# S4 method for AnalysisData
-occupancyMaximum(d, cls = "class", occupancy = 2/3)
+# S4 method for AnalysisData
+occupancyMaximum(d, cls = "class", occupancy = 2/3)
 
-occupancyMinimum(d, cls = "class", occupancy = 2/3)
+occupancyMinimum(d, cls = "class", occupancy = 2/3)
 
-# S4 method for AnalysisData
-occupancyMinimum(d, cls = "class", occupancy = 2/3)
- -

Arguments

- - - - - - - - - - - - - - -
d

S4 object of class AnalysisData

cls

sample information column name to use for class data

occupancy

feature occupancy filtering threshold, below which features will be removed

- -

Value

+# S4 method for AnalysisData +occupancyMinimum(d, cls = "class", occupancy = 2/3)
+
+
+

Arguments

+
d
+

S4 object of class AnalysisData

+
cls
+

sample information column name to use for class data

+
occupancy
+

feature occupancy filtering threshold, below which features will be removed

+
+
+

Value

An S4 object of class AnalysisData containing the class occupancy filtered data.

-

Details

- +
+
+

Details

Occupancy provides a useful metric by which to filter poorly represented features (features containing a majority zero or missing values). An occupancy threshold provides a means of specifying this majority with variables below the threshold excluded from further analyses. However, this can be complicated by an underlying class structure present within the data where a variable may be well represented within one class but not in another.

-

Methods

- +
+
+

Methods

-
    -
  • occupancyMaximium: Maximum occupancy threshold feature filtering. Where the maximum occupancy across all classes is above the threshold. Therefore, for a feature to be retained, only a single class needs to have an occupancy above the threshold.

  • +
    • occupancyMaximum: Maximum occupancy threshold feature filtering, where the maximum occupancy across all classes is required to be above the threshold. Therefore, for a feature to be retained, only a single class needs to have an occupancy above the threshold.

    • occupancyMinimum: Minimum occupancy threshold feature filtering. Where the minimum occupancy across all classes is required to be above the threshold. Therefore, for a feature to be retained, all classes would need to have an occupancy above the threshold.

    • -
    - - -

    Examples

    -
    ## Each of the following examples shows the application 
    -## of the feature occupancy filtering method method and 
    -## then a Principle Component Analysis is plotted to show 
    -## its effect on the data structure.
    -
    -## Initial example data preparation
    -library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact)
    - 
    -## Maximum occupancy threshold feature filtering
    -d %>% 
    - occupancyMaximum(cls = 'day') %>% 
    - plotPCA(cls = 'day')
    -
    - 
    -## Minimum occupancy threshold feature filtering
    -d %>% 
    - occupancyMinimum(cls = 'day') %>% 
    - plotPCA(cls = 'day')
    -
    -
    +
+ +
+

Examples

+
## Each of the following examples shows the application 
+## of the feature occupancy filtering method and 
+## then a Principal Component Analysis is plotted to show 
+## its effect on the data structure.
+
+## Initial example data preparation
+library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact)
+ 
+## Maximum occupancy threshold feature filtering
+d %>% 
+ occupancyMaximum(cls = 'day') %>% 
+ plotPCA(cls = 'day')
+
+ 
+## Minimum occupancy threshold feature filtering
+d %>% 
+ occupancyMinimum(cls = 'day') %>% 
+ plotPCA(cls = 'day')
+
+
+
+ -
- +
- - + + diff --git a/docs/reference/parameters.html b/docs/reference/parameters.html index 2b874a9f..f6db3510 100644 --- a/docs/reference/parameters.html +++ b/docs/reference/parameters.html @@ -1,67 +1,12 @@ - - - - - - - -Get or set analysis parameters — parameters • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get or set analysis parameters — parameters • metabolyseR - - + + - - -
-
- -
- -
+
@@ -138,181 +68,171 @@

Get or set analysis parameters

Get or set parameters for AnalysisParameters or Analysis class objects.

-
parameters(d, ...)
-
-# S4 method for AnalysisParameters
-parameters(d, element)
-
-# S4 method for Analysis
-parameters(d)
+    
+
parameters(d, ...)
 
-parameters(d, element) <- value
+# S4 method for AnalysisParameters
+parameters(d, element)
 
-# S4 method for AnalysisParameters
-parameters(d, element) <- value
+# S4 method for Analysis
+parameters(d)
 
-# S4 method for Analysis
-parameters(d) <- value
+parameters(d, element) <- value -

Arguments

- - - - - - - - - - - - - - - - - - -
d

S4 object of class AnalysisParameters or Analysis

...

arguments to pass to the appropriate method

element

analysis element for parameters to extract or assign. -Should be one of those returned by analysisElements()

value

list containing parameter values

+# S4 method for AnalysisParameters +parameters(d, element) <- value +# S4 method for Analysis +parameters(d) <- value
+
-

Examples

-
p <- analysisParameters('pre-treatment')
-
-## extract pre-treatment parameters
-parameters(p,'pre-treatment')
-#> $QC
-#> $QC$occupancyFilter
-#> $QC$occupancyFilter$cls
-#> [1] "class"
-#> 
-#> $QC$occupancyFilter$QCidx
-#> [1] "QC"
-#> 
-#> $QC$occupancyFilter$occupancy
-#> 2/3
-#> 
-#> 
-#> $QC$impute
-#> $QC$impute$cls
-#> [1] "class"
-#> 
-#> $QC$impute$QCidx
-#> [1] "QC"
-#> 
-#> $QC$impute$occupancy
-#> 2/3
-#> 
-#> $QC$impute$parallel
-#> [1] "variables"
-#> 
-#> $QC$impute$seed
-#> [1] 1234
-#> 
-#> 
-#> $QC$RSDfilter
-#> $QC$RSDfilter$cls
-#> [1] "class"
-#> 
-#> $QC$RSDfilter$QCidx
-#> [1] "QC"
-#> 
-#> $QC$RSDfilter$RSDthresh
-#> [1] 50
-#> 
-#> 
-#> $QC$removeQC
-#> $QC$removeQC$cls
-#> [1] "class"
-#> 
-#> $QC$removeQC$QCidx
-#> [1] "QC"
-#> 
-#> 
-#> 
-#> $occupancyFilter
-#> $occupancyFilter$maximum
-#> $occupancyFilter$maximum$cls
-#> [1] "class"
-#> 
-#> $occupancyFilter$maximum$occupancy
-#> 2/3
-#> 
-#> 
-#> 
-#> $impute
-#> $impute$class
-#> $impute$class$cls
-#> [1] "class"
-#> 
-#> $impute$class$occupancy
-#> 2/3
-#> 
-#> $impute$class$seed
-#> [1] 1234
-#> 
-#> 
-#> 
-#> $transform
-#> $transform$TICnorm
-#> named list()
-#> 
-#> 
-
-## set pre-treatment parameters
-parameters(p,'pre-treatment') <- preTreatmentParameters(
-  list(
-    remove = 'classes',
-    QC = c('RSDfilter','removeQC'),
-    transform = 'TICnorm'
-  )
-)
-
-print(p)
-#> Parameters:
-#> pre-treatment
-#> 	remove
-#> 		classes
-#> 			cls = class
-#> 			classes = c()
-#> 	QC
-#> 		RSDfilter
-#> 			cls = class
-#> 			QCidx = QC
-#> 			RSDthresh = 50
-#> 		removeQC
-#> 			cls = class
-#> 			QCidx = QC
-#> 	transform
-#> 		TICnorm
-#> 
-
+
+

Arguments

+
d
+

S4 object of class AnalysisParameters or Analysis

+
...
+

arguments to pass to the appropriate method

+
element
+

analysis element for parameters to extract or assign. +Should be one of those returned by analysisElements()

+
value
+

list containing parameter values

+
+ +
+

Examples

+
p <- analysisParameters('pre-treatment')
+
+## extract pre-treatment parameters
+parameters(p,'pre-treatment')
+#> $QC
+#> $QC$occupancyFilter
+#> $QC$occupancyFilter$cls
+#> [1] "class"
+#> 
+#> $QC$occupancyFilter$QCidx
+#> [1] "QC"
+#> 
+#> $QC$occupancyFilter$occupancy
+#> 2/3
+#> 
+#> 
+#> $QC$impute
+#> $QC$impute$cls
+#> [1] "class"
+#> 
+#> $QC$impute$QCidx
+#> [1] "QC"
+#> 
+#> $QC$impute$occupancy
+#> 2/3
+#> 
+#> $QC$impute$parallel
+#> [1] "variables"
+#> 
+#> $QC$impute$seed
+#> [1] 1234
+#> 
+#> 
+#> $QC$RSDfilter
+#> $QC$RSDfilter$cls
+#> [1] "class"
+#> 
+#> $QC$RSDfilter$QCidx
+#> [1] "QC"
+#> 
+#> $QC$RSDfilter$RSDthresh
+#> [1] 50
+#> 
+#> 
+#> $QC$removeQC
+#> $QC$removeQC$cls
+#> [1] "class"
+#> 
+#> $QC$removeQC$QCidx
+#> [1] "QC"
+#> 
+#> 
+#> 
+#> $occupancyFilter
+#> $occupancyFilter$maximum
+#> $occupancyFilter$maximum$cls
+#> [1] "class"
+#> 
+#> $occupancyFilter$maximum$occupancy
+#> 2/3
+#> 
+#> 
+#> 
+#> $impute
+#> $impute$class
+#> $impute$class$cls
+#> [1] "class"
+#> 
+#> $impute$class$occupancy
+#> 2/3
+#> 
+#> $impute$class$seed
+#> [1] 1234
+#> 
+#> 
+#> 
+#> $transform
+#> $transform$TICnorm
+#> named list()
+#> 
+#> 
+
+## set pre-treatment parameters
+parameters(p,'pre-treatment') <- preTreatmentParameters(
+  list(
+    remove = 'classes',
+    QC = c('RSDfilter','removeQC'),
+    transform = 'TICnorm'
+  )
+)
+
+print(p)
+#> Parameters:
+#> pre-treatment
+#> 	remove
+#> 		classes
+#> 			cls = class
+#> 			classes = c()
+#> 	QC
+#> 		RSDfilter
+#> 			cls = class
+#> 			QCidx = QC
+#> 			RSDthresh = 50
+#> 		removeQC
+#> 			cls = class
+#> 			QCidx = QC
+#> 	transform
+#> 		TICnorm
+#> 
+
+
+ -
- +
- - + + diff --git a/docs/reference/plotExplanatoryHeatmap.html b/docs/reference/plotExplanatoryHeatmap.html index a5836ca8..9a09a3d8 100644 --- a/docs/reference/plotExplanatoryHeatmap.html +++ b/docs/reference/plotExplanatoryHeatmap.html @@ -1,67 +1,12 @@ - - - - - - - -Heatmap plot of explantory features — plotExplanatoryHeatmap • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Heatmap plot of explantory features — plotExplanatoryHeatmap • metabolyseR - - + + - - -
-
- -
- -
+
@@ -138,145 +68,124 @@

Heatmap plot of explanatory features

Plot a heatmap of explanatory features.

-
plotExplanatoryHeatmap(x, ...)
-
-# S4 method for Univariate
-plotExplanatoryHeatmap(
-  x,
-  threshold = 0.05,
-  title = "",
-  distanceMeasure = "euclidean",
-  clusterMethod = "ward.D2",
-  featureNames = TRUE,
-  dendrogram = TRUE,
-  featureLimit = Inf
-)
-
-# S4 method for RandomForest
-plotExplanatoryHeatmap(
-  x,
-  metric = "FalsePositiveRate",
-  threshold = 0.05,
-  title = "",
-  distanceMeasure = "euclidean",
-  clusterMethod = "ward.D2",
-  featureNames = TRUE,
-  dendrogram = TRUE,
-  featureLimit = Inf
-)
-
-# S4 method for list
-plotExplanatoryHeatmap(
-  x,
-  threshold = 0.05,
-  distanceMeasure = "euclidean",
-  clusterMethod = "ward.D2",
-  featureNames = TRUE,
-  featureLimit = Inf
-)
-
-# S4 method for Analysis
-plotExplanatoryHeatmap(
-  x,
-  threshold = 0.05,
-  distanceMeasure = "euclidean",
-  clusterMethod = "ward.D2",
-  featureNames = TRUE,
-  featureLimit = Inf
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x

object of class Univariate, RandomForest or -Analysis

...

arguments to pass to the appropriate method

threshold

score threshold to use for specifying explanatory features

title

plot title

distanceMeasure

distance measure to use for clustering. See details.

clusterMethod

clustering method to use. See details

featureNames

should feature names be plotted?

dendrogram

TRUE/FALSE. Should the dendrogram be plotted?

featureLimit

The maximum number of features to plot

metric

importance metric on which to retrieve explanatory features

- -

Details

+
+
plotExplanatoryHeatmap(x, ...)
+
+# S4 method for Univariate
+plotExplanatoryHeatmap(
+  x,
+  threshold = 0.05,
+  title = "",
+  distanceMeasure = "euclidean",
+  clusterMethod = "ward.D2",
+  featureNames = TRUE,
+  dendrogram = TRUE,
+  featureLimit = Inf
+)
+
+# S4 method for RandomForest
+plotExplanatoryHeatmap(
+  x,
+  metric = "FalsePositiveRate",
+  threshold = 0.05,
+  title = "",
+  distanceMeasure = "euclidean",
+  clusterMethod = "ward.D2",
+  featureNames = TRUE,
+  dendrogram = TRUE,
+  featureLimit = Inf
+)
+
+# S4 method for list
+plotExplanatoryHeatmap(
+  x,
+  threshold = 0.05,
+  distanceMeasure = "euclidean",
+  clusterMethod = "ward.D2",
+  featureNames = TRUE,
+  featureLimit = Inf
+)
+
+# S4 method for Analysis
+plotExplanatoryHeatmap(
+  x,
+  threshold = 0.05,
+  distanceMeasure = "euclidean",
+  clusterMethod = "ward.D2",
+  featureNames = TRUE,
+  featureLimit = Inf
+)
+
-

Distance measures can be one of any that can be used for the method argument of dist().

-

Cluster methods can be one of any that can be used for the method argument of hclust().

+
+

Arguments

+
x
+

object of class Univariate, RandomForest or +Analysis

+
...
+

arguments to pass to the appropriate method

+
threshold
+

score threshold to use for specifying explanatory features

+
title
+

plot title

+
distanceMeasure
+

distance measure to use for clustering. See details.

+
clusterMethod
+

clustering method to use. See details

+
featureNames
+

should feature names be plotted?

+
dendrogram
+

TRUE/FALSE. Should the dendrogram be plotted?

+
featureLimit
+

The maximum number of features to plot

+
metric
+

importance metric on which to retrieve explanatory features

+
+
+

Details

+

Distance measures can be one of any that can be used for the method argument of dist().

+

Cluster methods can be one of any that can be used for the method argument of hclust().

+
-

Examples

-
library(metaboData)
-x <- analysisData(data = abr1$neg[,200:300],info = abr1$fact)
-
-## random forest classification example
-random_forest <- randomForest(x,cls = 'day')
-
-plotExplanatoryHeatmap(random_forest)
-
-
-## random forest regression example
-random_forest <- randomForest(x,cls = 'injorder')
-
-plotExplanatoryHeatmap(random_forest,metric = '%IncMSE',threshold = 2)
-
-
+
+

Examples

+
library(metaboData)
+x <- analysisData(data = abr1$neg[,200:300],info = abr1$fact)
+
+## random forest classification example
+random_forest <- randomForest(x,cls = 'day')
+
+plotExplanatoryHeatmap(random_forest)
+
+
+## random forest regression example
+random_forest <- randomForest(x,cls = 'injorder')
+
+plotExplanatoryHeatmap(random_forest,metric = '%IncMSE',threshold = 2)
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotFeature.html b/docs/reference/plotFeature.html index 900620fd..bdd38e48 100644 --- a/docs/reference/plotFeature.html +++ b/docs/reference/plotFeature.html @@ -1,67 +1,12 @@ - - - - - - - -Plot a feature — plotFeature • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot a feature — plotFeature • metabolyseR - - + + - - -
-
- -
- -
+
@@ -138,93 +68,77 @@

Plot a feature

Plot the trend of a feature.

-
plotFeature(analysis, feature, cls = "class", label = NULL, labelSize = 2, ...)
-
-# S4 method for AnalysisData
-plotFeature(analysis, feature, cls = "class", label = NULL, labelSize = 2)
-
-# S4 method for Analysis
-plotFeature(
-  analysis,
-  feature,
-  cls = "class",
-  label = NULL,
-  labelSize = 2,
-  type = "pre-treated"
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
analysis

an object of class AnalysisData or`` Analysis`

feature

feature name to plot

cls

information column to use for class labels

label

information column to use for sample labels

labelSize

sample label size

...

arguments to pass to the appropriate method

type

raw or pre-treated data to plot

- - -

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg,abr1$fact)
-
-## Plot a categorical response variable
-plotFeature(d,'N133',cls = 'day')
-
-
-## Plot a continuous response variable
-plotFeature(d,'N133',cls = 'injorder')
-
-
+
+
plotFeature(analysis, feature, cls = "class", label = NULL, labelSize = 2, ...)
+
+# S4 method for AnalysisData
+plotFeature(analysis, feature, cls = "class", label = NULL, labelSize = 2)
+
+# S4 method for Analysis
+plotFeature(
+  analysis,
+  feature,
+  cls = "class",
+  label = NULL,
+  labelSize = 2,
+  type = "pre-treated"
+)
+
+ +
+

Arguments

+
analysis
+

an object of class AnalysisData or Analysis

+
feature
+

feature name to plot

+
cls
+

information column to use for class labels

+
label
+

information column to use for sample labels

+
labelSize
+

sample label size

+
...
+

arguments to pass to the appropriate method

+
type
+

raw or pre-treated data to plot

+
+ +
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg,abr1$fact)
+
+## Plot a categorical response variable
+plotFeature(d,'N133',cls = 'day')
+
+
+## Plot a continuous response variable
+plotFeature(d,'N133',cls = 'injorder')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotImportance.html b/docs/reference/plotImportance.html index 9a676428..665d0fa5 100644 --- a/docs/reference/plotImportance.html +++ b/docs/reference/plotImportance.html @@ -1,67 +1,12 @@ - - - - - - - -Plot feature importance — plotImportance • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot feature importance — plotImportance • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,85 +68,71 @@

Plot feature importance

Plot Univariate or random forest feature importance.

-
plotImportance(x, ...)
-
-# S4 method for Univariate
-plotImportance(x, response = "class", rank = TRUE, threshold = 0.05)
-
-# S4 method for RandomForest
-plotImportance(x, metric = "FalsePositiveRate", rank = TRUE)
-
-# S4 method for list
-plotImportance(x, metric = "FalsePositiveRate")
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - -
x

S4 object of class Univariate or RandomForest

...

arguments to pass to specific method

response

response results to plot

rank

rank feature order for plotting

threshold

explanatory threshold line for the output plot

metric

importance metric to plot

- - -

Examples

-
library(metaboData)
-
-x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
-       keepClasses(cls = 'day',classes = c('H','1','5')) %>% 
-       occupancyMaximum(cls = 'day') %>%
-       transformTICnorm()
-       
-rf <- randomForest(x,cls = 'day')
-
-plotImportance(rf,rank = FALSE)
-
-
+
+
plotImportance(x, ...)
+
+# S4 method for Univariate
+plotImportance(x, response = "class", rank = TRUE, threshold = 0.05)
+
+# S4 method for RandomForest
+plotImportance(x, metric = "FalsePositiveRate", rank = TRUE)
+
+# S4 method for list
+plotImportance(x, metric = "FalsePositiveRate")
+
+ +
+

Arguments

+
x
+

S4 object of class Univariate or RandomForest

+
...
+

arguments to pass to specific method

+
response
+

response results to plot

+
rank
+

rank feature order for plotting

+
threshold
+

explanatory threshold line for the output plot

+
metric
+

importance metric to plot

+
+ +
+

Examples

+
library(metaboData)
+
+x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
+       keepClasses(cls = 'day',classes = c('H','1','5')) %>% 
+       occupancyMaximum(cls = 'day') %>%
+       transformTICnorm()
+       
+rf <- randomForest(x,cls = 'day')
+
+plotImportance(rf,rank = FALSE)
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotLDA-1.png b/docs/reference/plotLDA-1.png index a91fbe48..1d2b5b15 100644 Binary files a/docs/reference/plotLDA-1.png and b/docs/reference/plotLDA-1.png differ diff --git a/docs/reference/plotLDA.html b/docs/reference/plotLDA.html index 240fa302..f834c759 100644 --- a/docs/reference/plotLDA.html +++ b/docs/reference/plotLDA.html @@ -1,67 +1,12 @@ - - - - - - - -Principle Component - Linear Discriminant Analysis plot — plotLDA • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Principle Component - Linear Discriminant Analysis plot — plotLDA • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,155 +68,125 @@

Principal Component - Linear Discriminant Analysis plot

Plot linear discriminant analysis results of pre-treated data

-
plotLDA(
-  analysis,
-  cls = "class",
-  label = NULL,
-  scale = TRUE,
-  center = TRUE,
-  xAxis = "DF1",
-  yAxis = "DF2",
-  shape = FALSE,
-  ellipses = TRUE,
-  title = "PC-LDA",
-  legendPosition = "bottom",
-  labelSize = 2,
-  ...
-)
-
-# S4 method for AnalysisData
-plotLDA(
-  analysis,
-  cls = "class",
-  label = NULL,
-  scale = TRUE,
-  center = TRUE,
-  xAxis = "DF1",
-  yAxis = "DF2",
-  shape = FALSE,
-  ellipses = TRUE,
-  title = "PC-LDA",
-  legendPosition = "bottom",
-  labelSize = 2
-)
-
-# S4 method for Analysis
-plotLDA(
-  analysis,
-  cls = "class",
-  label = NULL,
-  scale = TRUE,
-  center = TRUE,
-  xAxis = "DF1",
-  yAxis = "DF2",
-  shape = FALSE,
-  ellipses = TRUE,
-  title = "PC-LDA",
-  legendPosition = "bottom",
-  labelSize = 2,
-  type = "raw"
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
analysis

S4 object of class AnalysisData or Analysis

cls

name of sample information column to use for class labels

label

name of sample information column to use for sample labels. Set to NULL for no labels.

scale

scale the data

center

center the data

xAxis

principle component to plot on the x-axis

yAxis

principle component to plot on the y-axis

shape

TRUE/FALSE use shape aesthetic for plot points. -Defaults to TRUE when the number of classes is greater than 12

ellipses

TRUE/FALSE, plot multivariate normal distribution 95\ -confidence ellipses for each class

title

plot title

legendPosition

legend position to pass to legend.position argument -of ggplot2::theme. Set to "none" to remove legend.

labelSize

label size. Ignored if label is NULL

...

arguments to pass to the appropriate method

type

raw or pre-treated data to plot

- +
+
plotLDA(
+  analysis,
+  cls = "class",
+  label = NULL,
+  scale = TRUE,
+  center = TRUE,
+  xAxis = "DF1",
+  yAxis = "DF2",
+  shape = FALSE,
+  ellipses = TRUE,
+  title = "PC-LDA",
+  legendPosition = "bottom",
+  labelSize = 2,
+  ...
+)
+
+# S4 method for AnalysisData
+plotLDA(
+  analysis,
+  cls = "class",
+  label = NULL,
+  scale = TRUE,
+  center = TRUE,
+  xAxis = "DF1",
+  yAxis = "DF2",
+  shape = FALSE,
+  ellipses = TRUE,
+  title = "PC-LDA",
+  legendPosition = "bottom",
+  labelSize = 2
+)
+
+# S4 method for Analysis
+plotLDA(
+  analysis,
+  cls = "class",
+  label = NULL,
+  scale = TRUE,
+  center = TRUE,
+  xAxis = "DF1",
+  yAxis = "DF2",
+  shape = FALSE,
+  ellipses = TRUE,
+  title = "PC-LDA",
+  legendPosition = "bottom",
+  labelSize = 2,
+  type = "raw"
+)
+
-

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg,abr1$fact) %>% 
- occupancyMaximum(cls = 'day')
-
-## LDA plot
-plotLDA(d,cls = 'day')
-
-
+
+

Arguments

+
analysis
+

S4 object of class AnalysisData or Analysis

+
cls
+

name of sample information column to use for class labels

+
label
+

name of sample information column to use for sample labels. Set to NULL for no labels.

+
scale
+

scale the data

+
center
+

center the data

+
xAxis
+

principal component to plot on the x-axis

+
yAxis
+

principal component to plot on the y-axis

+
shape
+

TRUE/FALSE use shape aesthetic for plot points. +Defaults to TRUE when the number of classes is greater than 12

+
ellipses
+

TRUE/FALSE, plot multivariate normal distribution 95% +confidence ellipses for each class

+
title
+

plot title

+
legendPosition
+

legend position to pass to legend.position argument +of ggplot2::theme. Set to "none" to remove legend.

+
labelSize
+

label size. Ignored if label is NULL

+
...
+

arguments to pass to the appropriate method

+
type
+

raw or pre-treated data to plot

+
+ +
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg,abr1$fact) %>% 
+ occupancyMaximum(cls = 'day')
+
+## LDA plot
+plotLDA(d,cls = 'day')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotMDS.html b/docs/reference/plotMDS.html index ef1c6d4c..d36ced49 100644 --- a/docs/reference/plotMDS.html +++ b/docs/reference/plotMDS.html @@ -1,67 +1,12 @@ - - - - - - - -Multidimensional scaling (MDS) plot — plotMDS • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Multidimensional scaling (MDS) plot — plotMDS • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,119 +68,101 @@

Multidimensional scaling (MDS) plot

Plot multidimensional scaling plot for a RandomForest class object.

-
plotMDS(
-  x,
-  cls = "class",
-  label = NULL,
-  shape = FALSE,
-  ellipses = TRUE,
-  title = "",
-  legendPosition = "bottom",
-  labelSize = 2
-)
-
-# S4 method for RandomForest
-plotMDS(
-  x,
-  cls = "class",
-  label = NULL,
-  shape = FALSE,
-  ellipses = TRUE,
-  title = "",
-  legendPosition = "bottom",
-  labelSize = 2
-)
-
-# S4 method for list
-plotMDS(
-  x,
-  label = NULL,
-  shape = FALSE,
-  ellipses = TRUE,
-  title = "",
-  legendPosition = "bottom",
-  labelSize = 2
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x

S4 object of class RandomForest

cls

sample information column to use for sample labelling, -Set to NULL for no labelling.

label

sample information column to use for sample labels. Set to NULL for no labels.

shape

TRUE/FALSE use shape aesthetic for plot points. -Defaults to TRUE when the number of classes is greater than 12

ellipses

TRUE/FALSE, plot multivariate normal distribution 95% -confidence ellipses for each class

title

plot title

legendPosition

legend position to pass to legend.position argument -of ggplot2::theme. Set to "none" to remove legend.

labelSize

label size. Ignored if label is NULL

- +
+
plotMDS(
+  x,
+  cls = "class",
+  label = NULL,
+  shape = FALSE,
+  ellipses = TRUE,
+  title = "",
+  legendPosition = "bottom",
+  labelSize = 2
+)
+
+# S4 method for RandomForest
+plotMDS(
+  x,
+  cls = "class",
+  label = NULL,
+  shape = FALSE,
+  ellipses = TRUE,
+  title = "",
+  legendPosition = "bottom",
+  labelSize = 2
+)
+
+# S4 method for list
+plotMDS(
+  x,
+  label = NULL,
+  shape = FALSE,
+  ellipses = TRUE,
+  title = "",
+  legendPosition = "bottom",
+  labelSize = 2
+)
+
-

Examples

-
library(metaboData)
-
-x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
-       occupancyMaximum(cls = 'day') %>%
-       transformTICnorm()
-       
-rf <- randomForest(x,cls = 'day')
-
-plotMDS(rf,cls = 'day')
-
-
+
+

Arguments

+
x
+

S4 object of class RandomForest

+
cls
+

sample information column to use for sample labelling, +Set to NULL for no labelling.

+
label
+

sample information column to use for sample labels. Set to NULL for no labels.

+
shape
+

TRUE/FALSE use shape aesthetic for plot points. +Defaults to TRUE when the number of classes is greater than 12

+
ellipses
+

TRUE/FALSE, plot multivariate normal distribution 95% +confidence ellipses for each class

+
title
+

plot title

+
legendPosition
+

legend position to pass to legend.position argument +of ggplot2::theme. Set to "none" to remove legend.

+
labelSize
+

label size. Ignored if label is NULL

+
+ +
+

Examples

+
library(metaboData)
+
+x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
+       occupancyMaximum(cls = 'day') %>%
+       transformTICnorm()
+       
+rf <- randomForest(x,cls = 'day')
+
+plotMDS(rf,cls = 'day')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotMetrics.html b/docs/reference/plotMetrics.html index 3cbcd38c..8b3a2c58 100644 --- a/docs/reference/plotMetrics.html +++ b/docs/reference/plotMetrics.html @@ -1,67 +1,12 @@ - - - - - - - -Plot model performance metrics — plotMetrics • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot model performance metrics — plotMetrics • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,66 +68,60 @@

Plot model performance metrics

Plot random forest model performance metrics

-
plotMetrics(x, response = "class")
-
-# S4 method for RandomForest
-plotMetrics(x)
-
-# S4 method for list
-plotMetrics(x)
- -

Arguments

- - - - - - - - - - -
x

S4 object of class RandomForest

response

response results to plot

- - -

Examples

-
library(metaboData)
-
-x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
-       keepClasses(cls = 'day',classes = c('H','1','5')) %>% 
-       occupancyMaximum(cls = 'day') %>%
-       transformTICnorm()
-       
-rf <- randomForest(x,cls = 'day',binary = TRUE)
-
-plotMetrics(rf,response = 'day')
-
-
+
+
plotMetrics(x, response = "class")
+
+# S4 method for RandomForest
+plotMetrics(x)
+
+# S4 method for list
+plotMetrics(x)
+
+ +
+

Arguments

+
x
+

S4 object of class RandomForest

+
response
+

response results to plot

+
+ +
+

Examples

+
library(metaboData)
+
+x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
+       keepClasses(cls = 'day',classes = c('H','1','5')) %>% 
+       occupancyMaximum(cls = 'day') %>%
+       transformTICnorm()
+       
+rf <- randomForest(x,cls = 'day',binary = TRUE)
+
+plotMetrics(rf,response = 'day')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotOccupancy.html b/docs/reference/plotOccupancy.html index d092c6d1..fe0d5ccc 100644 --- a/docs/reference/plotOccupancy.html +++ b/docs/reference/plotOccupancy.html @@ -1,67 +1,12 @@ - - - - - - - -Plot class occupancy distributions — plotOccupancy • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot class occupancy distributions — plotOccupancy • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,70 +68,60 @@

Plot class occupancy distributions

Plot class occupancy distributions.

-
plotOccupancy(x, cls = "class", ...)
-
-# S4 method for AnalysisData
-plotOccupancy(x, cls = "class")
-
-# S4 method for Analysis
-plotOccupancy(x, cls = "class", type = "raw")
- -

Arguments

- - - - - - - - - - - - - - - - - - -
x

S4 object of class AnalysisData or Analysis

cls

sample information column to use for class labels

...

arguments to pass to the appropriate method

type

raw or preTreated data to plot

- - -

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg,abr1$fact)
-
-## Plot class occupancy distributions
-plotOccupancy(d,cls = 'day')
-
-
+
+
plotOccupancy(x, cls = "class", ...)
+
+# S4 method for AnalysisData
+plotOccupancy(x, cls = "class")
+
+# S4 method for Analysis
+plotOccupancy(x, cls = "class", type = "raw")
+
+ +
+

Arguments

+
x
+

S4 object of class AnalysisData or Analysis

+
cls
+

sample information column to use for class labels

+
...
+

arguments to pass to the appropriate method

+
type
+

raw or preTreated data to plot

+
+ +
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg,abr1$fact)
+
+## Plot class occupancy distributions
+plotOccupancy(d,cls = 'day')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotPCA.html b/docs/reference/plotPCA.html index 06d9b6a6..861c4de9 100644 --- a/docs/reference/plotPCA.html +++ b/docs/reference/plotPCA.html @@ -1,67 +1,12 @@ - - - - - - - -Principle Component Analysis plot — plotPCA • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Principle Component Analysis plot — plotPCA • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,155 +68,125 @@

Principal Component Analysis plot

Plot Principal Component Analysis results.

-
plotPCA(
-  analysis,
-  cls = "class",
-  label = NULL,
-  scale = TRUE,
-  center = TRUE,
-  xAxis = "PC1",
-  yAxis = "PC2",
-  shape = FALSE,
-  ellipses = TRUE,
-  title = "PCA",
-  legendPosition = "bottom",
-  labelSize = 2,
-  ...
-)
-
-# S4 method for AnalysisData
-plotPCA(
-  analysis,
-  cls = "class",
-  label = NULL,
-  scale = TRUE,
-  center = TRUE,
-  xAxis = "PC1",
-  yAxis = "PC2",
-  shape = FALSE,
-  ellipses = TRUE,
-  title = "Principle Component Analysis (PCA)",
-  legendPosition = "bottom",
-  labelSize = 2
-)
-
-# S4 method for Analysis
-plotPCA(
-  analysis,
-  cls = "class",
-  label = NULL,
-  scale = TRUE,
-  center = TRUE,
-  xAxis = "PC1",
-  yAxis = "PC2",
-  shape = FALSE,
-  ellipses = TRUE,
-  title = "PCA",
-  legendPosition = "bottom",
-  labelSize = 2,
-  type = "raw"
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
analysis

object of class AnalysisData or Analysis

cls

name of class information column to use for sample labelling

label

name of class information column to use for sample labels. Set to NULL for no labels.

scale

scale the data

center

center the data

xAxis

principle component to plot on the x-axis

yAxis

principle component to plot on the y-axis

shape

TRUE/FALSE use shape aesthetic for plot points. -Defaults to TRUE when the number of classes is greater than 12

ellipses

TRUE/FALSE, plot multivariate normal distribution 95\ -confidence ellipses for each class

title

plot title

legendPosition

legend position to pass to legend.position argument -of ggplot2::theme. Set to "none" to remove legend.

labelSize

label size. Ignored if label is NULL

...

arguments to pass to the appropriate method

type

raw or pre-treated data to plot

- +
+
plotPCA(
+  analysis,
+  cls = "class",
+  label = NULL,
+  scale = TRUE,
+  center = TRUE,
+  xAxis = "PC1",
+  yAxis = "PC2",
+  shape = FALSE,
+  ellipses = TRUE,
+  title = "PCA",
+  legendPosition = "bottom",
+  labelSize = 2,
+  ...
+)
+
+# S4 method for AnalysisData
+plotPCA(
+  analysis,
+  cls = "class",
+  label = NULL,
+  scale = TRUE,
+  center = TRUE,
+  xAxis = "PC1",
+  yAxis = "PC2",
+  shape = FALSE,
+  ellipses = TRUE,
+  title = "Principle Component Analysis (PCA)",
+  legendPosition = "bottom",
+  labelSize = 2
+)
+
+# S4 method for Analysis
+plotPCA(
+  analysis,
+  cls = "class",
+  label = NULL,
+  scale = TRUE,
+  center = TRUE,
+  xAxis = "PC1",
+  yAxis = "PC2",
+  shape = FALSE,
+  ellipses = TRUE,
+  title = "PCA",
+  legendPosition = "bottom",
+  labelSize = 2,
+  type = "raw"
+)
+
-

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg,abr1$fact) %>% 
- occupancyMaximum(cls = 'day')
-
-## PCA plot
-plotPCA(d,cls = 'day')
-
-
+
+

Arguments

+
analysis
+

object of class AnalysisData or Analysis

+
cls
+

name of class information column to use for sample labelling

+
label
+

name of class information column to use for sample labels. Set to NULL for no labels.

+
scale
+

scale the data

+
center
+

center the data

+
xAxis
+

principal component to plot on the x-axis

+
yAxis
+

principal component to plot on the y-axis

+
shape
+

TRUE/FALSE use shape aesthetic for plot points. +Defaults to TRUE when the number of classes is greater than 12

+
ellipses
+

TRUE/FALSE, plot multivariate normal distribution 95% +confidence ellipses for each class

+
title
+

plot title

+
legendPosition
+

legend position to pass to legend.position argument +of ggplot2::theme. Set to "none" to remove legend.

+
labelSize
+

label size. Ignored if label is NULL

+
...
+

arguments to pass to the appropriate method

+
type
+

raw or pre-treated data to plot

+
+ +
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg,abr1$fact) %>% 
+ occupancyMaximum(cls = 'day')
+
+## PCA plot
+plotPCA(d,cls = 'day')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotROC.html b/docs/reference/plotROC.html index 92a1166d..f7bebb04 100644 --- a/docs/reference/plotROC.html +++ b/docs/reference/plotROC.html @@ -1,68 +1,13 @@ - - - - - - - -Plot receiver operator characteristic (ROC) curves — plotROC • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot receiver operator characteristic (ROC) curves — plotROC • metabolyseR + + - - - - -
-
- -
- -
+
@@ -140,70 +70,62 @@

Plot receiver operator characteristic (ROC) curves

Plot receiver operator characteristic (ROC) curves for a RandomForest class object.

-
plotROC(x, title = "", legendPosition = "bottom")
-
-# S4 method for RandomForest
-plotROC(x, title = "", legendPosition = "bottom")
-
-# S4 method for list
-plotROC(x, title = "", legendPosition = "bottom")
- -

Arguments

- - - - - - - - - - - - - - -
x

S4 object of class RandomForest

title

plot title

legendPosition

legend position to pass to legend.position -argument of ggplot2::theme. Set to "none" to remove legend.

- - -

Examples

-
library(metaboData)
-
-x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
-       occupancyMaximum(cls = 'day') %>%
-       transformTICnorm()
-       
-rf <- randomForest(x,cls = 'day')
-
-plotROC(rf)
-
-
+
+
plotROC(x, title = "", legendPosition = "bottom")
+
+# S4 method for RandomForest
+plotROC(x, title = "", legendPosition = "bottom")
+
+# S4 method for list
+plotROC(x, title = "", legendPosition = "bottom")
+
+ +
+

Arguments

+
x
+

S4 object of class RandomForest

+
title
+

plot title

+
legendPosition
+

legend position to pass to legend.position +argument of ggplot2::theme. Set to "none" to remove legend.

+
+ +
+

Examples

+
library(metaboData)
+
+x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
+       occupancyMaximum(cls = 'day') %>%
+       transformTICnorm()
+       
+rf <- randomForest(x,cls = 'day')
+
+plotROC(rf)
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotRSD.html b/docs/reference/plotRSD.html index 60a56bd8..d972627c 100644 --- a/docs/reference/plotRSD.html +++ b/docs/reference/plotRSD.html @@ -1,67 +1,12 @@ - - - - - - - -Plot RSD distributions — plotRSD • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot RSD distributions — plotRSD • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,72 +68,62 @@

Plot RSD distributions

Plot RSD distributions of raw data in quality control samples.

-
plotRSD(analysis, cls = "class", ...)
-
-# S4 method for AnalysisData
-plotRSD(analysis, cls = "class")
-
-# S4 method for Analysis
-plotRSD(analysis, cls = "class", type = "raw")
- -

Arguments

- - - - - - - - - - - - - - - - - - -
analysis

object of class AnalysisData or Analysis

cls

information column to use for class labels

...

arguments to pass to the appropriate method

type

raw or pre-treated data to plot

- - -

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg,abr1$fact)
-
-## Plot class RSD distributions
-plotRSD(d,cls = 'day')
-#> Warning: Removed 716 rows containing non-finite values (stat_density).
-#> Warning: Removed 6 row(s) containing missing values (geom_path).
-
-
+
+
plotRSD(analysis, cls = "class", ...)
+
+# S4 method for AnalysisData
+plotRSD(analysis, cls = "class")
+
+# S4 method for Analysis
+plotRSD(analysis, cls = "class", type = "raw")
+
+ +
+

Arguments

+
analysis
+

object of class AnalysisData or Analysis

+
cls
+

information column to use for class labels

+
...
+

arguments to pass to the appropriate method

+
type
+

raw or pre-treated data to plot

+
+ +
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg,abr1$fact)
+
+## Plot class RSD distributions
+plotRSD(d,cls = 'day')
+#> Warning: Removed 716 rows containing non-finite values (stat_density).
+#> Warning: Removed 6 row(s) containing missing values (geom_path).
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotSupervisedRF.html b/docs/reference/plotSupervisedRF.html index 28e0dc2d..3b779ed3 100644 --- a/docs/reference/plotSupervisedRF.html +++ b/docs/reference/plotSupervisedRF.html @@ -1,67 +1,12 @@ - - - - - - - -Supervised random forest MDS plot — plotSupervisedRF • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Supervised random forest MDS plot — plotSupervisedRF • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,147 +68,119 @@

Supervised random forest MDS plot

A multidimensional scaling (MDS) plot of supervised random forest analysis

-
plotSupervisedRF(
-  x,
-  cls = "class",
-  rf = list(),
-  label = NULL,
-  shape = FALSE,
-  ellipses = TRUE,
-  ROC = TRUE,
-  seed = 1234,
-  title = "",
-  legendPosition = "bottom",
-  labelSize = 2,
-  ...
-)
-
-# S4 method for AnalysisData
-plotSupervisedRF(
-  x,
-  cls = "class",
-  rf = list(),
-  label = NULL,
-  shape = FALSE,
-  ellipses = TRUE,
-  ROC = TRUE,
-  seed = 1234,
-  title = "",
-  legendPosition = "bottom",
-  labelSize = 2
-)
-
-# S4 method for Analysis
-plotSupervisedRF(
-  x,
-  cls = "class",
-  rf = list(),
-  label = NULL,
-  shape = FALSE,
-  ellipses = TRUE,
-  ROC = TRUE,
-  seed = 1234,
-  title = "",
-  legendPosition = "bottom",
-  labelSize = 2,
-  type = "raw"
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x

object of class AnalysisData or Analysis containing analysis results

cls

information column to use for sample classes

rf

list of additional parameters to pass to randomForest

label

information column to use for sample labels. Set to NULL for no labels.

shape

TRUE/FALSE use shape aesthetic for plot points. -Defaults to TRUE when the number of classes is greater than 12

ellipses

TRUE/FALSE, plot multivariate normal distribution 95% -confidence ellipses for each class

ROC

should receiver-operator characteristics be plotted?

seed

random number seed

title

plot title

legendPosition

legend position to pass to legend.position argument -of ggplot2::theme. Set to "none" to remove legend.

labelSize

label size. Ignored if label is NULL

...

arguments to pass to the appropriate method

type

raw or pre-treated data to plot

- +
+
plotSupervisedRF(
+  x,
+  cls = "class",
+  rf = list(),
+  label = NULL,
+  shape = FALSE,
+  ellipses = TRUE,
+  ROC = TRUE,
+  seed = 1234,
+  title = "",
+  legendPosition = "bottom",
+  labelSize = 2,
+  ...
+)
+
+# S4 method for AnalysisData
+plotSupervisedRF(
+  x,
+  cls = "class",
+  rf = list(),
+  label = NULL,
+  shape = FALSE,
+  ellipses = TRUE,
+  ROC = TRUE,
+  seed = 1234,
+  title = "",
+  legendPosition = "bottom",
+  labelSize = 2
+)
+
+# S4 method for Analysis
+plotSupervisedRF(
+  x,
+  cls = "class",
+  rf = list(),
+  label = NULL,
+  shape = FALSE,
+  ellipses = TRUE,
+  ROC = TRUE,
+  seed = 1234,
+  title = "",
+  legendPosition = "bottom",
+  labelSize = 2,
+  type = "raw"
+)
+
-

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg[,200:300],abr1$fact)
-
-## Supervised random forest MDS plot
-plotSupervisedRF(d,cls = 'day')
-
-
+
+

Arguments

+
x
+

object of class AnalysisData or Analysis containing analysis results

+
cls
+

information column to use for sample classes

+
rf
+

list of additional parameters to pass to randomForest

+
label
+

information column to use for sample labels. Set to NULL for no labels.

+
shape
+

TRUE/FALSE use shape aesthetic for plot points. +Defaults to TRUE when the number of classes is greater than 12

+
ellipses
+

TRUE/FALSE, plot multivariate normal distribution 95% +confidence ellipses for each class

+
ROC
+

should receiver-operator characteristics be plotted?

+
seed
+

random number seed

+
title
+

plot title

+
legendPosition
+

legend position to pass to legend.position argument +of ggplot2::theme. Set to "none" to remove legend.

+
labelSize
+

label size. Ignored if label is NULL

+
...
+

arguments to pass to the appropriate method

+
type
+

raw or pre-treated data to plot

+
+ +
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact)
+
+## Supervised random forest MDS plot
+plotSupervisedRF(d,cls = 'day')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotTIC.html b/docs/reference/plotTIC.html index c1c47b79..f815835d 100644 --- a/docs/reference/plotTIC.html +++ b/docs/reference/plotTIC.html @@ -1,67 +1,12 @@ - - - - - - - -Plot sample total ion counts — plotTIC • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot sample total ion counts — plotTIC • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,82 +68,70 @@

Plot sample total ion counts

Plot total ion counts of sample data.

-
plotTIC(analysis, by = "injOrder", colour = "block", ...)
-
-# S4 method for AnalysisData
-plotTIC(analysis, by = "injOrder", colour = "block")
-
-# S4 method for Analysis
-plotTIC(
-  analysis,
-  by = "injOrder",
-  colour = "block",
-  type = c("raw", "pre-treated")
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - -
analysis

S4 object of class AnalysisData or Analysis

by

information column to plot against

colour

information column to provide colour labels

...

arguments to pass to the appropriate method

type

raw or pre-treated sample data

- - -

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg,abr1$fact)
-
-## Plot sample TIVs
-plotTIC(d,by = 'injorder',colour = 'day')
-
-
-plotTIC(d,by = 'day',colour = 'day')
-
-
+
+
plotTIC(analysis, by = "injOrder", colour = "block", ...)
+
+# S4 method for AnalysisData
+plotTIC(analysis, by = "injOrder", colour = "block")
+
+# S4 method for Analysis
+plotTIC(
+  analysis,
+  by = "injOrder",
+  colour = "block",
+  type = c("raw", "pre-treated")
+)
+
+ +
+

Arguments

+
analysis
+

S4 object of class AnalysisData or Analysis

+
by
+

information column to plot against

+
colour
+

information column to provide colour labels

+
...
+

arguments to pass to the appropriate method

+
type
+

raw or pre-treated sample data

+
+ +
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg,abr1$fact)
+
+## Plot sample TIVs
+plotTIC(d,by = 'injorder',colour = 'day')
+
+
+plotTIC(d,by = 'day',colour = 'day')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/plotUnsupervisedRF.html b/docs/reference/plotUnsupervisedRF.html index 3529024a..10b7062d 100644 --- a/docs/reference/plotUnsupervisedRF.html +++ b/docs/reference/plotUnsupervisedRF.html @@ -1,67 +1,12 @@ - - - - - - - -Unsupervised random forest MDS plot — plotUnsupervisedRF • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Unsupervised random forest MDS plot — plotUnsupervisedRF • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,140 +68,114 @@

Unsupervised random forest MDS plot

A multidimensional scaling (MDS) plot of unsupervised random forest analysis

-
plotUnsupervisedRF(
-  x,
-  cls = "class",
-  rf = list(),
-  label = NULL,
-  shape = FALSE,
-  ellipses = TRUE,
-  seed = 1234,
-  title = "",
-  legendPosition = "bottom",
-  labelSize = 2,
-  ...
-)
-
-# S4 method for AnalysisData
-plotUnsupervisedRF(
-  x,
-  cls = "class",
-  rf = list(),
-  label = NULL,
-  shape = FALSE,
-  ellipses = TRUE,
-  seed = 1234,
-  title = "",
-  legendPosition = "bottom",
-  labelSize = 2
-)
-
-# S4 method for Analysis
-plotUnsupervisedRF(
-  x,
-  cls = "class",
-  rf = list(),
-  label = NULL,
-  shape = FALSE,
-  ellipses = TRUE,
-  seed = 1234,
-  title = "",
-  legendPosition = "bottom",
-  labelSize = 2,
-  type = "raw"
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x

object of class AnalysisData or Analysis

cls

sample information column to use for sample labelling

rf

list of additional parameters to pass to randomForest

label

info column to use for sample labels. Set to NULL for no labels.

shape

TRUE/FALSE use shape aesthetic for plot points. -Defaults to TRUE when the number of classes is greater than 12

ellipses

TRUE/FALSE, plot multivariate normal distribution 95% -confidence ellipses for each class

seed

random number seed

title

plot title

legendPosition

legend position to pass to legend.position argument -of ggplot2::theme. Set to "none" to remove legend.

labelSize

label size. Ignored if label is NULL

...

arguments to pass to the appropriate method

type

raw or pre-treated data to plot

- +
+
plotUnsupervisedRF(
+  x,
+  cls = "class",
+  rf = list(),
+  label = NULL,
+  shape = FALSE,
+  ellipses = TRUE,
+  seed = 1234,
+  title = "",
+  legendPosition = "bottom",
+  labelSize = 2,
+  ...
+)
+
+# S4 method for AnalysisData
+plotUnsupervisedRF(
+  x,
+  cls = "class",
+  rf = list(),
+  label = NULL,
+  shape = FALSE,
+  ellipses = TRUE,
+  seed = 1234,
+  title = "",
+  legendPosition = "bottom",
+  labelSize = 2
+)
+
+# S4 method for Analysis
+plotUnsupervisedRF(
+  x,
+  cls = "class",
+  rf = list(),
+  label = NULL,
+  shape = FALSE,
+  ellipses = TRUE,
+  seed = 1234,
+  title = "",
+  legendPosition = "bottom",
+  labelSize = 2,
+  type = "raw"
+)
+
-

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg[,200:300],abr1$fact)
-
-## Unsupervised random forest MDS plot
-plotUnsupervisedRF(d,cls = 'day')
-
-
+
+

Arguments

+
x
+

object of class AnalysisData or Analysis

+
cls
+

sample information column to use for sample labelling

+
rf
+

list of additional parameters to pass to randomForest

+
label
+

info column to use for sample labels. Set to NULL for no labels.

+
shape
+

TRUE/FALSE use shape aesthetic for plot points. +Defaults to TRUE when the number of classes is greater than 12

+
ellipses
+

TRUE/FALSE, plot multivariate normal distribution 95% +confidence ellipses for each class

+
seed
+

random number seed

+
title
+

plot title

+
legendPosition
+

legend position to pass to legend.position argument +of ggplot2::theme. Set to "none" to remove legend.

+
labelSize
+

label size. Ignored if label is NULL

+
...
+

arguments to pass to the appropriate method

+
type
+

raw or pre-treated data to plot

+
+ +
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact)
+
+## Unsupervised random forest MDS plot
+plotUnsupervisedRF(d,cls = 'day')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/pre-treatment-parameters.html b/docs/reference/pre-treatment-parameters.html index 001466f0..e6aae3ff 100644 --- a/docs/reference/pre-treatment-parameters.html +++ b/docs/reference/pre-treatment-parameters.html @@ -1,67 +1,12 @@ - - - - - - - -Pre-treatment parameters — preTreatmentElements • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Pre-treatment parameters — preTreatmentElements • metabolyseR + + - - - - -
-
- -
- -
+
@@ -138,93 +68,87 @@

Pre-treatment parameters

Return pre-treatment elements, methods and parameters.

-
preTreatmentElements()
-
-preTreatmentMethods(element)
-
-preTreatmentParameters(methods)
- -

Arguments

- - - - - - - - - - -
element

pre-treatment element name

methods

a named list of element methods

- - -

Examples

-
## Return the availalble pre-treatment elements
-preTreatmentElements()
-#> [1] "aggregate"       "correction"      "impute"          "keep"           
-#> [5] "occupancyFilter" "QC"              "remove"          "transform"      
-
-## Return the available pre-treatment methods for the remove element
-preTreatmentMethods('remove')
-#> [1] "classes"  "features" "samples" 
-
-## Define some default pre-treatment parameters
-p <- preTreatmentParameters(
-  list(
-    remove = 'classes',
-    QC = c('RSDfilter','removeQC'),
-    transform = 'TICnorm'
-  )
-)
-
-## Assign the pre-treatment parameters to analysis parameters
-ap <- analysisParameters('pre-treatment')
-parameters(ap,'pre-treatment') <- p
-
-print(ap)
-#> Parameters:
-#> pre-treatment
-#> 	remove
-#> 		classes
-#> 			cls = class
-#> 			classes = c()
-#> 	QC
-#> 		RSDfilter
-#> 			cls = class
-#> 			QCidx = QC
-#> 			RSDthresh = 50
-#> 		removeQC
-#> 			cls = class
-#> 			QCidx = QC
-#> 	transform
-#> 		TICnorm
-#> 
-
+
+
preTreatmentElements()
+
+preTreatmentMethods(element)
+
+preTreatmentParameters(methods)
+
+ +
+

Arguments

+
element
+

pre-treatment element name

+
methods
+

a named list of element methods

+
+ +
+

Examples

+
## Return the available pre-treatment elements
+preTreatmentElements()
+#> [1] "aggregate"       "correction"      "impute"          "keep"           
+#> [5] "occupancyFilter" "QC"              "remove"          "transform"      
+
+## Return the available pre-treatment methods for the remove element
+preTreatmentMethods('remove')
+#> [1] "classes"  "features" "samples" 
+
+## Define some default pre-treatment parameters
+p <- preTreatmentParameters(
+  list(
+    remove = 'classes',
+    QC = c('RSDfilter','removeQC'),
+    transform = 'TICnorm'
+  )
+)
+
+## Assign the pre-treatment parameters to analysis parameters
+ap <- analysisParameters('pre-treatment')
+parameters(ap,'pre-treatment') <- p
+
+print(ap)
+#> Parameters:
+#> pre-treatment
+#> 	remove
+#> 		classes
+#> 			cls = class
+#> 			classes = c()
+#> 	QC
+#> 		RSDfilter
+#> 			cls = class
+#> 			QCidx = QC
+#> 			RSDthresh = 50
+#> 		removeQC
+#> 			cls = class
+#> 			QCidx = QC
+#> 	transform
+#> 		TICnorm
+#> 
+
+
+
-
- +
- - + + diff --git a/docs/reference/randomForest.html b/docs/reference/randomForest.html index 716951ff..a3372792 100644 --- a/docs/reference/randomForest.html +++ b/docs/reference/randomForest.html @@ -1,67 +1,12 @@ - - - - - - - -Random forest analysis — randomForest • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Random forest analysis — randomForest • metabolyseR - + + - - - -
-
- -
- -
+
@@ -138,119 +68,101 @@

Random forest analysis

Perform random forest on an AnalysisData object

-
randomForest(
-  x,
-  cls = "class",
-  rf = list(),
-  reps = 1,
-  binary = FALSE,
-  comparisons = list(),
-  perm = 0,
-  returnModels = FALSE,
-  seed = 1234
-)
-
-# S4 method for AnalysisData
-randomForest(
-  x,
-  cls = "class",
-  rf = list(),
-  reps = 1,
-  binary = FALSE,
-  comparisons = list(),
-  perm = 0,
-  returnModels = FALSE,
-  seed = 1234
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x

S4 object of class AnalysisData

cls

vector of sample information columns to use for response variable information. Set to NULL for unsupervised.

rf

named list of arguments to pass to randomForest::randomForest

reps

number of repetitions to perform

binary

TRUE/FALSE should binary comparisons be performed. Ignored for unsupervised and regression. Ignored if comparisons specified.

comparisons

list of comparisons to perform. -Ignored for unsupervised and regression. See details.

perm

number of permutations to perform. Ignored for unsupervised.

returnModels

TRUE/FALSE should model objects be returned.

seed

random number seed

- -

Value

+
+
randomForest(
+  x,
+  cls = "class",
+  rf = list(),
+  reps = 1,
+  binary = FALSE,
+  comparisons = list(),
+  perm = 0,
+  returnModels = FALSE,
+  seed = 1234
+)
+
+# S4 method for AnalysisData
+randomForest(
+  x,
+  cls = "class",
+  rf = list(),
+  reps = 1,
+  binary = FALSE,
+  comparisons = list(),
+  perm = 0,
+  returnModels = FALSE,
+  seed = 1234
+)
+
+
+

Arguments

+
x
+

S4 object of class AnalysisData

+
cls
+

vector of sample information columns to use for response variable information. Set to NULL for unsupervised.

+
rf
+

named list of arguments to pass to randomForest::randomForest

+
reps
+

number of repetitions to perform

+
binary
+

TRUE/FALSE should binary comparisons be performed. Ignored for unsupervised and regression. Ignored if comparisons specified.

+
comparisons
+

list of comparisons to perform. +Ignored for unsupervised and regression. See details.

+
perm
+

number of permutations to perform. Ignored for unsupervised.

+
returnModels
+

TRUE/FALSE should model objects be returned.

+
seed
+

random number seed

+
+
+

Value

An S4 object of class RandomForest.

-

Details

- +
+
+

Details

Specified class comparisons should be given as a list named according to cls. Comparisons should be given as class names separated by '~' (eg. '1~2~H').

+
-

Examples

-
library(metaboData)
-
-x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
-       occupancyMaximum(cls = 'day') %>%
-       transformTICnorm()
-       
-rf <- randomForest(x,cls = 'day')
-
-plotMDS(rf,cls = 'day')
-
-
+
+

Examples

+
library(metaboData)
+
+x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
+       occupancyMaximum(cls = 'day') %>%
+       transformTICnorm()
+       
+rf <- randomForest(x,cls = 'day')
+
+plotMDS(rf,cls = 'day')
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html index 2c5e14e2..2680e153 100644 --- a/docs/reference/reexports.html +++ b/docs/reference/reexports.html @@ -1,74 +1,23 @@ - - - - - - - -Objects exported from other packages — reexports • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Objects exported from other packages — reexports • metabolyseR + future +plan + magrittr +%&gt;% - - - - - - - - - + + -
-
- -
- -
+

These objects are imported from other packages. Follow the links below to see their documentation.

-
-
future

plan

+
future
+

plan

-
magrittr

%>%

-
-
+
magrittr
+

%>%

+ +
+
-
- +
- - + + diff --git a/docs/reference/remove.html b/docs/reference/remove.html index bb571e3d..258f1ef4 100644 --- a/docs/reference/remove.html +++ b/docs/reference/remove.html @@ -1,67 +1,12 @@ - - - - - - - -Remove samples, classes or features — removeClasses • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Remove samples, classes or features — removeClasses • metabolyseR - - - - + + -
-
- -
- -
+
@@ -138,126 +68,111 @@

Remove samples, classes or features

Exclusion of samples, classes or features from an AnalysisData object.

-
removeClasses(d, cls = "class", classes = c())
+    
+
removeClasses(d, cls = "class", classes = c())
 
-# S4 method for AnalysisData
-removeClasses(d, cls = "class", classes = c())
+# S4 method for AnalysisData
+removeClasses(d, cls = "class", classes = c())
 
-removeFeatures(d, features = character())
+removeFeatures(d, features = character())
 
-# S4 method for AnalysisData
-removeFeatures(d, features = character())
+# S4 method for AnalysisData
+removeFeatures(d, features = character())
 
-removeSamples(d, idx = "fileOrder", samples = c())
+removeSamples(d, idx = "fileOrder", samples = c())
 
-# S4 method for AnalysisData
-removeSamples(d, idx = "fileOrder", samples = c())
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - -
d

S4 object of class AnalysisData

cls

info column to use for class information

classes

classes to remove

features

features to remove

idx

info column containing sample indexes

samples

sample indexes to remove

- -

Value

+# S4 method for AnalysisData +removeSamples(d, idx = "fileOrder", samples = c())
+
+
+

Arguments

+
d
+

S4 object of class AnalysisData

+
cls
+

info column to use for class information

+
classes
+

classes to remove

+
features
+

features to remove

+
idx
+

info column containing sample indexes

+
samples
+

sample indexes to remove

+
+
+

Value

An S4 object of class AnalysisData with samples, classes or features removed.

-

Methods

- +
+
+

Methods

-
    -
  • removeClasses: Remove classes.

  • +
    • removeClasses: Remove classes.

    • removeFeatures: Remove features.

    • removeSamples: Remove samples.

    • -
    - - -

    Examples

    -
    library(metaboData)
    - d <- analysisData(abr1$neg[,200:300],abr1$fact)
    - 
    - ## Remove classes
    - d %>% 
    -  removeClasses(cls = 'day',classes = 'H')
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 100 
    -#> Features: 101 
    -#> Info: 9 
    -#> 
    - 
    - ## Remove features
    - d %>% 
    -  removeFeatures(features = c('N200','N201'))
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 120 
    -#> Features: 99 
    -#> Info: 9 
    -#> 
    - 
    - ## Remove samples
    - d %>% 
    -  removeSamples(idx = 'injorder',samples = c(1,10))
    -#> 
    -#> AnalysisData object containing:
    -#> 
    -#> Samples: 118 
    -#> Features: 101 
    -#> Info: 9 
    -#> 
    -
    +
+ +
+

Examples

+
library(metaboData)
+ d <- analysisData(abr1$neg[,200:300],abr1$fact)
+ 
+ ## Remove classes
+ d %>% 
+  removeClasses(cls = 'day',classes = 'H')
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 100 
+#> Features: 101 
+#> Info: 9 
+#> 
+ 
+ ## Remove features
+ d %>% 
+  removeFeatures(features = c('N200','N201'))
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 120 
+#> Features: 99 
+#> Info: 9 
+#> 
+ 
+ ## Remove samples
+ d %>% 
+  removeSamples(idx = 'injorder',samples = c(1,10))
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 118 
+#> Features: 101 
+#> Info: 9 
+#> 
+
+
+ -
- +
- - + + diff --git a/docs/reference/rsd.html b/docs/reference/rsd.html index 1e7abd8d..83e3c7f8 100644 --- a/docs/reference/rsd.html +++ b/docs/reference/rsd.html @@ -1,68 +1,13 @@ - - - - - - - -Calculate feature relative standard deviations — rsd • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Calculate feature relative standard deviations — rsd • metabolyseR + + - - - - -
-
- -
- -
+
@@ -140,74 +70,69 @@

Calculate feature relative standard deviations

feature per class for a given sample information column.

-
rsd(x, cls = "class")
-
-# S4 method for AnalysisData
-rsd(x, cls = "class")
+
+
rsd(x, cls = "class")
 
-    

Arguments

- - - - - - - - - - -
x

S4 object of class AnalysisData

cls

sample information column to use for class structure

- -

Value

+# S4 method for AnalysisData +rsd(x, cls = "class")
+
+
+

Arguments

+
x
+

S4 object of class AnalysisData

+
cls
+

sample information column to use for class structure

+
+
+

Value

A tibble containing the computed RSD values.

+
-

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg[,200:300],abr1$fact)
-
-rsd(d,cls = 'day')
-#> # A tibble: 606 × 5
-#>    day   Feature   Mean    SD   RSD
-#>    <fct> <chr>    <dbl> <dbl> <dbl>
-#>  1 1     N200    0.224  1.00  447. 
-#>  2 1     N201    0.228  0.946 415. 
-#>  3 1     N202    0.0538 0.151 280. 
-#>  4 1     N203    1.34   1.03   76.5
-#>  5 1     N204    0.0833 0.202 242. 
-#>  6 1     N205    1.55   2.29  148. 
-#>  7 1     N206    0.112  0.360 320. 
-#>  8 1     N207    0.220  0.396 180. 
-#>  9 1     N208    0.124  0.225 182. 
-#> 10 1     N209    1.37   2.03  148. 
-#> # … with 596 more rows
-
+
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact)
+
+rsd(d,cls = 'day')
+#> # A tibble: 606 × 5
+#>    day   Feature   Mean    SD   RSD
+#>    <fct> <chr>    <dbl> <dbl> <dbl>
+#>  1 1     N200    0.224  1.00  447. 
+#>  2 1     N201    0.228  0.946 415. 
+#>  3 1     N202    0.0538 0.151 280. 
+#>  4 1     N203    1.34   1.03   76.5
+#>  5 1     N204    0.0833 0.202 242. 
+#>  6 1     N205    1.55   2.29  148. 
+#>  7 1     N206    0.112  0.360 320. 
+#>  8 1     N207    0.220  0.396 180. 
+#>  9 1     N208    0.124  0.225 182. 
+#> 10 1     N209    1.37   2.03  148. 
+#> # … with 596 more rows
+
+
+
-
- +
- - + + diff --git a/docs/reference/split.html b/docs/reference/split.html index 8e294611..4819d636 100644 --- a/docs/reference/split.html +++ b/docs/reference/split.html @@ -1,68 +1,13 @@ - - - - - - - -Split an AnalysisData object — split • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Split an AnalysisData object — split • metabolyseR + + - - - - -
-
- -
- -
+
@@ -140,117 +70,112 @@

Split an AnalysisData object

a class grouping variable.

-
split(x, cls = "class")
-
-# S4 method for AnalysisData
-split(x, cls = "class")
+
+
split(x, cls = "class")
 
-    

Arguments

- - - - - - - - - - -
x

S4 object of class AnalysisData

cls

sample information column to use for splitting

- -

Value

+# S4 method for AnalysisData +split(x, cls = "class")
+
+
+

Arguments

+
x
+

S4 object of class AnalysisData

+
cls
+

sample information column to use for splitting

+
+
+

Value

A list of AnalysisData objects.

+
-

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg,abr1$fact)
-
-## Split the data set based on the 'day' class information column
-d <- split(d,cls = 'day')
-
-print(d)
-#> $`1`
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 20 
-#> Features: 2000 
-#> Info: 9 
-#> 
-#> 
-#> $`2`
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 20 
-#> Features: 2000 
-#> Info: 9 
-#> 
-#> 
-#> $`3`
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 20 
-#> Features: 2000 
-#> Info: 9 
-#> 
-#> 
-#> $`4`
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 20 
-#> Features: 2000 
-#> Info: 9 
-#> 
-#> 
-#> $`5`
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 20 
-#> Features: 2000 
-#> Info: 9 
-#> 
-#> 
-#> $H
-#> 
-#> AnalysisData object containing:
-#> 
-#> Samples: 20 
-#> Features: 2000 
-#> Info: 9 
-#> 
-#> 
-
+
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg,abr1$fact)
+
+## Split the data set based on the 'day' class information column
+d <- split(d,cls = 'day')
+
+print(d)
+#> $`1`
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 20 
+#> Features: 2000 
+#> Info: 9 
+#> 
+#> 
+#> $`2`
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 20 
+#> Features: 2000 
+#> Info: 9 
+#> 
+#> 
+#> $`3`
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 20 
+#> Features: 2000 
+#> Info: 9 
+#> 
+#> 
+#> $`4`
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 20 
+#> Features: 2000 
+#> Info: 9 
+#> 
+#> 
+#> $`5`
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 20 
+#> Features: 2000 
+#> Info: 9 
+#> 
+#> 
+#> $H
+#> 
+#> AnalysisData object containing:
+#> 
+#> Samples: 20 
+#> Features: 2000 
+#> Info: 9 
+#> 
+#> 
+
+
+
-
- +
- - + + diff --git a/docs/reference/transform-1.png b/docs/reference/transform-1.png index 38326a77..763fe6d7 100644 Binary files a/docs/reference/transform-1.png and b/docs/reference/transform-1.png differ diff --git a/docs/reference/transform-10.png b/docs/reference/transform-10.png index b15422b7..1b3a960a 100644 Binary files a/docs/reference/transform-10.png and b/docs/reference/transform-10.png differ diff --git a/docs/reference/transform-11.png b/docs/reference/transform-11.png index 538bb9ca..8cec217c 100644 Binary files a/docs/reference/transform-11.png and b/docs/reference/transform-11.png differ diff --git a/docs/reference/transform-12.png b/docs/reference/transform-12.png index e646a2fd..c192a9c5 100644 Binary files a/docs/reference/transform-12.png and b/docs/reference/transform-12.png differ diff --git a/docs/reference/transform-2.png b/docs/reference/transform-2.png index 4350ca82..9adfcd01 100644 Binary files a/docs/reference/transform-2.png and b/docs/reference/transform-2.png differ diff --git a/docs/reference/transform-3.png b/docs/reference/transform-3.png index 0c6b0611..740cdfd1 100644 Binary files a/docs/reference/transform-3.png and b/docs/reference/transform-3.png differ diff --git a/docs/reference/transform-4.png b/docs/reference/transform-4.png index 09e90f97..834c4b5c 100644 Binary files a/docs/reference/transform-4.png and b/docs/reference/transform-4.png differ diff --git a/docs/reference/transform-5.png b/docs/reference/transform-5.png index 2992c745..e6af4f12 100644 Binary files a/docs/reference/transform-5.png and b/docs/reference/transform-5.png differ diff --git a/docs/reference/transform-6.png b/docs/reference/transform-6.png index b61c3868..92dc92b3 100644 Binary files a/docs/reference/transform-6.png and b/docs/reference/transform-6.png differ diff --git a/docs/reference/transform-7.png b/docs/reference/transform-7.png index 14aa3597..27acd8d2 100644 Binary files a/docs/reference/transform-7.png and 
b/docs/reference/transform-7.png differ diff --git a/docs/reference/transform-8.png b/docs/reference/transform-8.png index 6d0220b5..e1c13cbc 100644 Binary files a/docs/reference/transform-8.png and b/docs/reference/transform-8.png differ diff --git a/docs/reference/transform-9.png b/docs/reference/transform-9.png index 55bca1f1..53478402 100644 Binary files a/docs/reference/transform-9.png and b/docs/reference/transform-9.png differ diff --git a/docs/reference/transform.html b/docs/reference/transform.html index e0e8a74b..e0833dbd 100644 --- a/docs/reference/transform.html +++ b/docs/reference/transform.html @@ -1,67 +1,12 @@ - - - - - - - -Scaling, transformation and normalisation methods — transformArcSine • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Scaling, transformation and normalisation methods — transformArcSine • metabolyseR - - - - + + -
-
- -
- -
+
@@ -138,90 +68,87 @@

Scaling, transformation and normalisation methods

Methods for data scaling, transformation and normalisation.

-
transformArcSine(d)
+    
+
transformArcSine(d)
 
-# S4 method for AnalysisData
-transformArcSine(d)
+# S4 method for AnalysisData
+transformArcSine(d)
 
-transformAuto(d)
+transformAuto(d)
 
-# S4 method for AnalysisData
-transformAuto(d)
+# S4 method for AnalysisData
+transformAuto(d)
 
-transformCenter(d)
+transformCenter(d)
 
-# S4 method for AnalysisData
-transformCenter(d)
+# S4 method for AnalysisData
+transformCenter(d)
 
-transformLevel(d)
+transformLevel(d)
 
-# S4 method for AnalysisData
-transformLevel(d)
+# S4 method for AnalysisData
+transformLevel(d)
 
-transformLn(d, add = 1)
+transformLn(d, add = 1)
 
-# S4 method for AnalysisData
-transformLn(d, add = 1)
+# S4 method for AnalysisData
+transformLn(d, add = 1)
 
-transformLog10(d, add = 1)
+transformLog10(d, add = 1)
 
-# S4 method for AnalysisData
-transformLog10(d, add = 1)
+# S4 method for AnalysisData
+transformLog10(d, add = 1)
 
-transformPareto(d)
+transformPareto(d)
 
-# S4 method for AnalysisData
-transformPareto(d)
+# S4 method for AnalysisData
+transformPareto(d)
 
-transformRange(d)
+transformRange(d)
 
-# S4 method for AnalysisData
-transformRange(d)
+# S4 method for AnalysisData
+transformRange(d)
 
-transformSQRT(d)
+transformSQRT(d)
 
-# S4 method for AnalysisData
-transformSQRT(d)
+# S4 method for AnalysisData
+transformSQRT(d)
 
-transformTICnorm(d)
+transformTICnorm(d)
 
-# S4 method for AnalysisData
-transformTICnorm(d)
+# S4 method for AnalysisData
+transformTICnorm(d)
 
-transformVast(d)
+transformVast(d)
 
-# S4 method for AnalysisData
-transformVast(d)
- -

Arguments

- - - - - - - - - - -
d

S4 object of class AnalysisData

add

value to add prior to transformation

- -

Value

+# S4 method for AnalysisData +transformVast(d)
+
+
+

Arguments

+
d
+

S4 object of class AnalysisData

+
add
+

value to add prior to transformation

+
+
+

Value

An S4 object of class AnalysisData containing the transformed data.

-

Details

- +
+
+

Details

Prior to downstream analyses, metabolomics data often require transformation to fulfil the assumptions of a particular statistical/data mining technique. Before applying a transformation, it is important to consider the effects that the transformation will have on the data, as this can greatly affect the outcome of further downstream analyses. It is also important to consider at what stage in the pre-treatment routine a transformation is applied as this too could introduce artefacts into the data. The best practice is to apply a transformation as the last in a pre-treatment routine after all other steps have been taken. There are a wide range of transformation methods available that are commonly used for the analysis of metabolomics data.

-

Methods

- +
+
+

Methods

-
    -
  • transformArcSine: Arc-sine transformation.

  • +
    • transformArcSine: Arc-sine transformation.

    • transformAuto: Auto scaling.

    • transformCenter: Mean centring.

    • transformLevel: Level scaling.

    • @@ -232,116 +159,113 @@

      Methods
    • transformSQRT: Square root transformation.

    • transformTICnorm: Total ion count normalisation.

    • transformVast: Vast scaling.

    • -

    - - -

    Examples

    -
    
    -## Each of the following examples shows the application of the transformation and then 
    -## a Linear Discriminant Analysis is plotted to show it's effect on the data structure.
    -
    -## Initial example data preparation
    -library(metaboData)
    -
    -d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
    - occupancyMaximum(occupancy = 2/3)
    -
    -d %>% 
    - plotLDA(cls = 'day')
    -
    - 
    -
    -## Arc-sine transformation
    -d %>% 
    - transformArcSine() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Auto scaling
    -d %>% 
    - transformAuto() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Mean centring
    -d %>% 
    - transformCenter()%>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Level scaling
    -d %>% 
    - transformLevel() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Natural logarithmic transformation
    -d %>% 
    - transformLn() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Logarithmic transformation
    -d %>% 
    - transformLog10()%>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Pareto scaling
    -d %>% 
    - transformPareto() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Range scaling
    -d %>% 
    - transformRange() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Square root scaling
    -d %>% 
    - transformSQRT() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Total ion count nromalisation
    -d %>% 
    - transformTICnorm() %>% 
    - plotLDA(cls = 'day')
    -
    -
    -## Vast scaling
    -d %>% 
    - transformVast() %>% 
    - plotLDA(cls = 'day')
    -
    -
    +
+ +
+

Examples

+

+## Each of the following examples shows the application of the transformation and then 
+## a Linear Discriminant Analysis is plotted to show its effect on the data structure.
+
+## Initial example data preparation
+library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
+ occupancyMaximum(occupancy = 2/3)
+
+d %>% 
+ plotLDA(cls = 'day')
+
+ 
+
+## Arc-sine transformation
+d %>% 
+ transformArcSine() %>% 
+ plotLDA(cls = 'day')
+
+
+## Auto scaling
+d %>% 
+ transformAuto() %>% 
+ plotLDA(cls = 'day')
+
+
+## Mean centring
+d %>% 
+ transformCenter()%>% 
+ plotLDA(cls = 'day')
+
+
+## Level scaling
+d %>% 
+ transformLevel() %>% 
+ plotLDA(cls = 'day')
+
+
+## Natural logarithmic transformation
+d %>% 
+ transformLn() %>% 
+ plotLDA(cls = 'day')
+
+
+## Logarithmic transformation
+d %>% 
+ transformLog10()%>% 
+ plotLDA(cls = 'day')
+
+
+## Pareto scaling
+d %>% 
+ transformPareto() %>% 
+ plotLDA(cls = 'day')
+
+
+## Range scaling
+d %>% 
+ transformRange() %>% 
+ plotLDA(cls = 'day')
+
+
+## Square root scaling
+d %>% 
+ transformSQRT() %>% 
+ plotLDA(cls = 'day')
+
+
+## Total ion count normalisation
+d %>% 
+ transformTICnorm() %>% 
+ plotLDA(cls = 'day')
+
+
+## Vast scaling
+d %>% 
+ transformVast() %>% 
+ plotLDA(cls = 'day')
+
+
+
+ -
- +
- - + + diff --git a/docs/reference/ttest.html b/docs/reference/ttest.html index e3e99f24..12d4b9ee 100644 --- a/docs/reference/ttest.html +++ b/docs/reference/ttest.html @@ -1,67 +1,12 @@ - - - - - - - -Welch's t-test — ttest • metabolyseR - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Welch's t-test — ttest • metabolyseR - - + + - - -
-
- -
- -
+
@@ -138,105 +68,94 @@

Welch's t-test

Welch's t-test

-
ttest(
-  x,
-  cls = "class",
-  pAdjust = "bonferroni",
-  comparisons = list(),
-  returnModels = FALSE
-)
-
-# S4 method for AnalysisData
-ttest(
-  x,
-  cls = "class",
-  pAdjust = "bonferroni",
-  comparisons = list(),
-  returnModels = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - -
x

S4 object of class AnalysisData

cls

vector of sample information column names to analyse

pAdjust

p value adjustment method

comparisons

named list of binary comparisons to analyse

returnModels

should models be returned

- -

Value

+
+
ttest(
+  x,
+  cls = "class",
+  pAdjust = "bonferroni",
+  comparisons = list(),
+  returnModels = FALSE
+)
+
+# S4 method for AnalysisData
+ttest(
+  x,
+  cls = "class",
+  pAdjust = "bonferroni",
+  comparisons = list(),
+  returnModels = FALSE
+)
+
+
+

Arguments

+
x
+

S4 object of class AnalysisData

+
cls
+

vector of sample information column names to analyse

+
pAdjust
+

p value adjustment method

+
comparisons
+

named list of binary comparisons to analyse

+
returnModels
+

should models be returned

+
+
+

Value

An S4 object of class Univariate.

+
-

Examples

-
library(metaboData)
-
-d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
- keepClasses(cls = 'day',classes = c('H','5'))
-
-## Perform t-test
-ttest_analysis <- ttest(d,cls = 'day')
-
-## Extract significant features
-explanatoryFeatures(ttest_analysis)
-#> # A tibble: 11 × 14
-#>    Response Comparison Feature estimate estimate1 estimate2 statistic    p.value
-#>    <chr>    <chr>      <chr>      <dbl>     <dbl>     <dbl>     <dbl>      <dbl>
-#>  1 day      5~H        N277       65.4      79.2     13.8        7.77    1.58e-7
-#>  2 day      5~H        N299        7.68      8.99     1.31       6.36    2.53e-6
-#>  3 day      5~H        N229       50.3      55.2      4.93       5.96    8.60e-6
-#>  4 day      5~H        N295        4.19      5.12     0.937      5.56    8.65e-6
-#>  5 day      5~H        N233       -4.65      2.68     7.33      -5.00    1.69e-5
-#>  6 day      5~H        N267       27.3      48.1     20.8        4.79    2.96e-5
-#>  7 day      5~H        N245       18.0      19.9      1.94       4.92    9.00e-5
-#>  8 day      5~H        N279        7.64      9.21     1.57       4.61    1.63e-4
-#>  9 day      5~H        N278        4.14      6.27     2.12       4.45    1.76e-4
-#> 10 day      5~H        N281        3.02      3.72     0.701      4.47    1.92e-4
-#> 11 day      5~H        N272        2.99      3.71     0.722      4.30    2.49e-4
-#> # … with 6 more variables: parameter <dbl>, conf.low <dbl>, conf.high <dbl>,
-#> #   method <chr>, alternative <chr>, adjusted.p.value <dbl>
-
+
+

Examples

+
library(metaboData)
+
+d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% 
+ keepClasses(cls = 'day',classes = c('H','5'))
+
+## Perform t-test
+ttest_analysis <- ttest(d,cls = 'day')
+
+## Extract significant features
+explanatoryFeatures(ttest_analysis)
+#> # A tibble: 11 × 14
+#>    Response Comparison Feature estimate estimate1 estimate2 statistic    p.value
+#>    <chr>    <chr>      <chr>      <dbl>     <dbl>     <dbl>     <dbl>      <dbl>
+#>  1 day      5~H        N277       65.4      79.2     13.8        7.77    1.58e-7
+#>  2 day      5~H        N299        7.68      8.99     1.31       6.36    2.53e-6
+#>  3 day      5~H        N229       50.3      55.2      4.93       5.96    8.60e-6
+#>  4 day      5~H        N295        4.19      5.12     0.937      5.56    8.65e-6
+#>  5 day      5~H        N233       -4.65      2.68     7.33      -5.00    1.69e-5
+#>  6 day      5~H        N267       27.3      48.1     20.8        4.79    2.96e-5
+#>  7 day      5~H        N245       18.0      19.9      1.94       4.92    9.00e-5
+#>  8 day      5~H        N279        7.64      9.21     1.57       4.61    1.63e-4
+#>  9 day      5~H        N278        4.14      6.27     2.12       4.45    1.76e-4
+#> 10 day      5~H        N281        3.02      3.72     0.701      4.47    1.92e-4
+#> 11 day      5~H        N272        2.99      3.71     0.722      4.30    2.49e-4
+#> # … with 6 more variables: parameter <dbl>, conf.low <dbl>, conf.high <dbl>,
+#> #   method <chr>, alternative <chr>, adjusted.p.value <dbl>
+
+
+
-
- +
- - + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 5945212f..6cae8dd2 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -1,165 +1,390 @@ - https://jasenfinch.github.io/metabolyseR//index.html + https://jasenfinch.github.io/metabolyseR/404.html - https://jasenfinch.github.io/metabolyseR//reference/Analysis-class.html + https://jasenfinch.github.io/metabolyseR/articles/01_quick_start.html - https://jasenfinch.github.io/metabolyseR//reference/AnalysisData-class.html + https://jasenfinch.github.io/metabolyseR/articles/02_introduction.html - https://jasenfinch.github.io/metabolyseR//reference/AnalysisParameters-class.html + https://jasenfinch.github.io/metabolyseR/articles/03_pre_treatment.html - https://jasenfinch.github.io/metabolyseR//reference/QC.html + https://jasenfinch.github.io/metabolyseR/articles/04_modelling.html - https://jasenfinch.github.io/metabolyseR//reference/RandomForest-class.html + https://jasenfinch.github.io/metabolyseR/articles/index.html - https://jasenfinch.github.io/metabolyseR//reference/Univariate-class.html + https://jasenfinch.github.io/metabolyseR/articles/introduction.html - https://jasenfinch.github.io/metabolyseR//reference/aggregate.html + https://jasenfinch.github.io/metabolyseR/articles/metabolyseR.html - https://jasenfinch.github.io/metabolyseR//reference/analysis-accessors.html + https://jasenfinch.github.io/metabolyseR/articles/modelling.html - https://jasenfinch.github.io/metabolyseR//reference/analysisData.html + https://jasenfinch.github.io/metabolyseR/articles/pre_treatment.html - https://jasenfinch.github.io/metabolyseR//reference/analysisElements.html + https://jasenfinch.github.io/metabolyseR/articles/quick_start.html - https://jasenfinch.github.io/metabolyseR//reference/analysisParameters.html + https://jasenfinch.github.io/metabolyseR/authors.html - https://jasenfinch.github.io/metabolyseR//reference/anova.html + https://jasenfinch.github.io/metabolyseR/index.html - 
https://jasenfinch.github.io/metabolyseR//reference/bind.html + https://jasenfinch.github.io/metabolyseR/news/index.html - https://jasenfinch.github.io/metabolyseR//reference/changeParameter.html + https://jasenfinch.github.io/metabolyseR/reference/Analysis-class.html - https://jasenfinch.github.io/metabolyseR//reference/cls.html + https://jasenfinch.github.io/metabolyseR/reference/AnalysisData-class.html - https://jasenfinch.github.io/metabolyseR//reference/correction.html + https://jasenfinch.github.io/metabolyseR/reference/AnalysisParameters-class.html - https://jasenfinch.github.io/metabolyseR//reference/correlations.html + https://jasenfinch.github.io/metabolyseR/reference/QC.html - https://jasenfinch.github.io/metabolyseR//reference/correlationsParameters.html + https://jasenfinch.github.io/metabolyseR/reference/QCimpute.html - https://jasenfinch.github.io/metabolyseR//reference/impute.html + https://jasenfinch.github.io/metabolyseR/reference/QCoccupancy.html - https://jasenfinch.github.io/metabolyseR//reference/io-parameters.html + https://jasenfinch.github.io/metabolyseR/reference/QCremove.html - https://jasenfinch.github.io/metabolyseR//reference/keep.html + https://jasenfinch.github.io/metabolyseR/reference/QCrsdFilter.html - https://jasenfinch.github.io/metabolyseR//reference/linearRegression.html + https://jasenfinch.github.io/metabolyseR/reference/RandomForest-class.html - https://jasenfinch.github.io/metabolyseR//reference/metabolyse.html + https://jasenfinch.github.io/metabolyseR/reference/Univariate-class.html - https://jasenfinch.github.io/metabolyseR//reference/modelling-accessors.html + https://jasenfinch.github.io/metabolyseR/reference/aggregate.html - https://jasenfinch.github.io/metabolyseR//reference/modelling-parameters.html + https://jasenfinch.github.io/metabolyseR/reference/aggregateMean.html - https://jasenfinch.github.io/metabolyseR//reference/occupancy.html + https://jasenfinch.github.io/metabolyseR/reference/aggregateMedian.html - 
https://jasenfinch.github.io/metabolyseR//reference/occupancyFilter.html + https://jasenfinch.github.io/metabolyseR/reference/aggregateSum.html - https://jasenfinch.github.io/metabolyseR//reference/parameters.html + https://jasenfinch.github.io/metabolyseR/reference/analysis-accessors.html - https://jasenfinch.github.io/metabolyseR//reference/plotExplanatoryHeatmap.html + https://jasenfinch.github.io/metabolyseR/reference/analysisData.html - https://jasenfinch.github.io/metabolyseR//reference/plotFeature.html + https://jasenfinch.github.io/metabolyseR/reference/analysisElements.html - https://jasenfinch.github.io/metabolyseR//reference/plotImportance.html + https://jasenfinch.github.io/metabolyseR/reference/analysisParameters.html - https://jasenfinch.github.io/metabolyseR//reference/plotLDA.html + https://jasenfinch.github.io/metabolyseR/reference/analysisResults.html - https://jasenfinch.github.io/metabolyseR//reference/plotMDS.html + https://jasenfinch.github.io/metabolyseR/reference/anova.html - https://jasenfinch.github.io/metabolyseR//reference/plotMetrics.html + https://jasenfinch.github.io/metabolyseR/reference/binaryComparisons.html - https://jasenfinch.github.io/metabolyseR//reference/plotOccupancy.html + https://jasenfinch.github.io/metabolyseR/reference/bind.html - https://jasenfinch.github.io/metabolyseR//reference/plotPCA.html + https://jasenfinch.github.io/metabolyseR/reference/bindAnalysesRows.html - https://jasenfinch.github.io/metabolyseR//reference/plotROC.html + https://jasenfinch.github.io/metabolyseR/reference/changeParameter.html - https://jasenfinch.github.io/metabolyseR//reference/plotRSD.html + https://jasenfinch.github.io/metabolyseR/reference/cls.html - https://jasenfinch.github.io/metabolyseR//reference/plotSupervisedRF.html + https://jasenfinch.github.io/metabolyseR/reference/clsAdd.html - https://jasenfinch.github.io/metabolyseR//reference/plotTIC.html + https://jasenfinch.github.io/metabolyseR/reference/clsArrange.html - 
https://jasenfinch.github.io/metabolyseR//reference/plotUnsupervisedRF.html + https://jasenfinch.github.io/metabolyseR/reference/clsAvailable.html - https://jasenfinch.github.io/metabolyseR//reference/pre-treatment-parameters.html + https://jasenfinch.github.io/metabolyseR/reference/clsExtract.html - https://jasenfinch.github.io/metabolyseR//reference/randomForest.html + https://jasenfinch.github.io/metabolyseR/reference/clsRemove.html - https://jasenfinch.github.io/metabolyseR//reference/reexports.html + https://jasenfinch.github.io/metabolyseR/reference/clsRename.html - https://jasenfinch.github.io/metabolyseR//reference/remove.html + https://jasenfinch.github.io/metabolyseR/reference/clsReplace.html - https://jasenfinch.github.io/metabolyseR//reference/rsd.html + https://jasenfinch.github.io/metabolyseR/reference/correction.html - https://jasenfinch.github.io/metabolyseR//reference/split.html + https://jasenfinch.github.io/metabolyseR/reference/correctionCenter.html - https://jasenfinch.github.io/metabolyseR//reference/transform.html + https://jasenfinch.github.io/metabolyseR/reference/correlations.html - https://jasenfinch.github.io/metabolyseR//reference/ttest.html + https://jasenfinch.github.io/metabolyseR/reference/correlationsParameters.html - https://jasenfinch.github.io/metabolyseR//articles/metabolyseR.html + https://jasenfinch.github.io/metabolyseR/reference/dat.html - https://jasenfinch.github.io/metabolyseR//articles/modelling.html + https://jasenfinch.github.io/metabolyseR/reference/explanatoryFeatures.html - https://jasenfinch.github.io/metabolyseR//articles/pre_treatment.html + https://jasenfinch.github.io/metabolyseR/reference/exportParameters.html - https://jasenfinch.github.io/metabolyseR//articles/quick_start.html + https://jasenfinch.github.io/metabolyseR/reference/features.html + + + https://jasenfinch.github.io/metabolyseR/reference/importance.html + + + https://jasenfinch.github.io/metabolyseR/reference/importanceMetrics.html + + + 
https://jasenfinch.github.io/metabolyseR/reference/impute.html + + + https://jasenfinch.github.io/metabolyseR/reference/imputeAll.html + + + https://jasenfinch.github.io/metabolyseR/reference/imputeClass.html + + + https://jasenfinch.github.io/metabolyseR/reference/index.html + + + https://jasenfinch.github.io/metabolyseR/reference/io-parameters.html + + + https://jasenfinch.github.io/metabolyseR/reference/keep.html + + + https://jasenfinch.github.io/metabolyseR/reference/keepClasses.html + + + https://jasenfinch.github.io/metabolyseR/reference/keepFeatures.html + + + https://jasenfinch.github.io/metabolyseR/reference/keepSamples.html + + + https://jasenfinch.github.io/metabolyseR/reference/linearRegression.html + + + https://jasenfinch.github.io/metabolyseR/reference/metabolyse.html + + + https://jasenfinch.github.io/metabolyseR/reference/metrics.html + + + https://jasenfinch.github.io/metabolyseR/reference/modelling-accessors.html + + + https://jasenfinch.github.io/metabolyseR/reference/modelling-parameters.html + + + https://jasenfinch.github.io/metabolyseR/reference/modellingMethods.html + + + https://jasenfinch.github.io/metabolyseR/reference/modellingParameters.html + + + https://jasenfinch.github.io/metabolyseR/reference/nFeatures.html + + + https://jasenfinch.github.io/metabolyseR/reference/nSamples.html + + + https://jasenfinch.github.io/metabolyseR/reference/occupancy.html + + + https://jasenfinch.github.io/metabolyseR/reference/occupancyFilter.html + + + https://jasenfinch.github.io/metabolyseR/reference/occupancyMaximum.html + + + https://jasenfinch.github.io/metabolyseR/reference/occupancyMinimum.html + + + https://jasenfinch.github.io/metabolyseR/reference/parameters.html + + + https://jasenfinch.github.io/metabolyseR/reference/parseParameters.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotFeature.html + + + 
https://jasenfinch.github.io/metabolyseR/reference/plotImportance.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotLDA.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotMDS.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotMetrics.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotOccupancy.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotPCA.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotROC.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotRSD.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotSupervisedRF.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotTIC.html + + + https://jasenfinch.github.io/metabolyseR/reference/plotUnsupervisedRF.html + + + https://jasenfinch.github.io/metabolyseR/reference/pre-treatment-parameters.html + + + https://jasenfinch.github.io/metabolyseR/reference/preTreated.html + + + https://jasenfinch.github.io/metabolyseR/reference/preTreatmentElements.html + + + https://jasenfinch.github.io/metabolyseR/reference/preTreatmentMethods.html + + + https://jasenfinch.github.io/metabolyseR/reference/preTreatmentParameters.html + + + https://jasenfinch.github.io/metabolyseR/reference/randomForest.html + + + https://jasenfinch.github.io/metabolyseR/reference/raw.html + + + https://jasenfinch.github.io/metabolyseR/reference/reAnalyse.html + + + https://jasenfinch.github.io/metabolyseR/reference/reexports.html + + + https://jasenfinch.github.io/metabolyseR/reference/remove.html + + + https://jasenfinch.github.io/metabolyseR/reference/removeClasses.html + + + https://jasenfinch.github.io/metabolyseR/reference/removeFeatures.html + + + https://jasenfinch.github.io/metabolyseR/reference/removeSamples.html + + + https://jasenfinch.github.io/metabolyseR/reference/response.html + + + https://jasenfinch.github.io/metabolyseR/reference/rsd.html + + + 
https://jasenfinch.github.io/metabolyseR/reference/show-Analysis-method.html + + + https://jasenfinch.github.io/metabolyseR/reference/show-AnalysisData-method.html + + + https://jasenfinch.github.io/metabolyseR/reference/show-AnalysisParameters-method.html + + + https://jasenfinch.github.io/metabolyseR/reference/show-RandomForest-method.html + + + https://jasenfinch.github.io/metabolyseR/reference/show-Univariate-method.html + + + https://jasenfinch.github.io/metabolyseR/reference/sinfo.html + + + https://jasenfinch.github.io/metabolyseR/reference/split.html + + + https://jasenfinch.github.io/metabolyseR/reference/transform.html + + + https://jasenfinch.github.io/metabolyseR/reference/transformArcSine.html + + + https://jasenfinch.github.io/metabolyseR/reference/transformAuto.html + + + https://jasenfinch.github.io/metabolyseR/reference/transformCenter.html + + + https://jasenfinch.github.io/metabolyseR/reference/transformLevel.html + + + https://jasenfinch.github.io/metabolyseR/reference/transformLn.html + + + https://jasenfinch.github.io/metabolyseR/reference/transformLog10.html + + + https://jasenfinch.github.io/metabolyseR/reference/transformPareto.html + + + https://jasenfinch.github.io/metabolyseR/reference/transformRange.html + + + https://jasenfinch.github.io/metabolyseR/reference/transformSQRT.html + + + https://jasenfinch.github.io/metabolyseR/reference/transformTICnorm.html + + + https://jasenfinch.github.io/metabolyseR/reference/transformVast.html + + + https://jasenfinch.github.io/metabolyseR/reference/ttest.html + + + https://jasenfinch.github.io/metabolyseR/reference/type.html diff --git a/tests/testthat/test-plotLDA.R b/tests/testthat/test-plotLDA.R index f6c69229..a835f602 100644 --- a/tests/testthat/test-plotLDA.R +++ b/tests/testthat/test-plotLDA.R @@ -26,9 +26,29 @@ test_that('plotLDA throws error when wrong type specified for Analysis',{ expect_error(plotLDA(d,type = 'wrong')) }) +test_that('A warning is thrown when a single replicate class is 
included',{ + d <- analysisData(abr1$neg[,200:300],abr1$fact) %>% + occupancyMaximum(cls = 'day') + + d <- d %>% + removeSamples(idx = 'injorder', + samples = c(6,13,30,31,32,38, + 41,58,62,63,70, + 87,88,93,99,102, + 103,107, 120)) + + expect_warning(plotLDA(d,cls = 'day')) +}) + test_that('plotLDA throws error when number of classes is less than 2',{ d <- analysisData(abr1$neg,abr1$fact) %>% - keepClasses(classes = 1) + keepClasses(cls = 'day', + classes = '1') + expect_error(plotLDA(d,cls = 'day')) +}) + +test_that('plotLDA throws error when numeric classes specified',{ + d <- analysisData(abr1$neg,abr1$fact) expect_error(plotLDA(d)) })