diff --git a/DESCRIPTION b/DESCRIPTION index 3722e986..6170e610 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: metabolyseR Title: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data -Version: 0.14.6 +Version: 0.14.7 Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre")) Description: A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data. URL: https://jasenfinch.github.io/metabolyseR diff --git a/NAMESPACE b/NAMESPACE index 903c998c..4a9d6df1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -190,6 +190,7 @@ importFrom(magrittr,set_colnames) importFrom(magrittr,set_names) importFrom(magrittr,set_rownames) importFrom(methods,"slot<-") +importFrom(methods,as) importFrom(methods,new) importFrom(methods,show) importFrom(methods,slot) diff --git a/NEWS.md b/NEWS.md index 9de6357a..92d4f501 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,10 @@ +# metabolyseR 0.14.7 + +* Single replicate classes now automatically removed by [`plotLDA()`](https://jasenfinch.github.io/metabolyseR/reference/plotLDA.html). + # metabolyseR 0.14.6 -* [`plotExplanatoryHeatmap`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html) method for the [`Analysis`](https://jasenfinch.github.io/metabolyseR/reference/Analysis-class.html) class now returns the plot only if the number of plots is equal to 1. +* [`plotExplanatoryHeatmap()`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html) method for the [`Analysis`](https://jasenfinch.github.io/metabolyseR/reference/Analysis-class.html) class now returns the plot only if the number of plots is equal to 1. * Removed reference to the `nCores` parameter from the documentation example of [`metabolyse()`](https://jasenfinch.github.io/metabolyseR/reference/metabolyse.html). diff --git a/R/allClasses.R b/R/allClasses.R index 1a05c5c9..6edd9228 100644 --- a/R/allClasses.R +++ b/R/allClasses.R @@ -115,3 +115,22 @@ setClass('Univariate', models = 'list', results = 'tbl_df' )) + +setClass('LDA', + contains = 'AnalysisData', + slots = list( + stats = 'tbl_df', + Tw = 'numeric', + rankmat = 'numeric', + means = 'numeric', + loadings = 'tbl_df', + x = 'tbl_df', + xmeans = 'tbl_df', + pred = 'factor', + cl = 'factor', + prior = 'numeric', + conf = 'table', + acc = 'numeric', + lev = 'character', + call = 'call' + )) \ No newline at end of file diff --git a/R/nlda.R b/R/nlda.R index fb13b859..fdb4f937 100644 --- a/R/nlda.R +++ b/R/nlda.R @@ -1,141 +1,156 @@ -## FIEmspro https://github.com/aberHRML/FIEmspro nlda functionality +## Based on FIEmspro https://github.com/aberHRML/FIEmspro nlda functionality -#'@importFrom e1071 naiveBayes -#'@importFrom stats cov predict +setGeneric('nlda',function(x,cls = 'class',prior = NULL,scale = FALSE,comprank = FALSE,...) + standardGeneric('nlda')) -`nlda.default` <- - function(dat,cl, prior=NULL,scale=FALSE,comprank = FALSE,...) - { - if (missing(dat) || missing(cl)) - stop("data set or class are missing") - dat <- as.matrix(dat) - - cl <- as.factor(cl) - if (any(table(cl) == 0)) stop("Can't have empty classes in cl.") - - if (nrow(dat) != length(cl)) stop("dat and cl don't match.") - if (length(unique(cl)) < 2) - stop("Classification needs at least two classes.") - if (any(is.na(dat)) || any(is.na(cl))) - stop("NA is not permitted in data set or class labels.") - - if(is.null(prior)){prior <- as.vector(table(cl)/length(cl))} - if(is.null(names(prior))){names(prior) <- levels(cl)} - - pc <- prcomp(dat,scale=scale) - - rankmat <- max(1,ncol(pc$x)-1) - - if(comprank == TRUE) - rankmat <- qr(cov(dat)*(dim(dat)[1]-1))$rank - score <- pc$x[,1:rankmat,drop=FALSE] - - g <- nlevels(cl) - mx <- apply(score,2,mean) - t <- matrix(0,nrow = rankmat,ncol=rankmat) - W <- matrix(0,nrow = rankmat,ncol=rankmat) - for(j in 1:g){ - idx <- which(cl==levels(cl)[j]) - L <- length(idx) - K <- score[idx,,drop=FALSE] - zz <- apply(K,2,mean) - A <- K - t(matrix(rep(mx, L),length(mx),L)) - C <- K - t(matrix(rep(zz, L),length(zz),L)) - t <- t + t(A)%*%A - W <- W + t(C)%*%C - } - B <- t-W - - Ng <- nrow(score)-g - P <- W/(Ng) - eP <- eigen(P) - ord <- sort.list(eP$values) - V <- sweep( - eP$vectors[,ord,drop=FALSE], - 2, - sqrt(colSums(eP$vectors[,ord,drop=FALSE]^2)), - "/") - Dg <- eP$values[ord] - nDg <- length(Dg) - Dmean <- sum(diag(P))/nDg - Dn <- matrix(0,nDg,nDg) - for(i in 1:nDg) - Dn[i,i] <- max(c(Dg[i],Dmean)) - - Wn <- V%*%Dn%*%t(V)*Ng - ratio <- solve(Wn)%*%B - er <- eigen(ratio) - ev <- Re(er$values) - ev[Im(er$values)>0] <- 0 - vec <- Re(er$vectors) - ord <- sort.list(ev,decreasing=TRUE) - vec <- sweep( - vec[,ord,drop=FALSE], - 2, - sqrt(colSums(vec[,ord,drop=FALSE]^2)), - "/") - ev <- ev[ord] - maxg <- min(c(g-1,dim(vec)[1])) - vec <- vec[,1:maxg] ## discriminant functions - Tw <- ev[1:maxg] - names(Tw) <- paste("DF", 1:maxg, sep = "") - - ## get stats here - flip <- function(x) x[rev(seq_along(x))] - n <- dim(dat)[1] - st <- matrix(0,length(Tw),3) - st[,1] <- round(Tw,3) - st[,2] <- round(Tw*100/sum(Tw),3) - st[,3] <- round(sqrt(Tw/(1+Tw)),3) - st <- as.data.frame(st) - dimnames(st) <- list( - paste("DF", 1:maxg, sep = ""), - c("Eig","Perceig","Cancor")) - - res <- list() - res$stats <- st - res$Tw <- Tw - res$rankmat <- rankmat - res$means <- pc$center - res$loadings <- pc$rotation[,1:rankmat,drop=FALSE] %*% - vec ## discriminant functions with PCA - - colnames(res$loadings) <- paste("DF", 1:maxg, sep = "") - - ## rotated data (projection) - x <- sweep(dat, 2, res$means) %*% res$loadings - - ## group means based on the rotated data - xmeans <- tapply(x, list(rep(cl,ncol(x)),col(x)), mean) - dimnames(xmeans)[[2]] <- colnames(x) - - #if(type==1){ - # mdist=as.matrix(dist(rbind(xmeans,x))) - # mdist=mdist[1:g,(g+1):ncol(mdist)] - # prob= (1-t(sweep(mdist,2,apply(mdist,2,sum),"/")))/(g-1) - # pred = apply(prob,1,which.max) - # pred <- factor(dimnames(prob)[[2]][pred], levels = levels(cl)) - #} - #else{ - nbmod <- naiveBayes(data.frame(x),cl) - prob <- predict(nbmod,data.frame(x),type="raw") - pred <- apply(prob,1,which.max) - pred <- factor(levels(cl)[pred], levels = levels(cl)) - #} - res$x <- x - res$xmeans <- xmeans - res$pred <- pred - res$cl <- cl - res$prior <- prior - res$conf <- table(cl,pred) - res$acc <- round(sum(diag(res$conf))*100/nrow(dat),2) - res$lev <- levels(cl) - res$call <- match.call() - res$call[[1]] <- as.name("nlda") - class(res) <- "nlda" - - return(res) - } +#' @importFrom e1071 naiveBayes +#' @importFrom stats cov predict +#' @importFrom methods as - -nlda <- function (dat, ...) UseMethod ("nlda") +setMethod('nlda',signature = 'AnalysisData', + function(x,cls = 'class',prior=NULL,scale=FALSE,comprank = FALSE,...) { + + cl <- x %>% + clsExtract(cls) + + if (is.numeric(cl)) + stop('Classes should not be numeric',call. = FALSE) + + cl <- factor(cl,levels = unique(cl)) + + if (any(table(cl) < 2)) { + remove_classes <- cl %>% + table() %>% + names() %>% + {.[table(cl) < 2]} + + x <- x %>% + removeClasses(cls = cls, + classes = remove_classes) + + warning(str_c('Classes with a single replicate removed: ', + str_c(str_c('"', + remove_classes, + '"'), + collapse = ', ')), + call. = FALSE) + + cl <- x %>% + clsExtract(cls) %>% + factor(levels = unique(.)) + } + + if (length(unique(cl)) < 2) + stop('More than 1 class needed for PC-LDA.',call. = FALSE) + + d <- x %>% + dat() %>% + as.matrix() + + if(is.null(prior)){prior <- as.vector(table(cl)/length(cl))} + if(is.null(names(prior))){names(prior) <- levels(cl)} + + pc <- prcomp(d,scale=scale) + + rankmat <- max(1,ncol(pc$x)-1) + + if(comprank == TRUE) + rankmat <- qr(cov(d)*(dim(d)[1]-1))$rank + score <- pc$x[,1:rankmat,drop=FALSE] + + g <- nlevels(cl) + mx <- apply(score,2,mean) + t <- matrix(0,nrow = rankmat,ncol=rankmat) + W <- matrix(0,nrow = rankmat,ncol=rankmat) + for(j in 1:g){ + idx <- which(cl==levels(cl)[j]) + L <- length(idx) + K <- score[idx,,drop=FALSE] + zz <- apply(K,2,mean) + A <- K - t(matrix(rep(mx, L),length(mx),L)) + C <- K - t(matrix(rep(zz, L),length(zz),L)) + t <- t + t(A)%*%A + W <- W + t(C)%*%C + } + B <- t-W + + Ng <- nrow(score)-g + P <- W/(Ng) + eP <- eigen(P) + ord <- sort.list(eP$values) + V <- sweep( + eP$vectors[,ord,drop=FALSE], + 2, + sqrt(colSums(eP$vectors[,ord,drop=FALSE]^2)), + "/") + Dg <- eP$values[ord] + nDg <- length(Dg) + Dmean <- sum(diag(P))/nDg + Dn <- matrix(0,nDg,nDg) + for(i in 1:nDg) + Dn[i,i] <- max(c(Dg[i],Dmean)) + + Wn <- V%*%Dn%*%t(V)*Ng + ratio <- solve(Wn)%*%B + er <- eigen(ratio) + ev <- Re(er$values) + ev[Im(er$values)>0] <- 0 + vec <- Re(er$vectors) + ord <- sort.list(ev,decreasing=TRUE) + vec <- sweep( + vec[,ord,drop=FALSE], + 2, + sqrt(colSums(vec[,ord,drop=FALSE]^2)), + "/") + ev <- ev[ord] + maxg <- min(c(g-1,dim(vec)[1])) + vec <- vec[,1:maxg] ## discriminant functions + Tw <- ev[1:maxg] + names(Tw) <- paste("DF", 1:maxg, sep = "") + + ## get stats here + flip <- function(x) x[rev(seq_along(x))] + n <- dim(d)[1] + st <- matrix(0,length(Tw),3) + st[,1] <- round(Tw,3) + st[,2] <- round(Tw*100/sum(Tw),3) + st[,3] <- round(sqrt(Tw/(1+Tw)),3) + st <- as.data.frame(st) + dimnames(st) <- list( + paste("DF", 1:maxg, sep = ""), + c("Eig","Perceig","Cancor")) + + res <- as(x,'LDA') + res@stats <- as_tibble(st) + res@Tw <- Tw + res@rankmat <- rankmat + res@means <- pc$center + + loadings <- pc$rotation[,1:rankmat,drop=FALSE] %*% vec + colnames(loadings) <- paste("DF", 1:maxg, sep = "") + x <- sweep(d, 2, res@means) %*% loadings + + ## group means based on the rotated data + xmeans <- tapply(x, list(rep(cl,ncol(x)),col(x)), mean) + dimnames(xmeans)[[2]] <- colnames(x) + + nbmod <- naiveBayes(data.frame(x),cl) + prob <- predict(nbmod,data.frame(x),type="raw") + pred <- apply(prob,1,which.max) + pred <- factor(levels(cl)[pred], levels = levels(cl)) + + res@loadings <- as_tibble(loadings) + res@x <- as_tibble(x) + res@xmeans <- as_tibble(xmeans) + res@pred <- pred + res@cl <- cl + res@prior <- prior + res@conf <- table(cl,pred) + res@acc <- round(sum(diag(res@conf))*100/nrow(d),2) + res@lev <- levels(cl) + res@call <- match.call() + res@call[[1]] <- as.name("nlda") + + return(res) + } +) diff --git a/R/plotLDA.R b/R/plotLDA.R index 3b326780..7f427619 100644 --- a/R/plotLDA.R +++ b/R/plotLDA.R @@ -63,24 +63,16 @@ setMethod('plotLDA', legendPosition = 'bottom', labelSize = 2){ - classLength <- clsLen(analysis,cls) + lda <- nlda(analysis,cls = cls,scale = scale,center = center) - if (classLength < 2) { - stop('More than 1 class needed for PC-LDA.',call. = FALSE) - } - - info <- analysis %>% - clsExtract(cls) %>% - factor() - - lda <- nlda(dat(analysis),cl = info,scale = scale,center = center) - - tw <- lda$Tw %>% + tw <- lda@Tw %>% round(2) - lda <- lda$x %>% + classLength <- clsLen(lda,cls = cls) + + lda <- lda@x %>% as_tibble() %>% - mutate(!!cls := info) + mutate(!!cls := lda@cl) if (classLength > 2) { lda <- lda %>% @@ -92,8 +84,6 @@ setMethod('plotLDA', select(all_of(label))) } - classLength <- clsLen(analysis,cls) - pl <- scatterPlot(lda, cls, xAxis, diff --git a/docs/404.html b/docs/404.html index 6fe14936..8e202619 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,66 +1,27 @@ - - -
- + + + + -## .
+## 1 2 3 4 5 H
+## 20 20 20 20 20 20
It can be seen that there are 20 samples available in each class.
Another example is the addition of a new sample information column. In the following, a column called new_class
will be added with all samples labelled 1
.
-d <- clsAdd(d,cls = 'new_class',value = rep(1,nSamples(d)))
-clsAvailable(d)
## [1] "injorder" "pathcdf" "filecdf" "name.org" "remark" "name"
-## [7] "rep" "day" "class" "new_class"
+d <- clsAdd(d,cls = 'new_class',value = rep(1,nSamples(d)))
+clsAvailable(d)
## [1] "injorder" "pathcdf" "filecdf" "name.org" "remark" "name"
+## [7] "rep" "day" "class" "new_class"
-Samples or features can easily be kept or removed from an AnalysisData
object as is most convenient.
Below can be seen the first 6 sample indexes in the injorder
column of the sample information.
-samples <- d %>%
- clsExtract(cls = 'injorder') %>%
- head()
+samples <- d %>%
+ clsExtract(cls = 'injorder') %>%
+ head()
-print(samples)
## [1] 1 2 3 4 5 6
+print(samples)## [1] 1 2 3 4 5 6
Only these samples could be kept using:
-d %>%
- keepSamples(idx = 'injorder',samples = samples)
##
-## AnalysisData object containing:
-##
-## Samples: 6
-## Features: 2000
-## Info: 10
+d %>%
+ keepSamples(idx = 'injorder',samples = samples)
##
+## AnalysisData object containing:
+##
+## Samples: 6
+## Features: 2000
+## Info: 10
Or removed using:
-d %>%
- removeSamples(idx = 'injorder',samples = samples)
##
-## AnalysisData object containing:
-##
-## Samples: 114
-## Features: 2000
-## Info: 10
+d %>%
+ removeSamples(idx = 'injorder',samples = samples)
+##
+## AnalysisData object containing:
+##
+## Samples: 114
+## Features: 2000
+## Info: 10
The process is very similar for keeping or removing specific metabolome features from the data table. Below can be seen the first 6 feature names in the data table.
-## [1] "N1" "N2" "N3" "N4" "N5" "N6"
+print(feat)
+## [1] "N1" "N2" "N3" "N4" "N5" "N6"
Only these features can be kept using:
-d %>%
- keepFeatures(features = feat)
##
-## AnalysisData object containing:
-##
-## Samples: 120
-## Features: 6
-## Info: 10
+d %>%
+ keepFeatures(features = feat)
+##
+## AnalysisData object containing:
+##
+## Samples: 120
+## Features: 6
+## Info: 10
Or to remove these features:
-d %>%
- removeFeatures(features = feat)
##
-## AnalysisData object containing:
-##
-## Samples: 120
-## Features: 1994
-## Info: 10
+d %>%
+ removeFeatures(features = feat)
+##
+## AnalysisData object containing:
+##
+## Samples: 120
+## Features: 1994
+## Info: 10
-Routine analyses are those that are often made up of numerous steps where parameters have likely already been previously established. The emphasis here is on convenience with as little code as possible required. In these analyses, the necessary analysis elements, order and parameters are first prepared and then the analysis routine subsequently performed in a single step. This section will introduce how this type of analysis can be performed using metabolyseR and will include four main topics:
Parameter selection is the fundamental aspect for performing routine analyses using metabolyseR and will be the step requiring the most input from the user. The parameters for an analysis are stored in an S4 object of class AnalysisParameters
containing the relevant parameters of the selected analysis elements.
The parameters have been named so that they denote the same functionality commonly across all analysis element methods. Discussion of the specific parameters can be found withing the vignettes of the relevant analysis elements. These can be accessed using:
-browseVignettes('metabolyseR')
browseVignettes('metabolyseR')
There are several ways to specify the parameters to use for analysis. The first is programatically and the second is through the use of the YAML format.
-The available analysis elements can be shown using:
-## [1] "pre-treatment" "modelling" "correlations"
+## [1] "pre-treatment" "modelling" "correlations"
The analysisParameters()
function can be used to create an AnalysisParameters
object containing the default parameters. For example, the code below will return default parameters for all the metabolyseR analysis elements.
p <- analysisParameters()
p
## Parameters:
-## pre-treatment
-## QC
-## occupancyFilter
-## cls = class
-## QCidx = QC
-## occupancy = 2/3
-## impute
-## cls = class
-## QCidx = QC
-## occupancy = 2/3
-## parallel = variables
-## seed = 1234
-## RSDfilter
-## cls = class
-## QCidx = QC
-## RSDthresh = 50
-## removeQC
-## cls = class
-## QCidx = QC
-## occupancyFilter
-## maximum
-## cls = class
-## occupancy = 2/3
-## impute
-## class
-## cls = class
-## occupancy = 2/3
-## seed = 1234
-## transform
-## TICnorm
-##
-## modelling
-## randomForest
-## cls = class
-## rf = list()
-## reps = 1
-## binary = FALSE
-## comparisons = list()
-## perm = 0
-## returnModels = FALSE
-## seed = 1234
-##
-## correlations
-## method = pearson
-## pAdjustMethod = bonferroni
-## corPvalue = 0.05
+## Parameters:
+## pre-treatment
+## QC
+## occupancyFilter
+## cls = class
+## QCidx = QC
+## occupancy = 2/3
+## impute
+## cls = class
+## QCidx = QC
+## occupancy = 2/3
+## parallel = variables
+## seed = 1234
+## RSDfilter
+## cls = class
+## QCidx = QC
+## RSDthresh = 50
+## removeQC
+## cls = class
+## QCidx = QC
+## occupancyFilter
+## maximum
+## cls = class
+## occupancy = 2/3
+## impute
+## class
+## cls = class
+## occupancy = 2/3
+## seed = 1234
+## transform
+## TICnorm
+##
+## modelling
+## randomForest
+## cls = class
+## rf = list()
+## reps = 1
+## binary = FALSE
+## comparisons = list()
+## perm = 0
+## returnModels = FALSE
+## seed = 1234
+##
+## correlations
+## method = pearson
+## pAdjustMethod = bonferroni
+## corPvalue = 0.05
To retrieve parameters for a subset of analysis elements the following can be run, returning parameters for only the pre-treatment and modelling elements.
-p <- analysisParameters(c('pre-treatment','modelling'))
+p <- analysisParameters(c('pre-treatment','modelling'))
p
## Parameters:
-## pre-treatment
-## QC
-## occupancyFilter
-## cls = class
-## QCidx = QC
-## occupancy = 2/3
-## impute
-## cls = class
-## QCidx = QC
-## occupancy = 2/3
-## parallel = variables
-## seed = 1234
-## RSDfilter
-## cls = class
-## QCidx = QC
-## RSDthresh = 50
-## removeQC
-## cls = class
-## QCidx = QC
-## occupancyFilter
-## maximum
-## cls = class
-## occupancy = 2/3
-## impute
-## class
-## cls = class
-## occupancy = 2/3
-## seed = 1234
-## transform
-## TICnorm
-##
-## modelling
-## randomForest
-## cls = class
-## rf = list()
-## reps = 1
-## binary = FALSE
-## comparisons = list()
-## perm = 0
-## returnModels = FALSE
-## seed = 1234
+## Parameters:
+## pre-treatment
+## QC
+## occupancyFilter
+## cls = class
+## QCidx = QC
+## occupancy = 2/3
+## impute
+## cls = class
+## QCidx = QC
+## occupancy = 2/3
+## parallel = variables
+## seed = 1234
+## RSDfilter
+## cls = class
+## QCidx = QC
+## RSDthresh = 50
+## removeQC
+## cls = class
+## QCidx = QC
+## occupancyFilter
+## maximum
+## cls = class
+## occupancy = 2/3
+## impute
+## class
+## cls = class
+## occupancy = 2/3
+## seed = 1234
+## transform
+## TICnorm
+##
+## modelling
+## randomForest
+## cls = class
+## rf = list()
+## reps = 1
+## binary = FALSE
+## comparisons = list()
+## perm = 0
+## returnModels = FALSE
+## seed = 1234
The changeParameter()
function can be used to uniformly change these parameters across all of the selected methods. The example below changes the defaults of all the parameters named cls
from the default class
to day
.
p <- analysisParameters()
changeParameter(p,'cls') <- 'day'
p
## Parameters:
-## pre-treatment
-## QC
-## occupancyFilter
-## cls = day
-## QCidx = QC
-## occupancy = 2/3
-## impute
-## cls = day
-## QCidx = QC
-## occupancy = 2/3
-## parallel = variables
-## seed = 1234
-## RSDfilter
-## cls = day
-## QCidx = QC
-## RSDthresh = 50
-## removeQC
-## cls = day
-## QCidx = QC
-## occupancyFilter
-## maximum
-## cls = day
-## occupancy = 2/3
-## impute
-## class
-## cls = day
-## occupancy = 2/3
-## seed = 1234
-## transform
-## TICnorm
-##
-## modelling
-## randomForest
-## cls = day
-## rf = list()
-## reps = 1
-## binary = FALSE
-## comparisons = list()
-## perm = 0
-## returnModels = FALSE
-## seed = 1234
-##
-## correlations
-## method = pearson
-## pAdjustMethod = bonferroni
-## corPvalue = 0.05
+## Parameters:
+## pre-treatment
+## QC
+## occupancyFilter
+## cls = day
+## QCidx = QC
+## occupancy = 2/3
+## impute
+## cls = day
+## QCidx = QC
+## occupancy = 2/3
+## parallel = variables
+## seed = 1234
+## RSDfilter
+## cls = day
+## QCidx = QC
+## RSDthresh = 50
+## removeQC
+## cls = day
+## QCidx = QC
+## occupancyFilter
+## maximum
+## cls = day
+## occupancy = 2/3
+## impute
+## class
+## cls = day
+## occupancy = 2/3
+## seed = 1234
+## transform
+## TICnorm
+##
+## modelling
+## randomForest
+## cls = day
+## rf = list()
+## reps = 1
+## binary = FALSE
+## comparisons = list()
+## perm = 0
+## returnModels = FALSE
+## seed = 1234
+##
+## correlations
+## method = pearson
+## pAdjustMethod = bonferroni
+## corPvalue = 0.05
Alternatively the parameters of a specific analysis elements can be targeted using the elements
argument. The following will only alter the cls
parameter back to class
for the pre-treatment element parameters:
changeParameter(p,'cls',elements = 'pre-treatment') <- 'class'
Parameters can be extracted from the AnalysisParameters
class using the parameters()
function for a specified element.
parameters(p,'correlations')
## $method
-## [1] "pearson"
-##
-## $pAdjustMethod
-## [1] "bonferroni"
-##
-## $corPvalue
-## [1] 0.05
-Each analysis element has a function for returning default parameters for specific methods. These include preTreatmentParameters()
, modellingParameters()
and correlationParameters()
. Each returns a list of the default parameters for a specified methods as shown in the example for modellingParameters()
below.
## $method
+## [1] "pearson"
+##
+## $pAdjustMethod
+## [1] "bonferroni"
+##
+## $corPvalue
+## [1] 0.05
+Each analysis element has a function for returning default parameters for specific methods. These include preTreatmentParameters()
, modellingParameters()
and correlationParameters()
. Each returns a list of the default parameters for a specified methods as shown in the example for modellingParameters()
below.
-modellingParameters('anova')
## $anova
-## $anova$cls
-## [1] "class"
-##
-## $anova$pAdjust
-## [1] "bonferroni"
-##
-## $anova$comparisons
-## list()
-##
-## $anova$returnModels
-## [1] FALSE
+modellingParameters('anova')
## $anova
+## $anova$cls
+## [1] "class"
+##
+## $anova$pAdjust
+## [1] "bonferroni"
+##
+## $anova$comparisons
+## list()
+##
+## $anova$returnModels
+## [1] FALSE
Refer to the documentation (?
) of each function for sepecific usage details.
The parameters returned by these functions can be assigned to an AnalysisParameters
object, again using parameters()
’
-parameters(p,'pre-treatment') <- preTreatmentParameters(
- list(
+parameters(p,'pre-treatment') <- preTreatmentParameters(
+ list(
occupancyFilter = 'maximum',
transform = 'TICnorm'
)
)
Due to the relatively complex structure of the parameters needed for analyses containing many components, it is also possible to specify analysis parameters using the YAML file format. YAML parameter files (.yaml) can be parsed using the parseParameters()
function. The example below shows the YAML specification for the defaults returned by analysisParameters()
.
Due to the relatively complex structure of the parameters needed for analyses containing many components, it is also possible to specify analysis parameters using the YAML file format. YAML parameter files (.yaml) can be parsed using the parseParameters()
function. The example below shows the YAML specification for the defaults returned by analysisParameters()
.
pre-treatment:
QC:
occupancyFilter:
@@ -589,8 +585,8 @@
corPvalue: 0.05
This can be passed directly into an AnalysisParameters
object using the following:
-paramFile <- system.file('defaultParameters.yaml',package = 'metabolyseR')
-p <- parseParameters(paramFile)
paramFile <- system.file('defaultParameters.yaml',package = 'metabolyseR')
+p <- parseParameters(paramFile)
For more complex pre-treatment situations such as the following:
pre-treatment:
remove:
@@ -613,21 +609,21 @@
Existing AnalysisParameters
objects can also be exported to YAML format as shown below:
p <- analysisParameters()
-exportParameters(p,file = 'analysis_parameters.yaml')
+exportParameters(p,file = 'analysis_parameters.yaml')
The analysis is performed in a single step using the metabolyse()
function. This accepts the metabolomic data, the sample information and the analysis parameters.
The metabolomic data table of abundance values where the columns are the metabolome features and the rows are each sample observation. Similarly, the sample meta-information table should consist of the observations as rows and the meta information as columns. The order of the observation rows of the sample information table should be concordant with the rows in the metabolomics data table.
We can run an example analysis using the abr1
data set by first generating the default parameters for pre-treatment and modelling (random forest) analysis elements.
-p <- analysisParameters(c('pre-treatment','modelling'))
p <- analysisParameters(c('pre-treatment','modelling'))
Custom pre-treatment parameters can then be specified to only inlude occupancy filtering and total ion count normalisation.
-parameters(p,'pre-treatment') <- preTreatmentParameters(
- list(
+parameters(p,'pre-treatment') <- preTreatmentParameters(
+ list(
occupancyFilter = 'maximum',
transform = 'TICnorm')
)
Finally, the analysis can be run in a single step. Here only the fist 200 features of the negative ionisation mode data are specified to reduce the analysis time needed for this example.
analysis <- metabolyse(abr1$neg[,1:200],abr1$fact,p)
##
-## metabolyseR v0.14.6 Wed Nov 17 10:25:14 2021
-## ________________________________________________________________________________
-## Parameters:
-## pre-treatment
-## occupancyFilter
-## maximum
-## cls = day
-## occupancy = 2/3
-## transform
-## TICnorm
-##
-## modelling
-## randomForest
-## cls = day
-## rf = list()
-## reps = 1
-## binary = FALSE
-## comparisons = list()
-## perm = 0
-## returnModels = FALSE
-## seed = 1234
-## ________________________________________________________________________________
-## Pre-treatment …
+##
+## metabolyseR v0.14.7 Fri Dec 17 17:56:28 2021
+## ________________________________________________________________________________
+## Parameters:
+## pre-treatment
+## occupancyFilter
+## maximum
+## cls = day
+## occupancy = 2/3
+## transform
+## TICnorm
+##
+## modelling
+## randomForest
+## cls = day
+## rf = list()
+## reps = 1
+## binary = FALSE
+## comparisons = list()
+## perm = 0
+## returnModels = FALSE
+## seed = 1234
+## ________________________________________________________________________________
+##
[34mPre-treatment
[39m…
-Pre-treatment ✓ [0.8S]
-## Modelling …
-
-Modelling ✓ [3.5S]
+
[34mPre-treatment
[39m
[32m✓
[39m [0.8S]
+##
[34mModelling
[39m…
+
[34m
+Modelling
[39m
[32m✓
[39m [3.3S]
## ________________________________________________________________________________
##
-## Complete! [4.4S]
+##
[32mComplete!
[39m[4.1S]
Note: If a data pre-treatment step is not performed prior to modelling or correlation analysis, the raw data will automatically be used.
The analysis
object containing the analysis results can be printed to provide some basic information about the results of the analysis.
-print(analysis)
##
-## metabolyseR v0.14.6
-## Analysis:
-## Wed Nov 17 10:25:14 2021
-##
-## Raw Data:
-## No. samples = 120
-## No. features = 200
-##
-## Pre-treated Data:
-## Wed Nov 17 10:25:15 2021
-## No. samples = 120
-## No. features = 48
-##
-## Modelling:
-## Wed Nov 17 10:25:18 2021
-## Methods: randomForest
+print(analysis)
##
+## metabolyseR v0.14.7
+## Analysis:
+## Fri Dec 17 17:56:28 2021
+##
+## Raw Data:
+## No. samples = 120
+## No. features = 200
+##
+## Pre-treated Data:
+## Fri Dec 17 17:56:29 2021
+## No. samples = 120
+## No. features = 48
+##
+## Modelling:
+## Fri Dec 17 17:56:32 2021
+## Methods: randomForest
There are likely to be occasions where an analysis will need to be re-analysed using a new set of parameters. This can be achieved using the reAnalyse()
function.
In the example below we will run a correlation analysis in addition to the pre-treatment and modelling elements already performed.
Firstly, we can specify the correlation parameters:
@@ -702,119 +698,119 @@Then perform the re-analysis on our previously analysed Analysis
object, specifying the additional parameters.
analysis <- reAnalyse(analysis,parameters)
##
-## metabolyseR v0.14.6 Wed Nov 17 10:25:19 2021
-## ________________________________________________________________________________
-## Parameters:
-## correlations
-## method = pearson
-## pAdjustMethod = bonferroni
-## corPvalue = 0.05
-## ________________________________________________________________________________
-## Correlations …
-
-Correlations ✓ [0.1S]
-## ________________________________________________________________________________
-##
-## Complete! [0.1S]
+##
+## metabolyseR v0.14.7 Fri Dec 17 17:56:33 2021
+## ________________________________________________________________________________
+## Parameters:
+## correlations
+## method = pearson
+## pAdjustMethod = bonferroni
+## corPvalue = 0.05
+## ________________________________________________________________________________
+##
[34mCorrelations
[39m…
+
[34m
+Correlations
[39m
[32m✓
[39m [0.1S]
+## ________________________________________________________________________________
+##
+## Complete! [0.1S]
An overview of the results of the analysis (now including correlations) can then be printed.
-print(analysis)
##
-## metabolyseR v0.14.6
-## Analysis:
-## Wed Nov 17 10:25:14 2021
-##
-## Raw Data:
-## No. samples = 120
-## No. features = 200
-##
-## Pre-treated Data:
-## Wed Nov 17 10:25:15 2021
-## No. samples = 120
-## No. features = 48
-##
-## Modelling:
-## Wed Nov 17 10:25:18 2021
-## Methods: randomForest
-##
-## Correlations:
-## Wed Nov 17 10:25:19 2021
-## No. correlations = 140
+print(analysis)
##
+## metabolyseR v0.14.7
+## Analysis:
+## Fri Dec 17 17:56:28 2021
+##
+## Raw Data:
+## No. samples = 120
+## No. features = 200
+##
+## Pre-treated Data:
+## Fri Dec 17 17:56:29 2021
+## No. samples = 120
+## No. features = 48
+##
+## Modelling:
+## Fri Dec 17 17:56:32 2021
+## Methods: randomForest
+##
+## Correlations:
+## Fri Dec 17 17:56:33 2021
+## No. correlations = 140
An analysis performed by metabolyse()
returns an S4 object of class Analysis
. There are a number of ways of extracting analysis results from this object.
Similarly to the AnalysisData
class, the dat()
and sinfo()
functions can be used to extract the metabolomics data or sample information tables directly for either the raw
or pre-treated
data.
Similarly to the AnalysisData
class, the dat()
and sinfo()
functions can be used to extract the metabolomics data or sample information tables directly for either the raw
or pre-treated
data.
For example, to extract the pre-treated metabolomics data from our object analysis
:
-dat(analysis,type = 'pre-treated')
## # A tibble: 120 × 48
-## N113 N115 N117 N118 N119 N127 N128 N129 N130 N131
-## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-## 1 0.00646 0 1.68e-4 0 1.60e-3 0.0323 2.65e-4 2.80e-4 0 0
-## 2 0.0113 7.74e-4 1.02e-3 0 1.43e-3 0.00856 0 3.95e-4 0 0
-## 3 0.00931 6.01e-4 2.70e-3 6.22e-5 5.58e-3 0 0 1.05e-4 0 6.51e-4
-## 4 0.00798 0 0 0 1.62e-4 0.00848 0 4.05e-4 0 1.28e-4
-## 5 0.0105 0 0 0 0 0.00658 0 1.97e-3 0 0
-## 6 0.00454 0 2.48e-4 3.25e-4 5.31e-4 0.00207 0 1.98e-4 0 0
-## 7 0.0117 0 1.14e-3 0 4.39e-4 0.00603 0 4.04e-4 0 0
-## 8 0.00787 2.36e-3 1.43e-3 1.52e-4 4.22e-3 0.00290 2.78e-4 5.76e-5 0 0
-## 9 0.00136 1.87e-4 8.17e-4 1.87e-4 0 0.0610 1.31e-4 5.23e-4 0 0
-## 10 0.00899 4.26e-4 2.06e-3 0 8.36e-4 0.00106 7.72e-4 0 0 0
-## # … with 110 more rows, and 38 more variables: N132 <dbl>, N133 <dbl>,
-## # N134 <dbl>, N135 <dbl>, N136 <dbl>, N137 <dbl>, N139 <dbl>, N143 <dbl>,
-## # N145 <dbl>, N146 <dbl>, N147 <dbl>, N149 <dbl>, N153 <dbl>, N155 <dbl>,
-## # N157 <dbl>, N161 <dbl>, N163 <dbl>, N164 <dbl>, N165 <dbl>, N168 <dbl>,
-## # N169 <dbl>, N170 <dbl>, N171 <dbl>, N173 <dbl>, N174 <dbl>, N175 <dbl>,
-## # N179 <dbl>, N180 <dbl>, N181 <dbl>, N183 <dbl>, N187 <dbl>, N191 <dbl>,
-## # N192 <dbl>, N193 <dbl>, N195 <dbl>, N196 <dbl>, N197 <dbl>, N198 <dbl>
+dat(analysis,type = 'pre-treated')
## # A tibble: 120 × 48
+## N113 N115 N117 N118 N119 N127 N128 N129 N130 N131
+## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
+## 1 0.00646 0 1.68e-4 0 1.60e-3 0.0323 2.65e-4 2.80e-4 0 0
+## 2 0.0113 7.74e-4 1.02e-3 0 1.43e-3 0.00856 0 3.95e-4 0 0
+## 3 0.00931 6.01e-4 2.70e-3 6.22e-5 5.58e-3 0 0 1.05e-4 0 6.51e-4
+## 4 0.00798 0 0 0 1.62e-4 0.00848 0 4.05e-4 0 1.28e-4
+## 5 0.0105 0 0 0 0 0.00658 0 1.97e-3 0 0
+## 6 0.00454 0 2.48e-4 3.25e-4 5.31e-4 0.00207 0 1.98e-4 0 0
+## 7 0.0117 0 1.14e-3 0 4.39e-4 0.00603 0 4.04e-4 0 0
+## 8 0.00787 2.36e-3 1.43e-3 1.52e-4 4.22e-3 0.00290 2.78e-4 5.76e-5 0 0
+## 9 0.00136 1.87e-4 8.17e-4 1.87e-4 0 0.0610 1.31e-4 5.23e-4 0 0
+## 10 0.00899 4.26e-4 2.06e-3 0 8.36e-4 0.00106 7.72e-4 0 0 0
+## # … with 110 more rows, and 38 more variables: N132 <dbl>, N133 <dbl>,
+## # N134 <dbl>, N135 <dbl>, N136 <dbl>, N137 <dbl>, N139 <dbl>, N143 <dbl>,
+## # N145 <dbl>, N146 <dbl>, N147 <dbl>, N149 <dbl>, N153 <dbl>, N155 <dbl>,
+## # N157 <dbl>, N161 <dbl>, N163 <dbl>, N164 <dbl>, N165 <dbl>, N168 <dbl>,
+## # N169 <dbl>, N170 <dbl>, N171 <dbl>, N173 <dbl>, N174 <dbl>, N175 <dbl>,
+## # N179 <dbl>, N180 <dbl>, N181 <dbl>, N183 <dbl>, N187 <dbl>, N191 <dbl>,
+## # N192 <dbl>, N193 <dbl>, N195 <dbl>, N196 <dbl>, N197 <dbl>, N198 <dbl>
Or to extract the raw sample information:
-sinfo(analysis,type = 'raw')
## # A tibble: 120 × 9
-## injorder pathcdf filecdf name.org remark name rep day class
-## <int> <fct> <fct> <fct> <fct> <fct> <int> <fct> <int>
-## 1 1 C:/Xcalibur/ANDI-LT… 01.cdf 12_2 ok 12_2 2 2 2
-## 2 2 C:/Xcalibur/ANDI-LT… 02.cdf 13_3 ok 13_4 3 3 3
-## 3 3 C:/Xcalibur/ANDI-LT… 03.cdf 15_4 ok 15_5 5 4 4
-## 4 4 C:/Xcalibur/ANDI-LT… 04.cdf 12_1 ok 12_2 2 1 1
-## 5 5 C:/Xcalibur/ANDI-LT… 05.cdf 12_2 ok 12_2 2 2 2
-## 6 6 C:/Xcalibur/ANDI-LT… 06.cdf 11_1 ok 11_2 1 1 1
-## 7 7 C:/Xcalibur/ANDI-LT… 07.cdf 14_2 ok 14_3 4 2 2
-## 8 8 C:/Xcalibur/ANDI-LT… 08.cdf 11_4 ok 11_5 1 4 4
-## 9 9 C:/Xcalibur/ANDI-LT… 09.cdf 13_H ok 13_H 3 H 6
-## 10 10 C:/Xcalibur/ANDI-LT… 10.cdf 15_H ok 15_H 5 H 6
-## # … with 110 more rows
+sinfo(analysis,type = 'raw')
## # A tibble: 120 × 9
+## injorder pathcdf filecdf name.org remark name rep day class
+## <int> <fct> <fct> <fct> <fct> <fct> <int> <fct> <int>
+## 1 1 C:/Xcalibur/ANDI-LT… 01.cdf 12_2 ok 12_2 2 2 2
+## 2 2 C:/Xcalibur/ANDI-LT… 02.cdf 13_3 ok 13_4 3 3 3
+## 3 3 C:/Xcalibur/ANDI-LT… 03.cdf 15_4 ok 15_5 5 4 4
+## 4 4 C:/Xcalibur/ANDI-LT… 04.cdf 12_1 ok 12_2 2 1 1
+## 5 5 C:/Xcalibur/ANDI-LT… 05.cdf 12_2 ok 12_2 2 2 2
+## 6 6 C:/Xcalibur/ANDI-LT… 06.cdf 11_1 ok 11_2 1 1 1
+## 7 7 C:/Xcalibur/ANDI-LT… 07.cdf 14_2 ok 14_3 4 2 2
+## 8 8 C:/Xcalibur/ANDI-LT… 08.cdf 11_4 ok 11_5 1 4 4
+## 9 9 C:/Xcalibur/ANDI-LT… 09.cdf 13_H ok 13_H 3 H 6
+## 10 10 C:/Xcalibur/ANDI-LT… 10.cdf 15_H ok 15_H 5 H 6
+## # … with 110 more rows
Alternatively the raw
or preTreated
functions can be used to extract the AnalysisData
class objects containing both the metabolomics data and sample information for the raw and pre-treated data respectively.
-raw(analysis)
##
-## AnalysisData object containing:
-##
-## Samples: 120
-## Features: 200
-## Info: 9
+raw(analysis)
##
+## AnalysisData object containing:
+##
+## Samples: 120
+## Features: 200
+## Info: 9
-preTreated(analysis)
##
-## AnalysisData object containing:
-##
-## Samples: 120
-## Features: 48
-## Info: 9
+preTreated(analysis)
+##
+## AnalysisData object containing:
+##
+## Samples: 120
+## Features: 48
+## Info: 9
Lastly the analysisResults
function can be used to extract the results of any of the analysis elements. The following will extract the modelling results:
-analysisResults(analysis,element = 'modelling')
## $randomForest
-##
-## Random forest classification
-##
-## Samples: 120
-## Features: 48
-## Response: day
-## # comparisons: 1
+analysisResults(analysis,element = 'modelling')
+## $randomForest
+##
+## Random forest classification
+##
+## Samples: 120
+## Features: 48
+## Response: day
+## # comparisons: 1
@@ -830,11 +826,13 @@ Developed by Jasen Finch.
+ +Developed by Jasen Finch.
Developed by Jasen Finch.
+ +Developed by Jasen Finch.