Merge pull request #84 from jasenfinch/devel
v0.14.7
jasenfinch committed Dec 17, 2021
2 parents 9768f74 + aa5b886 commit 2b6fe7b
Showing 104 changed files with 5,942 additions and 10,052 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: metabolyseR
Title: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data
Version: 0.14.6
Version: 0.14.7
Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre"))
Description: A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data.
URL: https://jasenfinch.github.io/metabolyseR
1 change: 1 addition & 0 deletions NAMESPACE
@@ -190,6 +190,7 @@ importFrom(magrittr,set_colnames)
importFrom(magrittr,set_names)
importFrom(magrittr,set_rownames)
importFrom(methods,"slot<-")
importFrom(methods,as)
importFrom(methods,new)
importFrom(methods,show)
importFrom(methods,slot)
6 changes: 5 additions & 1 deletion NEWS.md
@@ -1,6 +1,10 @@
# metabolyseR 0.14.7

* Single replicate classes now automatically removed by [`plotLDA()`](https://jasenfinch.github.io/metabolyseR/reference/plotLDA.html).

# metabolyseR 0.14.6

* [`plotExplanatoryHeatmap`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html) method for the [`Analysis`](https://jasenfinch.github.io/metabolyseR/reference/Analysis-class.html) class now returns the plot only if the number of plots is equal to 1.
* [`plotExplanatoryHeatmap()`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html) method for the [`Analysis`](https://jasenfinch.github.io/metabolyseR/reference/Analysis-class.html) class now returns the plot only if the number of plots is equal to 1.

* Removed reference to the `nCores` parameter from the documentation example of [`metabolyse()`](https://jasenfinch.github.io/metabolyseR/reference/metabolyse.html).

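The user-facing change in this release is the `plotLDA()` behaviour noted above: classes containing only a single replicate are now removed automatically, with a warning, rather than causing the PC-LDA to fail. A minimal sketch of the new behaviour, assuming a hypothetical `AnalysisData` object `d` whose class information includes one class with a single sample:

library(metabolyseR)

## `d` is a hypothetical AnalysisData object; one of its classes has a single replicate
plotLDA(d, cls = 'class')
#> Warning: Classes with a single replicate removed: "..."
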
19 changes: 19 additions & 0 deletions R/allClasses.R
@@ -115,3 +115,22 @@ setClass('Univariate',
models = 'list',
results = 'tbl_df'
))

setClass('LDA',
contains = 'AnalysisData',
slots = list(
stats = 'tbl_df',
Tw = 'numeric',
rankmat = 'numeric',
means = 'numeric',
loadings = 'tbl_df',
x = 'tbl_df',
xmeans = 'tbl_df',
pred = 'factor',
cl = 'factor',
prior = 'numeric',
conf = 'table',
acc = 'numeric',
lev = 'character',
call = 'call'
))
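
The new `LDA` S4 class above extends `AnalysisData` and holds the PC-LDA results that `nlda()` previously returned as a plain list (see the rewrite of R/nlda.R below). A minimal sketch of inspecting a hypothetical fitted object `lda_res` of this class:

lda_res@stats   # eigenvalue, percentage eigenvalue and canonical correlation per discriminant function
lda_res@pred    # predicted class memberships from the naive Bayes step
lda_res@conf    # confusion table of observed vs predicted classes
lda_res@acc     # classification accuracy (%)
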
291 changes: 153 additions & 138 deletions R/nlda.R
@@ -1,141 +1,156 @@
## FIEmspro https://github.com/aberHRML/FIEmspro nlda functionality
## Based on FIEmspro https://github.com/aberHRML/FIEmspro nlda functionality

#'@importFrom e1071 naiveBayes
#'@importFrom stats cov predict
setGeneric('nlda',function(x,cls = 'class',prior = NULL,scale = FALSE,comprank = FALSE,...)
standardGeneric('nlda'))

`nlda.default` <-
function(dat,cl, prior=NULL,scale=FALSE,comprank = FALSE,...)
{
if (missing(dat) || missing(cl))
stop("data set or class are missing")
dat <- as.matrix(dat)

cl <- as.factor(cl)
if (any(table(cl) == 0)) stop("Can't have empty classes in cl.")

if (nrow(dat) != length(cl)) stop("dat and cl don't match.")
if (length(unique(cl)) < 2)
stop("Classification needs at least two classes.")
if (any(is.na(dat)) || any(is.na(cl)))
stop("NA is not permitted in data set or class labels.")

if(is.null(prior)){prior <- as.vector(table(cl)/length(cl))}
if(is.null(names(prior))){names(prior) <- levels(cl)}

pc <- prcomp(dat,scale=scale)

rankmat <- max(1,ncol(pc$x)-1)

if(comprank == TRUE)
rankmat <- qr(cov(dat)*(dim(dat)[1]-1))$rank
score <- pc$x[,1:rankmat,drop=FALSE]

g <- nlevels(cl)
mx <- apply(score,2,mean)
t <- matrix(0,nrow = rankmat,ncol=rankmat)
W <- matrix(0,nrow = rankmat,ncol=rankmat)
for(j in 1:g){
idx <- which(cl==levels(cl)[j])
L <- length(idx)
K <- score[idx,,drop=FALSE]
zz <- apply(K,2,mean)
A <- K - t(matrix(rep(mx, L),length(mx),L))
C <- K - t(matrix(rep(zz, L),length(zz),L))
t <- t + t(A)%*%A
W <- W + t(C)%*%C
}
B <- t-W

Ng <- nrow(score)-g
P <- W/(Ng)
eP <- eigen(P)
ord <- sort.list(eP$values)
V <- sweep(
eP$vectors[,ord,drop=FALSE],
2,
sqrt(colSums(eP$vectors[,ord,drop=FALSE]^2)),
"/")
Dg <- eP$values[ord]
nDg <- length(Dg)
Dmean <- sum(diag(P))/nDg
Dn <- matrix(0,nDg,nDg)
for(i in 1:nDg)
Dn[i,i] <- max(c(Dg[i],Dmean))

Wn <- V%*%Dn%*%t(V)*Ng
ratio <- solve(Wn)%*%B
er <- eigen(ratio)
ev <- Re(er$values)
ev[Im(er$values)>0] <- 0
vec <- Re(er$vectors)
ord <- sort.list(ev,decreasing=TRUE)
vec <- sweep(
vec[,ord,drop=FALSE],
2,
sqrt(colSums(vec[,ord,drop=FALSE]^2)),
"/")
ev <- ev[ord]
maxg <- min(c(g-1,dim(vec)[1]))
vec <- vec[,1:maxg] ## discriminant functions
Tw <- ev[1:maxg]
names(Tw) <- paste("DF", 1:maxg, sep = "")

## get stats here
flip <- function(x) x[rev(seq_along(x))]
n <- dim(dat)[1]
st <- matrix(0,length(Tw),3)
st[,1] <- round(Tw,3)
st[,2] <- round(Tw*100/sum(Tw),3)
st[,3] <- round(sqrt(Tw/(1+Tw)),3)
st <- as.data.frame(st)
dimnames(st) <- list(
paste("DF", 1:maxg, sep = ""),
c("Eig","Perceig","Cancor"))

res <- list()
res$stats <- st
res$Tw <- Tw
res$rankmat <- rankmat
res$means <- pc$center
res$loadings <- pc$rotation[,1:rankmat,drop=FALSE] %*%
vec ## discriminant functions with PCA

colnames(res$loadings) <- paste("DF", 1:maxg, sep = "")

## rotated data (projection)
x <- sweep(dat, 2, res$means) %*% res$loadings

## group means based on the rotated data
xmeans <- tapply(x, list(rep(cl,ncol(x)),col(x)), mean)
dimnames(xmeans)[[2]] <- colnames(x)

#if(type==1){
# mdist=as.matrix(dist(rbind(xmeans,x)))
# mdist=mdist[1:g,(g+1):ncol(mdist)]
# prob= (1-t(sweep(mdist,2,apply(mdist,2,sum),"/")))/(g-1)
# pred = apply(prob,1,which.max)
# pred <- factor(dimnames(prob)[[2]][pred], levels = levels(cl))
#}
#else{
nbmod <- naiveBayes(data.frame(x),cl)
prob <- predict(nbmod,data.frame(x),type="raw")
pred <- apply(prob,1,which.max)
pred <- factor(levels(cl)[pred], levels = levels(cl))
#}
res$x <- x
res$xmeans <- xmeans
res$pred <- pred
res$cl <- cl
res$prior <- prior
res$conf <- table(cl,pred)
res$acc <- round(sum(diag(res$conf))*100/nrow(dat),2)
res$lev <- levels(cl)
res$call <- match.call()
res$call[[1]] <- as.name("nlda")
class(res) <- "nlda"

return(res)
}
#' @importFrom e1071 naiveBayes
#' @importFrom stats cov predict
#' @importFrom methods as


nlda <- function (dat, ...) UseMethod ("nlda")
setMethod('nlda',signature = 'AnalysisData',
function(x,cls = 'class',prior=NULL,scale=FALSE,comprank = FALSE,...) {

cl <- x %>%
clsExtract(cls)

if (is.numeric(cl))
stop('Classes should not be numeric',call. = FALSE)

cl <- factor(cl,levels = unique(cl))

if (any(table(cl) < 2)) {
remove_classes <- cl %>%
table() %>%
names() %>%
{.[table(cl) < 2]}

x <- x %>%
removeClasses(cls = cls,
classes = remove_classes)

warning(str_c('Classes with a single replicate removed: ',
str_c(str_c('"',
remove_classes,
'"'),
collapse = ', ')),
call. = FALSE)

cl <- x %>%
clsExtract(cls) %>%
factor(levels = unique(.))
}

if (length(unique(cl)) < 2)
stop('More than 1 class needed for PC-LDA.',call. = FALSE)

d <- x %>%
dat() %>%
as.matrix()

if(is.null(prior)){prior <- as.vector(table(cl)/length(cl))}
if(is.null(names(prior))){names(prior) <- levels(cl)}

pc <- prcomp(d,scale=scale)

rankmat <- max(1,ncol(pc$x)-1)

if(comprank == TRUE)
rankmat <- qr(cov(d)*(dim(d)[1]-1))$rank
score <- pc$x[,1:rankmat,drop=FALSE]

g <- nlevels(cl)
mx <- apply(score,2,mean)
t <- matrix(0,nrow = rankmat,ncol=rankmat)
W <- matrix(0,nrow = rankmat,ncol=rankmat)
for(j in 1:g){
idx <- which(cl==levels(cl)[j])
L <- length(idx)
K <- score[idx,,drop=FALSE]
zz <- apply(K,2,mean)
A <- K - t(matrix(rep(mx, L),length(mx),L))
C <- K - t(matrix(rep(zz, L),length(zz),L))
t <- t + t(A)%*%A
W <- W + t(C)%*%C
}
B <- t-W

Ng <- nrow(score)-g
P <- W/(Ng)
eP <- eigen(P)
ord <- sort.list(eP$values)
V <- sweep(
eP$vectors[,ord,drop=FALSE],
2,
sqrt(colSums(eP$vectors[,ord,drop=FALSE]^2)),
"/")
Dg <- eP$values[ord]
nDg <- length(Dg)
Dmean <- sum(diag(P))/nDg
Dn <- matrix(0,nDg,nDg)
for(i in 1:nDg)
Dn[i,i] <- max(c(Dg[i],Dmean))

Wn <- V%*%Dn%*%t(V)*Ng
ratio <- solve(Wn)%*%B
er <- eigen(ratio)
ev <- Re(er$values)
ev[Im(er$values)>0] <- 0
vec <- Re(er$vectors)
ord <- sort.list(ev,decreasing=TRUE)
vec <- sweep(
vec[,ord,drop=FALSE],
2,
sqrt(colSums(vec[,ord,drop=FALSE]^2)),
"/")
ev <- ev[ord]
maxg <- min(c(g-1,dim(vec)[1]))
vec <- vec[,1:maxg] ## discriminant functions
Tw <- ev[1:maxg]
names(Tw) <- paste("DF", 1:maxg, sep = "")

## get stats here
flip <- function(x) x[rev(seq_along(x))]
n <- dim(d)[1]
st <- matrix(0,length(Tw),3)
st[,1] <- round(Tw,3)
st[,2] <- round(Tw*100/sum(Tw),3)
st[,3] <- round(sqrt(Tw/(1+Tw)),3)
st <- as.data.frame(st)
dimnames(st) <- list(
paste("DF", 1:maxg, sep = ""),
c("Eig","Perceig","Cancor"))

res <- as(x,'LDA')
res@stats <- as_tibble(st)
res@Tw <- Tw
res@rankmat <- rankmat
res@means <- pc$center

loadings <- pc$rotation[,1:rankmat,drop=FALSE] %*% vec
colnames(loadings) <- paste("DF", 1:maxg, sep = "")
x <- sweep(d, 2, res@means) %*% loadings

## group means based on the rotated data
xmeans <- tapply(x, list(rep(cl,ncol(x)),col(x)), mean)
dimnames(xmeans)[[2]] <- colnames(x)

nbmod <- naiveBayes(data.frame(x),cl)
prob <- predict(nbmod,data.frame(x),type="raw")
pred <- apply(prob,1,which.max)
pred <- factor(levels(cl)[pred], levels = levels(cl))

res@loadings <- as_tibble(loadings)
res@x <- as_tibble(x)
res@xmeans <- as_tibble(xmeans)
res@pred <- pred
res@cl <- cl
res@prior <- prior
res@conf <- table(cl,pred)
res@acc <- round(sum(diag(res@conf))*100/nrow(d),2)
res@lev <- levels(cl)
res@call <- match.call()
res@call[[1]] <- as.name("nlda")

return(res)
}
)
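
The rewrite above converts `nlda()` from an S3 default method on a raw matrix into an S4 method on `AnalysisData`: class labels are pulled with `clsExtract()`, single-replicate classes are dropped via `removeClasses()` with a warning, and the results are returned as the new `LDA` object (built with `as(x,'LDA')`) instead of a list. The numerical core is unchanged from the FIEmspro implementation: PCA scores are used to form within-class (`W`) and between-class (`B`) scatter matrices, and the discriminant functions come from the leading eigenvectors of `solve(Wn) %*% B`. A minimal sketch of calling the method directly, assuming a hypothetical `AnalysisData` object `d` (in normal use `nlda()` is called for you by `plotLDA()`):

lda_res <- nlda(d, cls = 'class', scale = FALSE)

lda_res@x         # samples projected onto the discriminant functions
lda_res@xmeans    # class means in the projected space
lda_res@loadings  # feature loadings for each discriminant function
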
22 changes: 6 additions & 16 deletions R/plotLDA.R
@@ -63,24 +63,16 @@ setMethod('plotLDA',
legendPosition = 'bottom',
labelSize = 2){

classLength <- clsLen(analysis,cls)
lda <- nlda(analysis,cls = cls,scale = scale,center = center)

if (classLength < 2) {
stop('More than 1 class needed for PC-LDA.',call. = FALSE)
}

info <- analysis %>%
clsExtract(cls) %>%
factor()

lda <- nlda(dat(analysis),cl = info,scale = scale,center = center)

tw <- lda$Tw %>%
tw <- lda@Tw %>%
round(2)

lda <- lda$x %>%
classLength <- clsLen(lda,cls = cls)

lda <- lda@x %>%
as_tibble() %>%
mutate(!!cls := info)
mutate(!!cls := lda@cl)

if (classLength > 2) {
lda <- lda %>%
@@ -92,8 +84,6 @@
select(all_of(label)))
}

classLength <- clsLen(analysis,cls)

pl <- scatterPlot(lda,
cls,
xAxis,
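
With the refactor above, `plotLDA()` no longer extracts the class factor or checks the number of classes itself; it passes the `AnalysisData` object straight to `nlda()` and reads the scores (`@x`), eigenvalues (`@Tw`) and the possibly reduced class factor (`@cl`) from the returned `LDA` object, so the single-replicate filtering lives in one place. A minimal usage sketch, assuming a hypothetical `AnalysisData` object `d` (argument names taken from the method signature shown above):

plotLDA(d, cls = 'class', legendPosition = 'bottom')
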
