Merge 9226b99 into 1e5a325

PoisotLab · Oct 1, 2020 · dcb52fd · dcb52fd
2 parents 1e5a325 + 9226b99
commit dcb52fd
Show file tree

Hide file tree

Showing 59 changed files with 1,725 additions and 191 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,22 +1,23 @@
 Package: paco
-Version: 0.3.1
-Date: 2016-11-28
+Version: 0.4.2
+Date: 2020-08-19
 Title: Procrustes Application to Cophylogenetic Analysis
 Description: Procrustes analyses to infer co-phylogenetic
-    matching between pairs of (ultrametric) phylogenetic trees.
+    matching between pairs of phylogenetic trees.
 Author: Juan Antonio Balbuena <j.a.balbuena@uv.es>, Timothee Poisot
     <tim@poisotlab.io>, Matthew Hutchinson <matthewhutchinson15@gmail.com>,
-    Fernando Cagua <fernando@cagua.co>
+    Fernando Cagua <fernando@cagua.co>; see PLoS ONE Balbuena et al 2013 <https://doi.org/10.1371/journal.pone.0061048>
 Maintainer: Matthew Hutchinson <matthewhutchinson15@gmail.com>
 Depends:
     R (>= 3.0.0)
 Imports:
     vegan (>= 2.2-0),
+    ape,
     plyr
 Suggests:
-    ape,
-    doMC,
     testthat
+Note: The current version (0.4.2) fixes a numerical issue with symmetric implementation of the paco_links function.
 License: MIT + file LICENSE
-URL: http://www.uv.es/cophylpaco/
-RoxygenNote: 5.0.1
+URL: https://www.uv.es/cophylpaco/
+Encoding: UTF-8
+RoxygenNote: 7.1.1
diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-ALL: paco.tar.gz
+ALL: paco_0.4.2.tar.gz
 
 doc: R/*.r
 	R -e "library(devtools); document('.')"
@@ -16,9 +16,11 @@ test: cran/paco doc
 
 paco.tar.gz:
 	rm $@ 2>/dev/null; true
-	cd cran; tar -zcvf paco.tar.gz paco
-	mv cran/paco.tar.gz $@
+	cd cran; R CMD build paco
+	mv paco_0.4.2.tar.gz ../
+	cd ../
+	R CMD check --as-cran paco_0.4.2.tar.gz
 
 clean:
-	rm paco.tar.gz
+	rm paco_0.4.2.tar.gz
 	rm -r cran/paco
diff --git a/R/add_pcoord.r b/R/add_pcoord.r
@@ -3,8 +3,8 @@
 #' Translates the distance matrices of 'host' and 'parasite' phylogenies into Principal Coordinates, as needed for Procrustes superimposition.
 #' @param D A list with objects H, P, and HP, as returned by \code{paco::prepare_paco_data}.
 #' @param correction In some cases, phylogenetic distance matrices are non-Euclidean which generates negative eigenvalues when those matrices are translated into Principal Coordinates. There are several methods to correct negative eigenvalues. Correction options available here are "cailliez", "lingoes", and "none". The "cailliez" and "lingoes" corrections add a constant to the eigenvalues to make them non-negative. Default is "none". 
-#' @return The list that was input as the argument `D' with two new elements; the Principal Coordinates of the `host' distance matrix and the Principal Coordinates of the `parasite' distance matrix.
-#' @note To find the Principal Coordinates of each distance matrix, we internally use a function, \code{coordpcoa}, that is a modified version of \code{ape::pcoa}, using \code{vegan::eigenvals}.
+#' @return The list that was input as the argument `D' with four new elements; the Principal Coordinates of the `host' distance matrix and the Principal Coordinates of the `parasite' distance matrix, as well as, a `correction' object stating the correction used for negative eigenvalues and a `note' object stating whether or not negative eigenvalues were present and therefore corrected.
+#' @note To find the Principal Coordinates of each distance matrix, we internally a modified version of the function \code{ape::pcoa} that uses \code{vegan::eigenvals} and zapsmall
 #' @export
 #' @examples
 #' data(gopherlice)
@@ -16,10 +16,34 @@
 add_pcoord <- function(D, correction='none')
 {
 HP_bin <- which(D$HP >0, arr.ind=TRUE)
-H_PCo <- coordpcoa(D$H, correction)$vectors #Performs PCo of Host distances
-P_PCo <- coordpcoa(D$P, correction)$vectors #Performs PCo of Parasite distances
-D$H_PCo <- H_PCo[HP_bin[,1],] #Adjust Host PCo vectors
-D$P_PCo <- P_PCo[HP_bin[,2],] #Adjust Parasite PCo vectors
+if(correction == 'none'){
+	H_PCo <- coordpcoa(D$H, correction) #Performs PCo of Host distances
+	P_PCo <- coordpcoa(D$P, correction) #Performs PCo of Parasite distances
+	H_PCoVec <- H_PCo$vectors
+	P_PCoVec <- P_PCo$vectors
+	D$H_PCo <- H_PCoVec[HP_bin[,1],] #Adjust Host PCo vectors
+	D$P_PCo <- P_PCoVec[HP_bin[,2],] #Adjust Parasite PCo vectors
+}else{
+	H_PCo <- coordpcoa(D$H, correction)
+	P_PCo <- coordpcoa(D$P, correction)
+	if(H_PCo$note == "There were no negative eigenvalues. No correction was applied"){
+		H_PCoVec <- H_PCo$vectors
+		D$H_PCo <- H_PCoVec[HP_bin[,1],] #Adjust Host PCo vectors
+	}else{
+		H_PCoVec <- H_PCo$vectors.cor
+		D$H_PCo <- H_PCoVec[HP_bin[,1],] #Adjust Host PCo vectors
+	}
+
+	if(P_PCo$note == "There were no negative eigenvalues. No correction was applied"){
+		P_PCoVec <- P_PCo$vectors
+		D$P_PCo <- P_PCoVec[HP_bin[,2],] #Adjust Host PCo vectors
+	}else{
+		P_PCoVec <- P_PCo$vectors.cor
+		D$P_PCo <- P_PCoVec[HP_bin[,2],] #Adjust Host PCo vectors
+	}
+
+}
 D$correction <- correction
+D$note <- list(H_note=H_PCo$note, P_note=P_PCo$note)
 return(D)
 }
diff --git a/R/coordpcoa.r b/R/coordpcoa.r
@@ -2,8 +2,8 @@
 # D A list with objects H, P, and HP, returned by prepare_paco_data
 #  correction The correction to apply (none, lingoes, or cailliez)
 #  rn rownames (optional)
-# Internal function coordpcoa is a modified version of ape::pcoa, utilising vegan::eigenvals
-coordpcoa <-function (D, correction = "none", rn = NULL)
+# Internal function coordpcoa is a modified version of ape::pcoa, utilising vegan::eigenvals and zapsmall
+coordpcoa <- function (D, correction = "none", rn = NULL) 
 {
     centre <- function(D, n) {
         One <- matrix(1, n, n)
@@ -20,55 +20,57 @@ coordpcoa <-function (D, correction = "none", rn = NULL)
     epsilon <- sqrt(.Machine$double.eps)
     if (length(rn) != 0) {
         names <- rn
-    }
-    else {
+    }else {
         names <- rownames(D)
     }
     CORRECTIONS <- c("none", "lingoes", "cailliez")
     correct <- pmatch(correction, CORRECTIONS)
-    if (is.na(correct)) stop("Invalid correction method")
+    if (is.na(correct)) 
+        stop("Invalid correction method")
     delta1 <- centre((-0.5 * D^2), n)
     trace <- sum(diag(delta1))
     D.eig <- eigen(delta1)
+    D.eig$values <- as.numeric(zapsmall(vegan::eigenvals(D.eig)))
     min.eig <- min(D.eig$values)
-    D.eig$values <- vegan::eigenvals(D.eig)
-    zero.eig <- which(D.eig$values < epsilon)
+    zero.eig <- which(abs(D.eig$values) < epsilon)
+    D.eig$values[zero.eig] <- 0
     if (min.eig > -epsilon) {
         correct <- 1
         eig <- D.eig$values
         k <- length(which(eig > epsilon))
         rel.eig <- eig[1:k]/trace
         cum.eig <- cumsum(rel.eig)
-        vectors <- sweep(D.eig$vectors[, 1:k], 2, sqrt(eig[1:k]), FUN = "*")
+        vectors <- sweep(D.eig$vectors[, 1:k], 2, sqrt(eig[1:k]), 
+            FUN = "*")
         bs <- bstick.def(k)
         cum.bs <- cumsum(bs)
         res <- data.frame(eig[1:k], rel.eig, bs, cum.eig, cum.bs)
-        colnames(res) <- c("Eigenvalues", "Relative_eig", "Broken_stick",
+        colnames(res) <- c("Eigenvalues", "Relative_eig", "Broken_stick", 
             "Cumul_eig", "Cumul_br_stick")
         rownames(res) <- 1:nrow(res)
         rownames(vectors) <- names
-        colnames(vectors) <- colnames(vectors, do.NULL = FALSE,
+        colnames(vectors) <- colnames(vectors, do.NULL = FALSE, 
             prefix = "Axis.")
         note <- paste("There were no negative eigenvalues. No correction was applied")
-        out <- (list(correction = c(correction, correct), note = note,
+        out <- (list(correction = c(correction, correct), note = note, 
             values = res, vectors = vectors, trace = trace))
     }
     else {
         k <- n
         eig <- D.eig$values
         rel.eig <- eig/trace
         rel.eig.cor <- (eig - min.eig)/(trace - (n - 1) * min.eig)
-        rel.eig.cor = c(rel.eig.cor[1:(zero.eig[1] - 1)], rel.eig.cor[(zero.eig[1] +
+        rel.eig.cor = c(rel.eig.cor[1:(zero.eig[1] - 1)], rel.eig.cor[(zero.eig[1] + 
             1):n], 0)
         cum.eig.cor <- cumsum(rel.eig.cor)
         k2 <- length(which(eig > epsilon))
         k3 <- length(which(rel.eig.cor > epsilon))
-        vectors <- sweep(D.eig$vectors[, 1:k2], 2, sqrt(eig[1:k2]),
+        vectors <- sweep(D.eig$vectors[, 1:k2], 2, sqrt(eig[1:k2]), 
             FUN = "*")
         if ((correct == 2) | (correct == 3)) {
             if (correct == 2) {
                 c1 <- -min.eig
-                note <- paste("Lingoes correction applied to negative eigenvalues: D' = -0.5*D^2 -",
+                note <- paste("Lingoes correction applied to negative eigenvalues: D' = -0.5*D^2 -", 
                   c1, ", except diagonal elements")
                 D <- -0.5 * (D^2 + 2 * c1)
             }
@@ -77,69 +79,80 @@ coordpcoa <-function (D, correction = "none", rn = NULL)
                 upper <- cbind(matrix(0, n, n), 2 * delta1)
                 lower <- cbind(-diag(n), -4 * delta2)
                 sp.matrix <- rbind(upper, lower)
-                c2 <- max(Re(eigen(sp.matrix, symmetric = FALSE,
+                c2 <- max(Re(eigen(sp.matrix, symmetric = FALSE, 
                   only.values = TRUE)$values))
-                note <- paste("Cailliez correction applied to negative eigenvalues: D' = -0.5*(D +", c2, ")^2, except diagonal elements")
+                note <- paste("Cailliez correction applied to negative eigenvalues: D' = -0.5*(D +", 
+                  c2, ")^2, except diagonal elements")
                 D <- -0.5 * (D + c2)^2
             }
             diag(D) <- 0
             mat.cor <- centre(D, n)
             toto.cor <- eigen(mat.cor)
+            toto.cor$values <- as.numeric(zapsmall(vegan::eigenvals(toto.cor)))
             trace.cor <- sum(diag(mat.cor))
             min.eig.cor <- min(toto.cor$values)
-            toto.cor$values <- vegan::eigenvals(toto.cor)
-            zero.eig.cor <- which((toto.cor$values < epsilon) & (toto.cor$values > -epsilon))
-
+            zero.eig.cor <- which((toto.cor$values < epsilon) & 
+                (toto.cor$values > -epsilon))
+            toto.cor$values[zero.eig.cor] <- 0
             if (min.eig.cor > -epsilon) {
                 eig.cor <- toto.cor$values
                 rel.eig.cor <- eig.cor[1:k]/trace.cor
                 cum.eig.cor <- cumsum(rel.eig.cor)
                 k2 <- length(which(eig.cor > epsilon))
-                vectors.cor <- sweep(toto.cor$vectors[, 1:k2],
+                vectors.cor <- sweep(toto.cor$vectors[, 1:k2], 
                   2, sqrt(eig.cor[1:k2]), FUN = "*")
+                rownames(vectors.cor) <- names
+                colnames(vectors.cor) <- colnames(vectors.cor, 
+                  do.NULL = FALSE, prefix = "Axis.")
                 bs <- bstick.def(k2)
                 bs <- c(bs, rep(0, (k - k2)))
                 cum.bs <- cumsum(bs)
             }
             else {
-                if (correct == 2)
-                  cat("Problem! Negative eigenvalues are still present after Lingoes","\n")
-                if (correct == 3)
-                  cat("Problem! Negative eigenvalues are still present after Cailliez","\n")
-                rel.eig.cor <- cum.eig.cor <- bs <- cum.bs <- rep(NA, n)
+                if (correct == 2) 
+                  cat("Problem! Negative eigenvalues are still present after Lingoes", 
+                    "\n")
+                if (correct == 3) 
+                  cat("Problem! Negative eigenvalues are still present after Cailliez", 
+                    "\n")
+                rel.eig.cor <- cum.eig.cor <- bs <- cum.bs <- rep(NA, 
+                  n)
                 vectors.cor <- matrix(NA, n, 2)
+                rownames(vectors.cor) <- names
+                colnames(vectors.cor) <- colnames(vectors.cor, 
+                  do.NULL = FALSE, prefix = "Axis.")
             }
-            res <- data.frame(eig[1:k], eig.cor[1:k], rel.eig.cor,
+            res <- data.frame(eig[1:k], eig.cor[1:k], rel.eig.cor, 
                 bs, cum.eig.cor, cum.bs)
-            colnames(res) <- c("Eigenvalues", "Corr_eig", "Rel_corr_eig",
+            colnames(res) <- c("Eigenvalues", "Corr_eig", "Rel_corr_eig", 
                 "Broken_stick", "Cum_corr_eig", "Cum_br_stick")
             rownames(res) <- 1:nrow(res)
             rownames(vectors) <- names
-            colnames(vectors) <- colnames(vectors, do.NULL = FALSE,
+            colnames(vectors) <- colnames(vectors, do.NULL = FALSE, 
                 prefix = "Axis.")
-            out <- (list(correction = c(correction, correct),
-                note = note, values = res, vectors = vectors,
+            out <- (list(correction = c(correction, correct), 
+                note = note, values = res, vectors = vectors, 
                 trace = trace, vectors.cor = vectors.cor, trace.cor = trace.cor))
         }
         else {
             note <- "No correction was applied to the negative eigenvalues"
             bs <- bstick.def(k3)
             bs <- c(bs, rep(0, (k - k3)))
             cum.bs <- cumsum(bs)
-            res <- data.frame(eig[1:k], rel.eig, rel.eig.cor,
+            res <- data.frame(eig[1:k], rel.eig, rel.eig.cor, 
                 bs, cum.eig.cor, cum.bs)
-            colnames(res) <- c("Eigenvalues", "Relative_eig",
-                "Rel_corr_eig", "Broken_stick", "Cum_corr_eig",
+            colnames(res) <- c("Eigenvalues", "Relative_eig", 
+                "Rel_corr_eig", "Broken_stick", "Cum_corr_eig", 
                 "Cumul_br_stick")
             rownames(res) <- 1:nrow(res)
             rownames(vectors) <- names
-            colnames(vectors) <- colnames(vectors, do.NULL = FALSE,
+            colnames(vectors) <- colnames(vectors, do.NULL = FALSE, 
                 prefix = "Axis.")
-            out <- (list(correction = c(correction, correct),
-                note = note, values = res, vectors = vectors,
+            out <- (list(correction = c(correction, correct), 
+                note = note, values = res, vectors = vectors, 
                 trace = trace))
         }
     }
     class(out) <- "pcoa"
     out
-}
+}
diff --git a/R/paco_links.r b/R/paco_links.r
@@ -1,10 +1,10 @@
 #' Contribution of individual links
 #'
-#' Uses a jackknife procedure to estimate the degree to which individual interactions are more supportive of a hypothesis of phylogenetic congruence than others. Interactions are iteratively removed, the global fit of the two phylogenies is reassessed and the difference between global fit with and without an interaction estimates the strength of support of said interaction to a hypothesis of phylogenetic congruence.
+#' Uses a jackknife procedure to perform bias correction on procrustes residuals (i.e. interactions) that are indicative of the degree to which individual interactions are more supportive of a hypothesis of phylogenetic congruence than others. Interactions are iteratively removed, the global fit of the two phylogenies is reassessed and bias in observed residuals calculated and corrected.
 #' @param D A list of class \code{paco} as returned by \code{paco::PACo}. 
 #' @param .parallel If TRUE, calculate the jackknife contribution in parallel using the backend provided by foreach.
 #' @param proc.warnings As in PACo. If \code{TRUE}, any warnings produced by internal calls of \code{paco::PACo} will be available for the user to view. If \code{FALSE}, warnings are internally suppressed.
-#' @return The input list of class \code{paco} with the added object jackknife which containing the mean and upper CI values for each link.
+#' @return The input list of class \code{paco} with the added object jackknife which contains the bias-corrected residual for each link.
 #' @export
 #' @examples
 #' data(gopherlice)
@@ -16,38 +16,37 @@
 #' D <- PACo(D, nperm=10, seed=42, method="r0")
 #' D <- paco_links(D)
 
-paco_links <- function(D, .parallel = FALSE, proc.warnings=TRUE)
-{
-   correction <- D$correction
-   HP.ones <- which(D$HP > 0, arr.ind=TRUE)
-   SQres.jackn <- matrix(rep(NA, sum(D$HP)^2), sum(D$HP))# empty matrix of jackknifed squared residuals
-   colnames(SQres.jackn) <- paste(rownames(D$proc$X),rownames(D$proc$Yrot), sep="-") #colnames identify the H-P link
-   t.critical = stats::qt(0.975,sum(D$HP)-1) #Needed to compute 95% confidence intervals.
-   nlinks <- sum(D$HP)
+paco_links <- function(D, .parallel = FALSE, proc.warnings=TRUE){
+
+  correction <- D$correction
+  HP.ones <- which(D$HP > 0, arr.ind=TRUE)
+  SQres.jackn <- matrix(rep(NA, sum(D$HP)^2), sum(D$HP))# empty matrix of jackknifed squared residuals
+  colnames(SQres.jackn) <- paste(rownames(D$proc$X),rownames(D$proc$Yrot), sep="-") #colnames identify the H-P link
+  t.critical = stats::qt(0.975,sum(D$HP)-1) #Needed to compute 95% confidence intervals.
+  nlinks <- sum(D$HP)
 
   #if .parallel is TRUE
   if(.parallel==TRUE){
-  SQres.jackn <- plyr::adply(1:nlinks, 1, function(x) single_paco_link(D, HP.ones, x, correction, proc.warnings), .parallel=.parallel)
-} else{
-  #if .parallel is FALSE
-  for(i in c(1:nlinks))
-  {
-  res.Proc.ind <- single_paco_link (D, HP.ones, i, correction, proc.warnings)
-  SQres.jackn[i, ] <- res.Proc.ind
+    SQres.jackn <- plyr::adply(1:nlinks, 1, 
+      function(i) single_paco_link(D, HP.ones, i, correction, proc.warnings), 
+      .parallel=.parallel)
+    SQres.jackn <- SQres.jackn[,-1]
+    colnames(SQres.jackn)[1] <- paste(rownames(D$proc$X),rownames(D$proc$Yrot), sep="-")[1]
+  }else{
+    #if .parallel is FALSE
+    for(i in c(1:nlinks)){
+      res.Proc.ind <- single_paco_link (D, HP.ones, i, correction, proc.warnings)
+      SQres.jackn[i, ] <- res.Proc.ind
+    }
   }
-}
 
-   SQres.jackn <- SQres.jackn^2 #Jackknifed residuals are squared
-   SQres <- (stats::residuals(D$proc))^2 # Vector of original square residuals
-   #jackknife calculations:
-   SQres.jackn <- SQres.jackn*(-(sum(D$HP)-1))
-   SQres <- SQres*sum(D$HP)
-   SQres.jackn <- t(apply(SQres.jackn, 1, "+", SQres)) #apply jackknife function to matrix
-   phi.mean <- apply(SQres.jackn, 2, mean, na.rm = TRUE) #mean jackknife estimate per link
-   phi.UCI <- apply(SQres.jackn, 2, stats::sd, na.rm = TRUE) #standard deviation of estimates
-   phi.UCI <- phi.mean + t.critical * phi.UCI/sqrt(sum(D$HP))
-   D$jackknife <- list(mean = phi.mean, upper = phi.UCI)
-   return(D)
+  SQres <- residuals_paco(D$proc) # Vector of original square residuals
+  # #jackknife calculations:
+  SQres.jackn.mean <- apply(SQres.jackn, 2, function(x) (1/sum(D$HP))*sum(x, na.rm=TRUE)) # calculate average across all runs
+  phi.mean <- (sum(D$HP)*SQres)-((sum(D$HP)-1)*SQres.jackn.mean) # calculate bias-corrected jackknife estimate
+
+  D$jackknife <- phi.mean
+  return(D)
 }
 
 #PACo setting the ith link = 0
@@ -56,9 +55,9 @@ single_paco_link <- function (D, HP.ones, i, correction, proc.warnings) {
   HP_ind[HP.ones[i,1],HP.ones[i,2]]=0
   PACo.ind <- add_pcoord(list(H=D$H, P=D$P, HP=HP_ind), correction=correction)
   if(proc.warnings==TRUE){
-    Proc.ind <- vegan::procrustes(X=PACo.ind$H_PCo, Y=PACo.ind$P_PCo)
+    Proc.ind <- vegan::procrustes(X=PACo.ind$H_PCo, Y=PACo.ind$P_PCo, symmetric=D$symmetric)
   }else{
-      Proc.ind <- suppressWarnings(vegan::procrustes(X=PACo.ind$H_PCo, Y=PACo.ind$P_PCo))
+      Proc.ind <- suppressWarnings(vegan::procrustes(X=PACo.ind$H_PCo, Y=PACo.ind$P_PCo, symmetric=D$symmetric))
   }
   res.Proc.ind <- c(residuals_paco(Proc.ind))
   res.Proc.ind <- append(res.Proc.ind, NA, after= i-1)

diff --git a/R/residuals_paco.r b/R/residuals_paco.r
@@ -1,7 +1,7 @@
 #' Return Procrustes residuals from a paco object
 #'
 #' Takes the Procrustes object from \code{vegan::procrustes} of the global superimpostion and pulls out either the residual matrix of superimposition or the residual of each individual interaction (link between host and parasite). 
-#' @param object An obejct of class \code{procrustes} as returned from PACo (and internally the \code{vegan::procrustes} function). In a PACo output this is \code{D$proc}.
+#' @param object An obejct of class \code{procrustes} as returned from PACo (and internally the \code{vegan::procrustes} function). In a PACo output this is D\$proc.
 #' @param type Character string. Whether the whole residual matrix (\code{matrix}) or the residuals per interaction (\code{interaction}) is desired.
 #' @return If \code{type=interaction}, a named vector of the Procrustes residuals is returned where names are the interactions. If \code{type=matrix}, a matrix of residuals from Procrustes superimposition is returned.
 #' @export