Skip to content

Commit

Permalink
Merge 9226b99 into 1e5a325
Browse files Browse the repository at this point in the history
  • Loading branch information
matthutchinson1 committed Oct 1, 2020
2 parents 1e5a325 + 9226b99 commit dcb52fd
Show file tree
Hide file tree
Showing 59 changed files with 1,725 additions and 191 deletions.
17 changes: 9 additions & 8 deletions DESCRIPTION
@@ -1,22 +1,23 @@
Package: paco
Version: 0.3.1
Date: 2016-11-28
Version: 0.4.2
Date: 2020-08-19
Title: Procrustes Application to Cophylogenetic Analysis
Description: Procrustes analyses to infer co-phylogenetic
matching between pairs of (ultrametric) phylogenetic trees.
matching between pairs of phylogenetic trees.
Author: Juan Antonio Balbuena <j.a.balbuena@uv.es>, Timothee Poisot
<tim@poisotlab.io>, Matthew Hutchinson <matthewhutchinson15@gmail.com>,
Fernando Cagua <fernando@cagua.co>
Fernando Cagua <fernando@cagua.co>; see PLoS ONE Balbuena et al 2013 <https://doi.org/10.1371/journal.pone.0061048>
Maintainer: Matthew Hutchinson <matthewhutchinson15@gmail.com>
Depends:
R (>= 3.0.0)
Imports:
vegan (>= 2.2-0),
ape,
plyr
Suggests:
ape,
doMC,
testthat
Note: The current version (0.4.2) fixes a numerical issue with symmetric implementation of the paco_links function.
License: MIT + file LICENSE
URL: http://www.uv.es/cophylpaco/
RoxygenNote: 5.0.1
URL: https://www.uv.es/cophylpaco/
Encoding: UTF-8
RoxygenNote: 7.1.1
10 changes: 6 additions & 4 deletions Makefile
@@ -1,4 +1,4 @@
ALL: paco.tar.gz
ALL: paco_0.4.2.tar.gz

doc: R/*.r
R -e "library(devtools); document('.')"
Expand All @@ -16,9 +16,11 @@ test: cran/paco doc

paco.tar.gz:
rm $@ 2>/dev/null; true
cd cran; tar -zcvf paco.tar.gz paco
mv cran/paco.tar.gz $@
cd cran; R CMD build paco
mv paco_0.4.2.tar.gz ../
cd ../
R CMD check --as-cran paco_0.4.2.tar.gz

clean:
rm paco.tar.gz
rm paco_0.4.2.tar.gz
rm -r cran/paco
36 changes: 30 additions & 6 deletions R/add_pcoord.r
Expand Up @@ -3,8 +3,8 @@
#' Translates the distance matrices of 'host' and 'parasite' phylogenies into Principal Coordinates, as needed for Procrustes superimposition.
#' @param D A list with objects H, P, and HP, as returned by \code{paco::prepare_paco_data}.
#' @param correction In some cases, phylogenetic distance matrices are non-Euclidean which generates negative eigenvalues when those matrices are translated into Principal Coordinates. There are several methods to correct negative eigenvalues. Correction options available here are "cailliez", "lingoes", and "none". The "cailliez" and "lingoes" corrections add a constant to the eigenvalues to make them non-negative. Default is "none".
#' @return The list that was input as the argument `D' with two new elements; the Principal Coordinates of the `host' distance matrix and the Principal Coordinates of the `parasite' distance matrix.
#' @note To find the Principal Coordinates of each distance matrix, we internally use a function, \code{coordpcoa}, that is a modified version of \code{ape::pcoa}, using \code{vegan::eigenvals}.
#' @return The list that was input as the argument `D' with four new elements; the Principal Coordinates of the `host' distance matrix and the Principal Coordinates of the `parasite' distance matrix, as well as, a `correction' object stating the correction used for negative eigenvalues and a `note' object stating whether or not negative eigenvalues were present and therefore corrected.
#' @note To find the Principal Coordinates of each distance matrix, we internally a modified version of the function \code{ape::pcoa} that uses \code{vegan::eigenvals} and zapsmall
#' @export
#' @examples
#' data(gopherlice)
Expand All @@ -16,10 +16,34 @@
add_pcoord <- function(D, correction='none')
{
HP_bin <- which(D$HP >0, arr.ind=TRUE)
H_PCo <- coordpcoa(D$H, correction)$vectors #Performs PCo of Host distances
P_PCo <- coordpcoa(D$P, correction)$vectors #Performs PCo of Parasite distances
D$H_PCo <- H_PCo[HP_bin[,1],] #Adjust Host PCo vectors
D$P_PCo <- P_PCo[HP_bin[,2],] #Adjust Parasite PCo vectors
if(correction == 'none'){
H_PCo <- coordpcoa(D$H, correction) #Performs PCo of Host distances
P_PCo <- coordpcoa(D$P, correction) #Performs PCo of Parasite distances
H_PCoVec <- H_PCo$vectors
P_PCoVec <- P_PCo$vectors
D$H_PCo <- H_PCoVec[HP_bin[,1],] #Adjust Host PCo vectors
D$P_PCo <- P_PCoVec[HP_bin[,2],] #Adjust Parasite PCo vectors
}else{
H_PCo <- coordpcoa(D$H, correction)
P_PCo <- coordpcoa(D$P, correction)
if(H_PCo$note == "There were no negative eigenvalues. No correction was applied"){
H_PCoVec <- H_PCo$vectors
D$H_PCo <- H_PCoVec[HP_bin[,1],] #Adjust Host PCo vectors
}else{
H_PCoVec <- H_PCo$vectors.cor
D$H_PCo <- H_PCoVec[HP_bin[,1],] #Adjust Host PCo vectors
}

if(P_PCo$note == "There were no negative eigenvalues. No correction was applied"){
P_PCoVec <- P_PCo$vectors
D$P_PCo <- P_PCoVec[HP_bin[,2],] #Adjust Host PCo vectors
}else{
P_PCoVec <- P_PCo$vectors.cor
D$P_PCo <- P_PCoVec[HP_bin[,2],] #Adjust Host PCo vectors
}

}
D$correction <- correction
D$note <- list(H_note=H_PCo$note, P_note=P_PCo$note)
return(D)
}
87 changes: 50 additions & 37 deletions R/coordpcoa.r
Expand Up @@ -2,8 +2,8 @@
# D A list with objects H, P, and HP, returned by prepare_paco_data
# correction The correction to apply (none, lingoes, or cailliez)
# rn rownames (optional)
# Internal function coordpcoa is a modified version of ape::pcoa, utilising vegan::eigenvals
coordpcoa <-function (D, correction = "none", rn = NULL)
# Internal function coordpcoa is a modified version of ape::pcoa, utilising vegan::eigenvals and zapsmall
coordpcoa <- function (D, correction = "none", rn = NULL)
{
centre <- function(D, n) {
One <- matrix(1, n, n)
Expand All @@ -20,55 +20,57 @@ coordpcoa <-function (D, correction = "none", rn = NULL)
epsilon <- sqrt(.Machine$double.eps)
if (length(rn) != 0) {
names <- rn
}
else {
}else {
names <- rownames(D)
}
CORRECTIONS <- c("none", "lingoes", "cailliez")
correct <- pmatch(correction, CORRECTIONS)
if (is.na(correct)) stop("Invalid correction method")
if (is.na(correct))
stop("Invalid correction method")
delta1 <- centre((-0.5 * D^2), n)
trace <- sum(diag(delta1))
D.eig <- eigen(delta1)
D.eig$values <- as.numeric(zapsmall(vegan::eigenvals(D.eig)))
min.eig <- min(D.eig$values)
D.eig$values <- vegan::eigenvals(D.eig)
zero.eig <- which(D.eig$values < epsilon)
zero.eig <- which(abs(D.eig$values) < epsilon)
D.eig$values[zero.eig] <- 0
if (min.eig > -epsilon) {
correct <- 1
eig <- D.eig$values
k <- length(which(eig > epsilon))
rel.eig <- eig[1:k]/trace
cum.eig <- cumsum(rel.eig)
vectors <- sweep(D.eig$vectors[, 1:k], 2, sqrt(eig[1:k]), FUN = "*")
vectors <- sweep(D.eig$vectors[, 1:k], 2, sqrt(eig[1:k]),
FUN = "*")
bs <- bstick.def(k)
cum.bs <- cumsum(bs)
res <- data.frame(eig[1:k], rel.eig, bs, cum.eig, cum.bs)
colnames(res) <- c("Eigenvalues", "Relative_eig", "Broken_stick",
colnames(res) <- c("Eigenvalues", "Relative_eig", "Broken_stick",
"Cumul_eig", "Cumul_br_stick")
rownames(res) <- 1:nrow(res)
rownames(vectors) <- names
colnames(vectors) <- colnames(vectors, do.NULL = FALSE,
colnames(vectors) <- colnames(vectors, do.NULL = FALSE,
prefix = "Axis.")
note <- paste("There were no negative eigenvalues. No correction was applied")
out <- (list(correction = c(correction, correct), note = note,
out <- (list(correction = c(correction, correct), note = note,
values = res, vectors = vectors, trace = trace))
}
else {
k <- n
eig <- D.eig$values
rel.eig <- eig/trace
rel.eig.cor <- (eig - min.eig)/(trace - (n - 1) * min.eig)
rel.eig.cor = c(rel.eig.cor[1:(zero.eig[1] - 1)], rel.eig.cor[(zero.eig[1] +
rel.eig.cor = c(rel.eig.cor[1:(zero.eig[1] - 1)], rel.eig.cor[(zero.eig[1] +
1):n], 0)
cum.eig.cor <- cumsum(rel.eig.cor)
k2 <- length(which(eig > epsilon))
k3 <- length(which(rel.eig.cor > epsilon))
vectors <- sweep(D.eig$vectors[, 1:k2], 2, sqrt(eig[1:k2]),
vectors <- sweep(D.eig$vectors[, 1:k2], 2, sqrt(eig[1:k2]),
FUN = "*")
if ((correct == 2) | (correct == 3)) {
if (correct == 2) {
c1 <- -min.eig
note <- paste("Lingoes correction applied to negative eigenvalues: D' = -0.5*D^2 -",
note <- paste("Lingoes correction applied to negative eigenvalues: D' = -0.5*D^2 -",
c1, ", except diagonal elements")
D <- -0.5 * (D^2 + 2 * c1)
}
Expand All @@ -77,69 +79,80 @@ coordpcoa <-function (D, correction = "none", rn = NULL)
upper <- cbind(matrix(0, n, n), 2 * delta1)
lower <- cbind(-diag(n), -4 * delta2)
sp.matrix <- rbind(upper, lower)
c2 <- max(Re(eigen(sp.matrix, symmetric = FALSE,
c2 <- max(Re(eigen(sp.matrix, symmetric = FALSE,
only.values = TRUE)$values))
note <- paste("Cailliez correction applied to negative eigenvalues: D' = -0.5*(D +", c2, ")^2, except diagonal elements")
note <- paste("Cailliez correction applied to negative eigenvalues: D' = -0.5*(D +",
c2, ")^2, except diagonal elements")
D <- -0.5 * (D + c2)^2
}
diag(D) <- 0
mat.cor <- centre(D, n)
toto.cor <- eigen(mat.cor)
toto.cor$values <- as.numeric(zapsmall(vegan::eigenvals(toto.cor)))
trace.cor <- sum(diag(mat.cor))
min.eig.cor <- min(toto.cor$values)
toto.cor$values <- vegan::eigenvals(toto.cor)
zero.eig.cor <- which((toto.cor$values < epsilon) & (toto.cor$values > -epsilon))

zero.eig.cor <- which((toto.cor$values < epsilon) &
(toto.cor$values > -epsilon))
toto.cor$values[zero.eig.cor] <- 0
if (min.eig.cor > -epsilon) {
eig.cor <- toto.cor$values
rel.eig.cor <- eig.cor[1:k]/trace.cor
cum.eig.cor <- cumsum(rel.eig.cor)
k2 <- length(which(eig.cor > epsilon))
vectors.cor <- sweep(toto.cor$vectors[, 1:k2],
vectors.cor <- sweep(toto.cor$vectors[, 1:k2],
2, sqrt(eig.cor[1:k2]), FUN = "*")
rownames(vectors.cor) <- names
colnames(vectors.cor) <- colnames(vectors.cor,
do.NULL = FALSE, prefix = "Axis.")
bs <- bstick.def(k2)
bs <- c(bs, rep(0, (k - k2)))
cum.bs <- cumsum(bs)
}
else {
if (correct == 2)
cat("Problem! Negative eigenvalues are still present after Lingoes","\n")
if (correct == 3)
cat("Problem! Negative eigenvalues are still present after Cailliez","\n")
rel.eig.cor <- cum.eig.cor <- bs <- cum.bs <- rep(NA, n)
if (correct == 2)
cat("Problem! Negative eigenvalues are still present after Lingoes",
"\n")
if (correct == 3)
cat("Problem! Negative eigenvalues are still present after Cailliez",
"\n")
rel.eig.cor <- cum.eig.cor <- bs <- cum.bs <- rep(NA,
n)
vectors.cor <- matrix(NA, n, 2)
rownames(vectors.cor) <- names
colnames(vectors.cor) <- colnames(vectors.cor,
do.NULL = FALSE, prefix = "Axis.")
}
res <- data.frame(eig[1:k], eig.cor[1:k], rel.eig.cor,
res <- data.frame(eig[1:k], eig.cor[1:k], rel.eig.cor,
bs, cum.eig.cor, cum.bs)
colnames(res) <- c("Eigenvalues", "Corr_eig", "Rel_corr_eig",
colnames(res) <- c("Eigenvalues", "Corr_eig", "Rel_corr_eig",
"Broken_stick", "Cum_corr_eig", "Cum_br_stick")
rownames(res) <- 1:nrow(res)
rownames(vectors) <- names
colnames(vectors) <- colnames(vectors, do.NULL = FALSE,
colnames(vectors) <- colnames(vectors, do.NULL = FALSE,
prefix = "Axis.")
out <- (list(correction = c(correction, correct),
note = note, values = res, vectors = vectors,
out <- (list(correction = c(correction, correct),
note = note, values = res, vectors = vectors,
trace = trace, vectors.cor = vectors.cor, trace.cor = trace.cor))
}
else {
note <- "No correction was applied to the negative eigenvalues"
bs <- bstick.def(k3)
bs <- c(bs, rep(0, (k - k3)))
cum.bs <- cumsum(bs)
res <- data.frame(eig[1:k], rel.eig, rel.eig.cor,
res <- data.frame(eig[1:k], rel.eig, rel.eig.cor,
bs, cum.eig.cor, cum.bs)
colnames(res) <- c("Eigenvalues", "Relative_eig",
"Rel_corr_eig", "Broken_stick", "Cum_corr_eig",
colnames(res) <- c("Eigenvalues", "Relative_eig",
"Rel_corr_eig", "Broken_stick", "Cum_corr_eig",
"Cumul_br_stick")
rownames(res) <- 1:nrow(res)
rownames(vectors) <- names
colnames(vectors) <- colnames(vectors, do.NULL = FALSE,
colnames(vectors) <- colnames(vectors, do.NULL = FALSE,
prefix = "Axis.")
out <- (list(correction = c(correction, correct),
note = note, values = res, vectors = vectors,
out <- (list(correction = c(correction, correct),
note = note, values = res, vectors = vectors,
trace = trace))
}
}
class(out) <- "pcoa"
out
}
}
61 changes: 30 additions & 31 deletions R/paco_links.r
@@ -1,10 +1,10 @@
#' Contribution of individual links
#'
#' Uses a jackknife procedure to estimate the degree to which individual interactions are more supportive of a hypothesis of phylogenetic congruence than others. Interactions are iteratively removed, the global fit of the two phylogenies is reassessed and the difference between global fit with and without an interaction estimates the strength of support of said interaction to a hypothesis of phylogenetic congruence.
#' Uses a jackknife procedure to perform bias correction on procrustes residuals (i.e. interactions) that are indicative of the degree to which individual interactions are more supportive of a hypothesis of phylogenetic congruence than others. Interactions are iteratively removed, the global fit of the two phylogenies is reassessed and bias in observed residuals calculated and corrected.
#' @param D A list of class \code{paco} as returned by \code{paco::PACo}.
#' @param .parallel If TRUE, calculate the jackknife contribution in parallel using the backend provided by foreach.
#' @param proc.warnings As in PACo. If \code{TRUE}, any warnings produced by internal calls of \code{paco::PACo} will be available for the user to view. If \code{FALSE}, warnings are internally suppressed.
#' @return The input list of class \code{paco} with the added object jackknife which containing the mean and upper CI values for each link.
#' @return The input list of class \code{paco} with the added object jackknife which contains the bias-corrected residual for each link.
#' @export
#' @examples
#' data(gopherlice)
Expand All @@ -16,38 +16,37 @@
#' D <- PACo(D, nperm=10, seed=42, method="r0")
#' D <- paco_links(D)

paco_links <- function(D, .parallel = FALSE, proc.warnings=TRUE)
{
correction <- D$correction
HP.ones <- which(D$HP > 0, arr.ind=TRUE)
SQres.jackn <- matrix(rep(NA, sum(D$HP)^2), sum(D$HP))# empty matrix of jackknifed squared residuals
colnames(SQres.jackn) <- paste(rownames(D$proc$X),rownames(D$proc$Yrot), sep="-") #colnames identify the H-P link
t.critical = stats::qt(0.975,sum(D$HP)-1) #Needed to compute 95% confidence intervals.
nlinks <- sum(D$HP)
paco_links <- function(D, .parallel = FALSE, proc.warnings=TRUE){

correction <- D$correction
HP.ones <- which(D$HP > 0, arr.ind=TRUE)
SQres.jackn <- matrix(rep(NA, sum(D$HP)^2), sum(D$HP))# empty matrix of jackknifed squared residuals
colnames(SQres.jackn) <- paste(rownames(D$proc$X),rownames(D$proc$Yrot), sep="-") #colnames identify the H-P link
t.critical = stats::qt(0.975,sum(D$HP)-1) #Needed to compute 95% confidence intervals.
nlinks <- sum(D$HP)

#if .parallel is TRUE
if(.parallel==TRUE){
SQres.jackn <- plyr::adply(1:nlinks, 1, function(x) single_paco_link(D, HP.ones, x, correction, proc.warnings), .parallel=.parallel)
} else{
#if .parallel is FALSE
for(i in c(1:nlinks))
{
res.Proc.ind <- single_paco_link (D, HP.ones, i, correction, proc.warnings)
SQres.jackn[i, ] <- res.Proc.ind
SQres.jackn <- plyr::adply(1:nlinks, 1,
function(i) single_paco_link(D, HP.ones, i, correction, proc.warnings),
.parallel=.parallel)
SQres.jackn <- SQres.jackn[,-1]
colnames(SQres.jackn)[1] <- paste(rownames(D$proc$X),rownames(D$proc$Yrot), sep="-")[1]
}else{
#if .parallel is FALSE
for(i in c(1:nlinks)){
res.Proc.ind <- single_paco_link (D, HP.ones, i, correction, proc.warnings)
SQres.jackn[i, ] <- res.Proc.ind
}
}
}

SQres.jackn <- SQres.jackn^2 #Jackknifed residuals are squared
SQres <- (stats::residuals(D$proc))^2 # Vector of original square residuals
#jackknife calculations:
SQres.jackn <- SQres.jackn*(-(sum(D$HP)-1))
SQres <- SQres*sum(D$HP)
SQres.jackn <- t(apply(SQres.jackn, 1, "+", SQres)) #apply jackknife function to matrix
phi.mean <- apply(SQres.jackn, 2, mean, na.rm = TRUE) #mean jackknife estimate per link
phi.UCI <- apply(SQres.jackn, 2, stats::sd, na.rm = TRUE) #standard deviation of estimates
phi.UCI <- phi.mean + t.critical * phi.UCI/sqrt(sum(D$HP))
D$jackknife <- list(mean = phi.mean, upper = phi.UCI)
return(D)
SQres <- residuals_paco(D$proc) # Vector of original square residuals
# #jackknife calculations:
SQres.jackn.mean <- apply(SQres.jackn, 2, function(x) (1/sum(D$HP))*sum(x, na.rm=TRUE)) # calculate average across all runs
phi.mean <- (sum(D$HP)*SQres)-((sum(D$HP)-1)*SQres.jackn.mean) # calculate bias-corrected jackknife estimate

D$jackknife <- phi.mean
return(D)
}

#PACo setting the ith link = 0
Expand All @@ -56,9 +55,9 @@ single_paco_link <- function (D, HP.ones, i, correction, proc.warnings) {
HP_ind[HP.ones[i,1],HP.ones[i,2]]=0
PACo.ind <- add_pcoord(list(H=D$H, P=D$P, HP=HP_ind), correction=correction)
if(proc.warnings==TRUE){
Proc.ind <- vegan::procrustes(X=PACo.ind$H_PCo, Y=PACo.ind$P_PCo)
Proc.ind <- vegan::procrustes(X=PACo.ind$H_PCo, Y=PACo.ind$P_PCo, symmetric=D$symmetric)
}else{
Proc.ind <- suppressWarnings(vegan::procrustes(X=PACo.ind$H_PCo, Y=PACo.ind$P_PCo))
Proc.ind <- suppressWarnings(vegan::procrustes(X=PACo.ind$H_PCo, Y=PACo.ind$P_PCo, symmetric=D$symmetric))
}
res.Proc.ind <- c(residuals_paco(Proc.ind))
res.Proc.ind <- append(res.Proc.ind, NA, after= i-1)
Expand Down
2 changes: 1 addition & 1 deletion R/residuals_paco.r
@@ -1,7 +1,7 @@
#' Return Procrustes residuals from a paco object
#'
#' Takes the Procrustes object from \code{vegan::procrustes} of the global superimpostion and pulls out either the residual matrix of superimposition or the residual of each individual interaction (link between host and parasite).
#' @param object An obejct of class \code{procrustes} as returned from PACo (and internally the \code{vegan::procrustes} function). In a PACo output this is \code{D$proc}.
#' @param object An obejct of class \code{procrustes} as returned from PACo (and internally the \code{vegan::procrustes} function). In a PACo output this is D\$proc.
#' @param type Character string. Whether the whole residual matrix (\code{matrix}) or the residuals per interaction (\code{interaction}) is desired.
#' @return If \code{type=interaction}, a named vector of the Procrustes residuals is returned where names are the interactions. If \code{type=matrix}, a matrix of residuals from Procrustes superimposition is returned.
#' @export
Expand Down

0 comments on commit dcb52fd

Please sign in to comment.