/
phyloTop.R
162 lines (135 loc) · 6.47 KB
/
phyloTop.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#' phyloTop: topological properties of phylogenies
#'
#' Calculate a range of topological properties for one or more phylogenetic trees.
#'
#' @author Michelle Kendall \email{michelle.louise.kendall@@gmail.com}
#'
#' @param treeList a \code{list} or \code{multiPhylo} object, or a single tree of class \code{phylo} or \code{phylo4}. All trees should be binary and rooted; if not they will be coerced into binary rooted trees using multi2di, if possible.
#' @param funcs a list of functions. The default is to apply all of the topological functions from the package, but a subset can be specified instead. The functions available are:
#' \itemize{
#' \item \code{\link{avgLadder}}
#' \item \code{\link{cherries}}
#' \item \code{\link{colless.phylo}}
#' \item \code{\link{ILnumber}}
#' \item \code{\link{maxHeight}}
#' \item \code{\link{pitchforks}}
#' \item \code{\link{sackin.phylo}}
#' \item \code{\link{stairs}} (note that this adds two columns to the output, "stairs1" and "stairs2")
#' }
#' @param normalise option to normalise the results of functions where possible. Default is \code{FALSE}
#' @return A matrix where rows correspond to trees and columns correspond to topological properties.
#'
#' @import ape
#'
#' @seealso \code{\link{avgLadder}}, \code{\link{cherries}}, \code{\link{colless.phylo}}, \code{\link{ILnumber}}, \code{\link{maxHeight}}, \code{\link{pitchforks}}, \code{\link{sackin.phylo}}, \code{\link{stairs}}
#'
#' @examples
#' ## Apply all of the functions to a list of 10 random trees, each with 50 tips:
#' phyloTop(rmtree(10,50))
#' ## Normalising the results where possible:
#' phyloTop(rmtree(10,50), normalise=TRUE)
#'
#'
#' @export
phyloTop <- function(treeList,funcs="all", normalise=FALSE){
# check input:
if (!class(treeList) %in% c("list","multiPhylo")) stop("Please supply a list or multiPhylo object for treeList")
# functions which return an integer tree statistic, and their dependencies:
# avgLadder (ladderSizes)
# cherries (nConfig)
# colless.phylo (treeImb, nConfig)
# ILnumber (treeImb, nConfig)
# maxHeight (getDepths)
# pitchforks (nConfig)
# sackin.phylo (getDepths)
# stairs(1 and 2) (treeImb, nConfig)
allfuncs <- c("avgLadder","cherries","colless.phylo","ILnumber","maxHeight","pitchforks","sackin.phylo","stairs")
if (funcs=="all") {funcs <- allfuncs}
else {
# check each func is recognised
for (i in funcs) {
if (!i %in% allfuncs) {
stop(paste0("Function '",i,"' not recognised."))
}
}
# put in alphabetical order, mainly because "stairs" needs to be at the end!
funcs <- sort(funcs)
}
# initialise matrix
# if "stairs" is requested, need two columns for it
if ("stairs" %in% funcs) {treeSummaries <- matrix(nrow=length(treeList),ncol=(length(funcs)+1),0)
colnames(treeSummaries) <- c(setdiff(funcs,"stairs"),"stairs1","stairs2")}
else {treeSummaries <- matrix(nrow=length(treeList),ncol=length(funcs),0)
colnames(treeSummaries) <- funcs }
# go through each tree at a time
for (i in 1:length(treeList)) {
treeList[[i]] <- phyloCheck(treeList[[i]])
ntips <- length(treeList[[i]]$tip.label)
# get the functions which are reused
# can we be more efficient than this repeated calling of phyloCheck?
if (any(is.element(c("maxHeight", "sackin.phylo"),funcs))) {depths <- getDepths(treeList[[i]]) }
if (any(is.element(c("cherries", "colless.phylo","ILnumber","pitchforks","stairs"),funcs))) {
nConfig <- nConfig(treeList[[i]])
treeImb <- treeImb(treeList[[i]])}
# apply each function
for (j in funcs) {
if (j=="avgLadder") { l <- ladderSizes(treeList[[i]])$ladderSizes
if (length(l)==0) {treeSummaries[i,j] <- 0}
else {
if (normalise==FALSE) {treeSummaries[i,j] <- mean(l) }
else {
if (ntips==2) {treeSummaries[i,j] <- 0 }
else {treeSummaries[i,j] <- mean(l)/(ntips-2) }
}
}
}
if (j=="cherries") {
if (normalise==FALSE) {treeSummaries[i,j] <- nConfig$numClades[[2]]}
else {treeSummaries[i,j] <- 2*nConfig$numClades[[2]]/ntips}
}
else if (j=="colless.phylo") { if (ntips==2) {treeSummaries[i,j] <- 0 }
else { diffs <- sum(abs(treeImb[(ntips+1):(2*ntips-1),1]-treeImb[(ntips+1):(2*ntips-1),2]))
if (normalise==FALSE) {n <- 1}
else {n <- ((ntips-1)*(ntips-2))/2}
treeSummaries[i,j] <- diffs/n
}
}
else if (j=="ILnumber") {
if (ntips==2) {treeSummaries[i,j] <- 0} # if ntips=2 the result is 0 (and we should not try to normalise it!)
else {NDs <- treeImb[(ntips+1):(2*ntips-1),]
if (normalise==FALSE) {treeSummaries[i,j] <- sum(apply(NDs,1, function(x) sum(x==1)==1))}
else {treeSummaries[i,j] <- (sum(apply(NDs,1, function(x) sum(x==1)==1)))/(ntips-2)}
}
}
else if (j=="maxHeight") {
heights <- depths$tipDepths
if (normalise==FALSE) {treeSummaries[i,j] <- max(heights)}
else {treeSummaries[i,j] <- max(heights)/(ntips - 1)}
}
else if (j=="pitchforks") {
if (ntips==2) { treeSummaries[i, j] <- 0 }
else if (normalise==FALSE) {treeSummaries[i,j] <- nConfig$numClades[[3]]}
else {treeSummaries[i,j] <- 3*nConfig$numClades[[3]]/ntips}
}
else if (j=="sackin.phylo") {tipDepths <- depths$tipDepths
if (normalise==FALSE) {
treeSummaries[i,j] <- sum(tipDepths)}
else {treeSummaries[i,j] <- (sum(tipDepths))/((1/2)*(ntips*(ntips+1)) - 1)}
}
else if (j=="stairs") {
if(ntips==2){
treeSummaries[i, "stairs1"] <- 0
treeSummaries[i, "stairs2"] <- 1
}
else {
tImb <- treeImb[(ntips+1):(2*ntips-1),]
stair1 <- (1/(ntips - 1)) * sum(tImb[,1] != tImb[,2])
stair2 <- (1/(ntips - 1)) * sum(pmin(tImb[,2], tImb[,1])/pmax(tImb[,2], tImb[,1]))
treeSummaries[i,"stairs1"] <- stair1
treeSummaries[i,"stairs2"] <- stair2
}
}
}
}
return(as.data.frame(treeSummaries))
}