/
blt_dct_test.r
119 lines (100 loc) · 4.57 KB
/
blt_dct_test.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env Rscript
library("optparse")
library("doSNOW")
#Arguments
option_list = list(
make_option(c("-t", "--trees"), type="character", default=NULL,help="input gene trees in newick", metavar="character"),
make_option(c("-s", "--species_tree"), type="character", default=NULL, help="rooted species tree in newick", metavar="character"),
make_option(c("-n", "--node"), type="numeric", default=NULL, help="node number of a tested clade in a species tree", metavar="numeric"),
make_option(c("-c", "--cores"), type="numeric", default=NULL, help="number of cores for parallel computing", metavar="numeric"),
make_option(c("-p", "--prefix"), type="character", default=NULL, help="clade prefix", metavar="character"),
make_option(c("-o", "--outgroup"), type="character", default=NULL, help="outgroup species (only one allowed)", metavar="character")
)
opt_parser = OptionParser(option_list=option_list)
opt = parse_args(opt_parser)
#Create cluster
cl = makeCluster(opt$cores,type = "SOCK")
library("phangorn")
library("foreach")
library("ape")
args = commandArgs(trailingOnly=TRUE)
test_triplet=function(taxa,gene_trees,clade_name,outg)
{
gene_trees=gene_trees
trl_all=c()
brls_all1=c()
brls_all2=c()
internal_all=c()
out_all=c()
root_tip_all=c()
for (tre in gene_trees)
{
if(outg %in% tre$tip.label & all(taxa %in% tre$tip.label))
{
tre=root(tre,outg)
trl=sum(tre$edge.length)
tre_trip=keep.tip(tre,taxa)
brls=extract.clade(tre_trip,max(tre_trip$edge))$edge.length
root_tip=tre_trip$tip.label[!tre_trip$tip.label %in% extract.clade(tre_trip,max(tre_trip$edge))$tip.label]
trl_all=c(trl_all,trl)
outl=tre_trip$edge.length[which(tre_trip$edge[,1]==4 & (tre_trip$edge[,2]==1 | tre_trip$edge[,2]==2 | tre_trip$edge[,2]==3))]
out_all=c(out_all,outl)
internall=tre_trip$edge.length[which(tre_trip$edge[,1]==4 & tre_trip$edge[,2]==5)]
internal_all=c(internal_all,internall)
brls_all1=c(brls_all1,brls[1])
brls_all2=c(brls_all2,brls[2])
root_tip_all=c(root_tip_all,root_tip)
}
}
#Get counts/ compute common and non-common
counts=table(root_tip_all)
com=names(which.max(counts))
dis=names(which.min(counts))
m=data.frame(clade_name,P1=tre_trip$tip.label[1],P2=tre_trip$tip.label[2],P3=tre_trip$tip.label[3],brl1=brls_all1,brl2=brls_all2,trl=trl_all,brl_out=out_all,brl_int=internal_all,root_tip=root_tip_all,topo=ifelse(root_tip_all %in% com,"concord",ifelse(root_tip_all %in% dis,"discord1","discord2")))
#write.table(m,paste(c(taxa,"csv"),collapse="."),quote=F,row.names=F,col.names=F)
if(!any(table(m$root_tip)==0) & length(table(m$root_tip))==3)
{
#Normalize ditances by total gene tree lengths
m$proxy_t=(m$brl1+m$brl2)/m$trl
ccom=m[m$topo=="concord","proxy_t"]
c1=m[m$topo=="discord1","proxy_t"]
c2=m[m$topo=="discord2","proxy_t"]
not_com_count=table(as.vector(m[m$topo!="concord","topo"]))
w_testc1=wilcox.test(ccom,c1)$p.value
w_testc2=wilcox.test(ccom,c2)$p.value
w_test=wilcox.test(c1,c2)$p.value
chi=chisq.test(not_com_count)$p.value
v_out=c(clade_name,names(counts),counts,chi,mean(ccom),mean(c1),mean(c2),w_testc1,w_testc2,w_test)
return(as.vector(v_out))
}
}
getstats_triplets=function(taxa_list,gene_trees,clade_name,outg,species_tree)
{
taxa_combn=combn(taxa_list,m=3)
print(paste("N triplets:",ncol(taxa_combn)))
pb=txtProgressBar(0,ncol(taxa_combn),style=3)
progress=function(n){
setTxtProgressBar(pb,n)
}
opts=list(progress=progress)
out_t=foreach(i=1:ncol(taxa_combn),.combine='rbind',.options.snow=opts) %dopar%
{
triplet=taxa_combn[,i]
stats1=test_triplet(triplet,gene_trees,clade_name,outg)
return(stats1)
}
write.table(as.data.frame(out_t),clade_name,quote = F, row.names = F, col.names = F,sep=",")
}
clusterExport(cl,c("read.tree","cophenetic.phylo","root","keep.tip","extract.clade","setTxtProgressBar","test_triplet","drop.tip","write.tree"))
registerDoSNOW(cl)
tt=read.tree(opt$species_tree)
cat("Read species topology. Done.\n")
phy=read.tree(opt$trees)
cat("Read gene trees. Done.\n")
clade=extract.clade(tt,as.numeric(opt$node))
cat("Exatract clade. Done.\n")
cat(paste(clade$tip.label,collapse="\n"))
cat("\nCalculating DCT/BLT.\n")
getstats_triplets(clade$tip.label,phy,opt$prefix,opt$outgroup,clade)
cat("\nDone.\n")
stopCluster(cl)