-
Notifications
You must be signed in to change notification settings - Fork 2
/
parsbl.go
145 lines (133 loc) · 4.36 KB
/
parsbl.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// parsbl is a tool to apply parsimony branch lengths to a tree. It reads
// multistate files and so if you have a nucleotide file, you need to give
// it the -n flag and it will output a multistate file.
//
package main
import (
"bufio"
"flag"
"fmt"
"log"
"os"
"strings"
"github.com/FePhyFoFum/gophy"
)
func createMultiStateFile(infile string) {
f, err := os.Create(infile + ".gophy.ms")
if err != nil {
log.Fatal(err)
}
defer f.Close()
lg := bufio.NewWriter(f)
tseqs := gophy.ReadSeqsFromFile(infile)
for _, i := range tseqs {
lg.WriteString(">" + i.NM + "\n")
vals := make([]string, len(i.SQ))
for j := range i.SQ {
if string(i.SQ[j]) == "A" {
vals[j] = "0"
} else if string(i.SQ[j]) == "C" {
vals[j] = "1"
} else if string(i.SQ[j]) == "G" {
vals[j] = "2"
} else if string(i.SQ[j]) == "T" {
vals[j] = "3"
} else {
vals[j] = "-"
}
}
lg.WriteString(strings.Join(vals, " ") + "\n")
}
lg.Flush()
}
func calcPBL(nsites int, numstates int, mseqs []gophy.MSeq, seqs map[string][]string,
trees []*gophy.Tree) {
x := gophy.NewMultStateModel(numstates)
bf := gophy.GetEmpiricalBaseFreqsMS(mseqs, numstates)
x.M.SetBaseFreqs(bf)
x.M.EBF = x.M.BF
// get the site patternas
patterns, patternsint, gapsites, constant, uninformative, _ :=
gophy.GetSitePatternsMS(mseqs, x.M.GetCharMap(), x.M.GetNumStates())
for _, t := range trees {
patternval, _ := gophy.PreparePatternVecsMS(t, patternsint, seqs, x.M.GetCharMap(),
x.M.GetNumStates())
//this is necessary to get order of the patters in the patternvec since they have no order
// this will be used with fullpattern to reconstruct the sequences
//list of sites
fmt.Fprintln(os.Stderr, "nsites:", nsites)
fmt.Fprintln(os.Stderr, "patterns:", len(patterns), len(patternsint))
fmt.Fprintln(os.Stderr, "onlygaps:", len(gapsites))
fmt.Fprintln(os.Stderr, "constant:", len(constant))
fmt.Fprintln(os.Stderr, "uninformative:", len(uninformative))
// model things
gophy.PCalcSankParsPatterns(t, x.M.GetNumStates(), patternval, 1)
gophy.EstParsBL(t, x.M.GetNumStates(), patternval, nsites)
fmt.Println(t.Rt.Newick(true) + ";")
}
}
func calcPAST(site int, nsites int, numstates int, mseqs []gophy.MSeq, seqs map[string][]string,
trees []*gophy.Tree) {
x := gophy.NewMultStateModel(numstates)
bf := gophy.GetEmpiricalBaseFreqsMS(mseqs, numstates)
x.M.SetBaseFreqs(bf)
x.M.EBF = x.M.BF
// get the site patternas
patterns, patternsint, gapsites, constant, uninformative, _ :=
gophy.GetSitePatternsMS(mseqs, x.M.GetCharMap(), x.M.GetNumStates())
for _, t := range trees {
patternval, _ := gophy.PreparePatternVecsMS(t, patternsint, seqs, x.M.GetCharMap(),
x.M.GetNumStates())
//this is necessary to get order of the patters in the patternvec since they have no order
// this will be used with fullpattern to reconstruct the sequences
//list of sites
fmt.Fprintln(os.Stderr, "nsites:", nsites)
fmt.Fprintln(os.Stderr, "patterns:", len(patterns), len(patternsint))
fmt.Fprintln(os.Stderr, "onlygaps:", len(gapsites))
fmt.Fprintln(os.Stderr, "constant:", len(constant))
fmt.Fprintln(os.Stderr, "uninformative:", len(uninformative))
// model things
gophy.PCalcSankParsPatterns(t, x.M.GetNumStates(), patternval, 1)
gophy.CalcSankParsAncStateSingleSite(t, numstates, 0)
fmt.Println(t.Rt.Newick(true) + ";")
}
}
func main() {
tfn := flag.String("t", "", "tree filename")
afn := flag.String("s", "", "seq filename")
nuc := flag.Bool("n", false, "nucleotide data?")
anc := flag.Bool("a", false, "ancestral states instead")
flag.Parse()
if len(*tfn) == 0 {
fmt.Fprintln(os.Stderr, "need a tree filename (-t)")
os.Exit(1)
}
if len(*afn) == 0 {
fmt.Fprintln(os.Stderr, "need a seq filename (-s)")
os.Exit(1)
}
if *nuc {
fmt.Fprintln(os.Stderr, "nucleotide data. will write multistate file")
createMultiStateFile(*afn)
*afn = *afn + ".gophy.ms"
}
//read a tree file
trees := gophy.ReadTreesFromFile(*tfn)
fmt.Fprintln(os.Stderr, len(trees), "trees read")
//read a multistate seq file
nsites := 0
seqs := map[string][]string{}
mseqs, numstates := gophy.ReadMSeqsFromFile(*afn)
seqnames := make([]string, 0)
for _, i := range mseqs {
seqs[i.NM] = i.SQs
seqnames = append(seqnames, i.NM)
nsites = len(i.SQs)
}
//conduct analysis
if *anc {
calcPAST(0, nsites, numstates, mseqs, seqs, trees)
} else {
calcPBL(nsites, numstates, mseqs, seqs, trees)
}
}