-
Notifications
You must be signed in to change notification settings - Fork 1
/
pal2nal
executable file
·118 lines (89 loc) · 2.68 KB
/
pal2nal
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env ruby
#
USAGE = """
Simple Pal2Nal implementation
Pal2Nal converts a multiple sequence alignment of proteins and the
corresponding DNA (or mRNA) sequences into a codon-based DNA
alignment. This version implements a simple 1-to-1 matching with use
of a codon table and validation(!)
The resulting codon-based DNA alignment can further be subjected to
the calculation of synonymous (Ks) and non-synonymous (Ka)
substitution rates and be fed into PAML.
pep.aln Protein (AA) alignment
nuc.fasta Nucleotide sequences
Example:
./bin/pal2nal test/data/fasta/codon/aa-alignment.fa test/data/fasta/codon/nt.fa
"""
gempath = File.dirname(File.dirname(__FILE__))
$: << File.join(gempath,'lib')
VERSION_FILENAME=File.join(gempath,'VERSION')
version = File.new(VERSION_FILENAME).read.chomp
if ARGV.size == 0
print USAGE
end
require 'optparse'
require 'bio-alignment'
require 'bigbio'
include Bio::BioAlignment
options = {show_help: false, codon_table: 1, validate: true}
opts = OptionParser.new do |o|
o.banner = "Usage: #{File.basename($0)} pep.aln nuc.fasta [options]"
o.on("--codon-table [int]", Integer, "Codon table (default 1)") do |ct|
options[:codon_table] = ct
end
o.on("--no-validate", "Validate codons") do |b|
options[:validate] = false
end
o.on("-q", "--quiet", "Run quietly") do |q|
options[:quiet] = true
end
o.on("-v","--verbose", "Run verbosely") do |v|
options[:verbose] = true
end
o.on("-d", "--debug", "Debug mode") do |v|
options[:debug] = true
end
o.separator ""
o.on_tail('-h', '--help', 'display this help and exit') do
options[:show_help] = true
end
end
begin
opts.parse!(ARGV)
$stderr.print "Pal2Nal #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins 2017\n" if !options[:quiet]
if options[:show_help] or ARGV.size < 2
print opts
print USAGE
exit 1
end
$stderr.print "Options: ",options,"\n" if !options[:quiet]
rescue OptionParser::InvalidOption => e
options[:invalid_argument] = e.message
end
aafn = ARGV.shift
ntfn = ARGV.shift
aa_aln = Alignment.new
aa = FastaReader.new(aafn)
aa.each do | rec |
aa_aln << Sequence.new(rec.id, rec.seq)
end
nt_aln = Alignment.new
nt = FastaReader.new(ntfn)
nt.each do | rec |
nt_aln << Sequence.new(rec.id, rec.seq)
end
pal2nal = aa_aln.pal2nal(nt_aln, :codon_table => options[:codon_table], :do_validate => options[:validate])
LINELEN = 60
offset = 0
size = pal2nal.first.seq.size * 3
print "CLUSTAL W multiple sequence alignment\n"
while size > 0
print "\n"
pal2nal.each do | seq |
print seq.id," "*(18-seq.id.size)
print seq.to_nt[offset..offset+LINELEN-1],"\n"
end
offset += LINELEN
size -= LINELEN
end
print "\n"