/
geneticcode.jl
427 lines (362 loc) · 15.7 KB
/
geneticcode.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
###
### Genetic Code
###
###
### Genetic code table and translator from RNA to amino acid sequence.
###
### This file is a part of BioJulia.
### License is MIT: https://github.com/BioJulia/BioSequences.jl/blob/master/LICENSE.md
# Either kind of nucleotide symbol; codons can be built from DNA or RNA alike.
const XNA = Union{DNA, RNA}

"""
    unambiguous_codon(a::XNA, b::XNA, c::XNA)

Pack three nucleotides into a single codon index and return it as a `UInt64`.

Each nucleotide is mapped through the `twobitnucs` lookup table (indexed by the
nucleotide's raw `UInt8` bits plus one). The three results are combined as
`(a << 4) | (b << 2) | c`, so `a` occupies the highest position.

The table lookups run under `@inbounds` and this function performs no
validation of its own.
"""
function unambiguous_codon(a::XNA, b::XNA, c::XNA)
    hi = @inbounds twobitnucs[reinterpret(UInt8, a) + 0x01]
    mid = @inbounds twobitnucs[reinterpret(UInt8, b) + 0x01]
    lo = @inbounds twobitnucs[reinterpret(UInt8, c) + 0x01]
    packed = (hi << 4) | (mid << 2) | lo
    return packed % UInt64
end
# A genetic code is a table mapping codon indices (`UInt64`) to AminoAcids.
"""
    GeneticCode

Type representing a genetic code.

A `GeneticCode` is an `AbstractDict{UInt64, AminoAcid}` with 64 entries. Keys
are the codon indices produced by `unambiguous_codon`; indexing with key `k`
reads entry `k + 1` of `tbl`.

# Fields
- `name`: the name of the code, printed by `show`.
- `tbl`: the 64 amino acids, one per codon index.
"""
struct GeneticCode <: AbstractDict{UInt64, AminoAcid}
name::String
tbl::NTuple{64, AminoAcid}
end
###
### Basic Functions
###
# Look up the amino acid stored for the codon index `codon`.
#
# Valid keys are 0 through 63 (the table has 64 entries and is 1-based, hence
# the `+ 1`). A key outside that range throws a `KeyError` instead of reading
# past the end of the tuple. The check sits in a `@boundscheck` block and the
# method is `@inline`, so callers that index under `@inbounds` (as the
# translation loops in this file do) still get the unchecked fast path.
@inline function Base.getindex(code::GeneticCode, codon::UInt64)
    @boundscheck codon < UInt64(64) || throw(KeyError(codon))
    return @inbounds code.tbl[codon + one(UInt64)]
end
# `GeneticCode` is an immutable struct, so copying hands back the same object.
function Base.copy(code::GeneticCode)
    return code
end

# Every genetic code has exactly 64 entries.
function Base.length(code::GeneticCode)
    return 64
end

# Compact display: only the name of the code.
function Base.show(io::IO, code::GeneticCode)
    return print(io, code.name)
end
# Multi-line display: the name of the code, then one output line per pair of
# leading nucleotides, each listing the four codons that share that pair
# together with the amino acid they map to.
function Base.show(io::IO, ::MIME"text/plain", code::GeneticCode)
    print(io, code.name)
    bases = rna"ACGU"
    for n1 in bases
        for n2 in bases
            println(io)
            print(io, " ")
            for n3 in bases
                residue = code[unambiguous_codon(n1, n2, n3)]
                print(io, n1, n2, n3, ": ", residue)
                # No separator after the last codon of a line.
                n3 == RNA_U || print(io, " ")
            end
        end
    end
end
###
### Iterating through genetic code
###
# Iterate over the code in codon-index order, from 0 through 63. Each step
# yields the tuple `(index, amino_acid)`; the state is the next index to visit.
#
# NOTE(review): the elements are tuples, while `AbstractDict{K, V}` declares
# `Pair{K, V}` as its element type. Left as is to preserve existing behaviour.
function Base.iterate(code::GeneticCode, x = UInt64(0))
    x > UInt64(63) && return nothing
    aa = @inbounds code[x]
    return (x, aa), x + 1
end
###
### Default genetic codes
###
"""
    TransTables()

Registry of genetic codes keyed by integer id. A new registry starts empty.

# Fields
- `tables`: maps an id to its `GeneticCode`.
- `bindings`: maps an id to the name (as a `Symbol`) of the constant that the
  code is bound to.
"""
struct TransTables
    tables::Dict{Int,GeneticCode}
    bindings::Dict{Int,Symbol}
    function TransTables()
        # Build the dictionaries with their final types instead of creating
        # `Dict{Any,Any}` instances and converting them on construction.
        return new(Dict{Int,GeneticCode}(), Dict{Int,Symbol}())
    end
end
# Fetch the genetic code registered under `key`; any integer type is accepted
# and converted to `Int` before the dictionary lookup.
function Base.getindex(trans::TransTables, key::Integer)
    return trans.tables[Int(key)]
end
# List the registered tables in ascending id order, one per line, as
# "<id>. <name>", followed by the bound constant's name in parentheses when one
# is recorded for that id.
function Base.show(io::IO, trans::TransTables)
    print(io, "Translation Tables:")
    for id in sort!(collect(keys(trans.tables)))
        println(io)
        print(io, lpad(id, 3), ". ")
        show(io, trans.tables[id])
        haskey(trans.bindings, id) && print(io, " (", trans.bindings[id], ")")
    end
end
"""
    ncbi_trans_table

Genetic code list of NCBI.

The standard genetic code is `ncbi_trans_table[1]` and others can be shown by
`show(ncbi_trans_table)`.

Tables are added by `@register_ncbi_gencode`, which stores each parsed code
under its numeric id together with the name of the constant bound to it.

For more details, see:
http://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=cgencodes.
"""
const ncbi_trans_table = TransTables()
"""
    @register_ncbi_gencode id bind tbl

Parse the textual table `tbl` with `parse_gencode`, bind the resulting
`GeneticCode` to a constant named `bind` in the calling scope, and record it in
`ncbi_trans_table` under `id`, together with the binding's name.
"""
macro register_ncbi_gencode(id, bind, tbl)
quote
# Parse once and reuse the same object for the constant and the registry.
gencode = parse_gencode($tbl)
# `esc` makes the constant appear under the caller-supplied name.
const $(esc(bind)) = gencode
ncbi_trans_table.tables[$id] = gencode
# The binding name is turned into a string at expansion time and stored as a Symbol.
ncbi_trans_table.bindings[$id] = Symbol($(string(bind)))
end
end
"""
    parse_gencode(s)

Build a `GeneticCode` from the textual form of an NCBI translation table.

The first non-blank line of `s` is the title, `"<number>. <name>"`; the number
is dropped and the remainder becomes the name of the code. The remaining lines
are `label = value` rows. The rows labelled `AAs`, `Base1`, `Base2` and `Base3`
must each hold 64 characters: position `i` of `AAs` is the amino acid encoded
by the codon spelled by position `i` of the three base rows. Other rows (such
as `Starts`) are ignored.

Rows are located by label, and blank lines and padding around labels and values
are ignored, so the result does not depend on column alignment.

Throws an `ArgumentError` if the table is empty or if a required row is missing
or does not have 64 entries.
"""
function parse_gencode(s)
    lines = filter(line -> !isempty(strip(line)), split(chomp(s), '\n'))
    isempty(lines) && throw(ArgumentError("genetic code table is empty"))
    name = split(first(lines), ' ', limit = 2)[2] # drop number

    # Collect the `label = value` rows, ignoring the padding around both parts.
    rows = Dict{String, String}()
    for line in Iterators.drop(lines, 1)
        parts = split(line, '=', limit = 2)
        length(parts) == 2 || continue
        rows[String(strip(parts[1]))] = String(strip(parts[2]))
    end

    aas, base1, base2, base3 = map(("AAs", "Base1", "Base2", "Base3")) do label
        row = get(rows, label, "")
        length(row) == 4^3 || throw(ArgumentError(
            "genetic code row \"$label\" must have $(4^3) entries, got $(length(row))"
        ))
        row
    end

    # Codons not mentioned by the table would stay `AA_X`.
    codearr = fill(AA_X, 4^3)
    for (aa, b1, b2, b3) in zip(aas, base1, base2, base3)
        codon = unambiguous_codon(DNA(b1), DNA(b2), DNA(b3))
        codearr[codon + one(UInt64)] = AminoAcid(aa)
    end
    return GeneticCode(name, NTuple{64, AminoAcid}(codearr))
end
# Genetic codes translation tables are taken from the NCBI taxonomy database.
# Each registration below binds the named constant to the parsed table and
# records it in `ncbi_trans_table` under the given id. The rows of a table give,
# per codon, the amino acid (`AAs`), the start-codon marker (`Starts`) and the
# three codon bases (`Base1`, `Base2`, `Base3`). `parse_gencode` does not read
# the `Starts` row.
@register_ncbi_gencode 1 standard_genetic_code """
1. The Standard Code
AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = ---M---------------M---------------M----------------------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 2 vertebrate_mitochondrial_genetic_code """
2. The Vertebrate Mitochondrial Code
AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
Starts = --------------------------------MMMM---------------M------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 3 yeast_mitochondrial_genetic_code """
3. The Yeast Mitochondrial Code
AAs = FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = ----------------------------------MM----------------------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 4 mold_mitochondrial_genetic_code """
4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code
AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = --MM---------------M------------MMMM---------------M------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 5 invertebrate_mitochondrial_genetic_code """
5. The Invertebrate Mitochondrial Code
AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
Starts = ---M----------------------------MMMM---------------M------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 6 ciliate_nuclear_genetic_code """
6. The Ciliate, Dasycladacean and Hexamita Nuclear Code
AAs = FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = -----------------------------------M----------------------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 9 echinoderm_mitochondrial_genetic_code """
9. The Echinoderm and Flatworm Mitochondrial Code
AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
Starts = -----------------------------------M---------------M------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 10 euplotid_nuclear_genetic_code """
10. The Euplotid Nuclear Code
AAs = FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = -----------------------------------M----------------------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 11 bacterial_plastid_genetic_code """
11. The Bacterial, Archaeal and Plant Plastid Code
AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = ---M---------------M------------MMMM---------------M------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 12 alternative_yeast_nuclear_genetic_code """
12. The Alternative Yeast Nuclear Code
AAs = FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = -------------------M---------------M----------------------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 13 ascidian_mitochondrial_genetic_code """
13. The Ascidian Mitochondrial Code
AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
Starts = ---M------------------------------MM---------------M------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 14 alternative_flatworm_mitochondrial_genetic_code """
14. The Alternative Flatworm Mitochondrial Code
AAs = FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
Starts = -----------------------------------M----------------------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 16 chlorophycean_mitochondrial_genetic_code """
16. Chlorophycean Mitochondrial Code
AAs = FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = -----------------------------------M----------------------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 21 trematode_mitochondrial_genetic_code """
21. Trematode Mitochondrial Code
AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
Starts = -----------------------------------M---------------M------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 22 scenedesmus_obliquus_mitochondrial_genetic_code """
22. Scenedesmus obliquus Mitochondrial Code
AAs = FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = -----------------------------------M----------------------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 23 thraustochytrium_mitochondrial_genetic_code """
23. Thraustochytrium Mitochondrial Code
AAs = FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = --------------------------------M--M---------------M------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
# NOTE(review): the binding below is spelled "pterobrachia" while the table
# title reads "Pterobranchia". The spelling is kept because it is the constant
# name that gets defined.
@register_ncbi_gencode 24 pterobrachia_mitochondrial_genetic_code """
24. Pterobranchia Mitochondrial Code
AAs = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG
Starts = ---M---------------M---------------M---------------M------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
@register_ncbi_gencode 25 candidate_division_sr1_genetic_code """
25. Candidate Division SR1 and Gracilibacteria Code
AAs = FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
Starts = ---M-------------------------------M---------------M------------
Base1 = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2 = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
Base3 = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
"""
###
### Translation
###
"""
    translate(seq; code=standard_genetic_code, allow_ambiguous_codons=true, alternative_start=false)

Translate a `LongRNA` or a `LongDNA` to a `LongAA`.

Translation uses the genetic code `code` to map codons to amino acids. See
`ncbi_trans_table` for available genetic codes.

# Keywords
- `code`: the `GeneticCode` used for the mapping.
- `allow_ambiguous_codons`: when `true`, a codon that does not determine a
  unique amino acid is translated to an ambiguous amino acid such as `AA_X`;
  when `false`, such a codon results in an error.
- `alternative_start`: for organisms that utilize alternative start codons, set
  this to `true` to always convert the first codon to a methionine.
"""
function translate(ntseq::SeqOrView;
    code::GeneticCode = standard_genetic_code,
    allow_ambiguous_codons::Bool = true,
    alternative_start::Bool = false
)
    # 11/32 is slightly more than 1/3, so this is never less than the number of
    # whole codons; `translate!` resizes the buffer to the exact length.
    capacity = ((length(ntseq) % UInt) * 11) ÷ 32
    buffer = LongAA(undef, capacity)
    return translate!(
        buffer,
        ntseq;
        code = code,
        allow_ambiguous_codons = allow_ambiguous_codons,
        alternative_start = alternative_start,
    )
end
# Translate a sequence over a two-bit nucleotide alphabet into `aaseq`.
#
# `aaseq` is resized to one residue per codon and returned. An error is raised
# when the length of `ntseq` is not a multiple of three. Every codon is looked
# up directly in `code`; `allow_ambiguous_codons` is accepted but not consulted
# by this method. With `alternative_start`, the first residue of a non-empty
# result is overwritten with `AA_M`.
function translate!(aaseq::LongAA,
    ntseq::SeqOrView{<:NucleicAcidAlphabet{2}};
    code::GeneticCode = standard_genetic_code,
    allow_ambiguous_codons::Bool = true,
    alternative_start::Bool = false
)
    ncodons, leftover = divrem(length(ntseq) % UInt, 3)
    if !iszero(leftover)
        error("LongRNA length is not divisible by three. Cannot translate.")
    end
    resize!(aaseq, ncodons)
    @inbounds for i in 1:ncodons
        stop = 3i  # position of the third nucleotide of codon `i`
        codon = unambiguous_codon(ntseq[stop - 2], ntseq[stop - 1], ntseq[stop])
        aaseq[i] = code[codon]
    end
    if alternative_start && !isempty(aaseq)
        @inbounds aaseq[1] = AA_M
    end
    return aaseq
end
# Translate a sequence over a four-bit nucleotide alphabet into `aaseq`.
#
# `aaseq` is resized to one residue per codon and returned. An error is raised
# when the length of `ntseq` is not a multiple of three, or when a codon
# contains a gap. Each nucleotide is reinterpreted as `RNA`. Codons made only of
# certain nucleotides are looked up directly in `code`; all others go through
# `try_translate_ambiguous_codon`, which honours `allow_ambiguous_codons`. With
# `alternative_start`, the first residue of a non-empty result is overwritten
# with `AA_M`.
function translate!(aaseq::LongAA,
    ntseq::SeqOrView{<:NucleicAcidAlphabet{4}};
    code::GeneticCode = standard_genetic_code,
    allow_ambiguous_codons::Bool = true,
    alternative_start::Bool = false
)
    ncodons, leftover = divrem(length(ntseq) % UInt, 3)
    if !iszero(leftover)
        error("LongRNA length is not divisible by three. Cannot translate.")
    end
    resize!(aaseq, ncodons)
    @inbounds for i in 1:ncodons
        stop = 3i  # position of the third nucleotide of codon `i`
        a = reinterpret(RNA, ntseq[stop - 2])
        b = reinterpret(RNA, ntseq[stop - 1])
        c = reinterpret(RNA, ntseq[stop])
        if isgap(a) | isgap(b) | isgap(c)
            error("Cannot translate nucleotide sequences with gaps.")
        end
        aaseq[i] = if iscertain(a) & iscertain(b) & iscertain(c)
            code[unambiguous_codon(a, b, c)]
        else
            try_translate_ambiguous_codon(code, a, b, c, allow_ambiguous_codons)
        end
    end
    if alternative_start && !isempty(aaseq)
        @inbounds aaseq[1] = AA_M
    end
    return aaseq
end
"""
    try_translate_ambiguous_codon(code, x, y, z, allow_ambiguous)

Translate the possibly ambiguous codon `x`, `y`, `z` with `code`.

Every combination of the unambiguous nucleotides contained in `x`, `y` and `z`
is translated. If they all agree, that amino acid is returned. Otherwise:

- if `allow_ambiguous` is `false`, an error naming the ambiguous codon is
  raised;
- if the results only mix `AA_N`/`AA_D`, `AA_I`/`AA_L` or `AA_Q`/`AA_E`, the
  matching ambiguity code (`AA_B`, `AA_J` or `AA_Z`) is returned;
- in every other case `AA_X` is returned.

The first expansion is taken with `Iterators.peel`, so each of `x`, `y` and `z`
must contain at least one nucleotide; this function does not check for gaps.
"""
function try_translate_ambiguous_codon(
    code::GeneticCode,
    x::RNA,
    y::RNA,
    z::RNA,
    allow_ambiguous::Bool
)::AminoAcid
    ((a, b, c), unambigs) = Iterators.peel(
        Iterators.product(map(UnambiguousRNAs, (x, y, z))...)
    )
    aa = @inbounds code[unambiguous_codon(a, b, c)]
    @inbounds for (a, b, c) in unambigs
        aa_new = code[unambiguous_codon(a, b, c)]
        aa_new == aa && continue
        # Report the ambiguous input codon: the concrete expansion (a, b, c)
        # translates fine on its own and would be misleading in the message.
        allow_ambiguous || error("codon ", x, y, z, " cannot be unambiguously translated")
        aa = if aa_new in (AA_N, AA_D) && aa in (AA_N, AA_D, AA_B)
            AA_B
        elseif aa_new in (AA_I, AA_L) && aa in (AA_I, AA_L, AA_J)
            AA_J
        elseif aa_new in (AA_Q, AA_E) && aa in (AA_Q, AA_E, AA_Z)
            AA_Z
        else
            AA_X
        end
        # Once the result is AA_X no further expansion can change it.
        aa == AA_X && break
    end
    return aa
end
"""
    UnambiguousRNAs(x::RNA)

Iterator over the single-bit nucleotides contained in `x`.

Each set bit of the raw `UInt8` encoding of `x` yields one `RNA` whose encoding
has only that bit set, starting from the lowest bit. The number of elements is
the number of set bits.
"""
struct UnambiguousRNAs
    x::RNA
end

function Base.eltype(::Type{UnambiguousRNAs})
    return RNA
end

function Base.length(x::UnambiguousRNAs)
    return count_ones(reinterpret(UInt8, x.x))
end

# The state is the set of bits still to be emitted.
function Base.iterate(x::UnambiguousRNAs, state=reinterpret(UInt8, x.x))
    if iszero(state)
        return nothing
    end
    lowest = 0x01 << (trailing_zeros(state) & 7)
    remaining = state & (state - 0x01)  # clear the lowest set bit
    return (reinterpret(RNA, lowest), remaining)
end