Skip to content

Commit

Permalink
Updating codon tables to NCBI version 3.9 (Bug 2962)
Browse files Browse the repository at this point in the history
  • Loading branch information
peterjc committed Dec 6, 2009
1 parent 8e02c77 commit 74ba9d2
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 17 deletions.
109 changes: 99 additions & 10 deletions Bio/Data/CodonTable.py
@@ -1,6 +1,13 @@
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Codon tables based on those from the NCBI.
These tables are based on parsing the NCBI file:
ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
Last updated for Version 3.9
"""

from Bio import Alphabet
from Bio.Alphabet import IUPAC
Expand Down Expand Up @@ -138,7 +145,9 @@ class NCBICodonTableRNA(NCBICodonTable):

def register_ncbi_table(name, alt_name, id,
table, start_codons, stop_codons):
names = name.split("; ")
#In most cases names are divided by "; ", however there is also
#'Bacterial and Plant Plastid' (which used to be just 'Bacterial')
names = [x.strip() for x in name.replace(" and ","; ").split("; ")]

dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
stop_codons)
Expand Down Expand Up @@ -188,7 +197,7 @@ def register_ncbi_table(name, alt_name, id,
generic_by_name[name] = generic

### These tables created from the data file
### ftp://ncbi.nlm.nih.gov/entrez/misc/data/gc.prt
### ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
### using the following:
##import re
##for line in open("gc.prt").readlines():
Expand Down Expand Up @@ -218,7 +227,7 @@ def register_ncbi_table(name, alt_name, id,
## if len(names) == 1:
## names.append(None)
## print "register_ncbi_table(name = %s," % repr(names[0])
## print " alt_name = %s, id = %d", % \
## print " alt_name = %s, id = %d," % \
## (repr(names[1]), id)
## print " table = {"
## s = " "
Expand Down Expand Up @@ -315,7 +324,7 @@ def register_ncbi_table(name, alt_name, id,
'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
'GGA': 'G', 'GGG': 'G', },
stop_codons = [ 'TAA', 'TAG', ],
start_codons = [ 'ATG', ]
start_codons = [ 'ATA', 'ATG', ]
)
register_ncbi_table(name = 'Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma',
alt_name = 'SGC3', id = 4,
Expand Down Expand Up @@ -376,7 +385,7 @@ def register_ncbi_table(name, alt_name, id,
stop_codons = [ 'TGA', ],
start_codons = [ 'ATG', ]
)
register_ncbi_table(name = 'Echinoderm Mitochondrial',
register_ncbi_table(name = 'Echinoderm Mitochondrial; Flatworm Mitochondrial',
alt_name = 'SGC8', id = 9,
table = {
'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
Expand All @@ -393,7 +402,7 @@ def register_ncbi_table(name, alt_name, id,
'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
'GGA': 'G', 'GGG': 'G', },
stop_codons = [ 'TAA', 'TAG', ],
start_codons = [ 'ATG', ]
start_codons = [ 'ATG', 'GTG', ]
)
register_ncbi_table(name = 'Euplotid Nuclear',
alt_name = 'SGC9', id = 10,
Expand All @@ -414,7 +423,7 @@ def register_ncbi_table(name, alt_name, id,
stop_codons = [ 'TAA', 'TAG', ],
start_codons = [ 'ATG', ]
)
register_ncbi_table(name = 'Bacterial',
register_ncbi_table(name = 'Bacterial and Plant Plastid',
alt_name = None, id = 11,
table = {
'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
Expand Down Expand Up @@ -470,9 +479,9 @@ def register_ncbi_table(name, alt_name, id,
'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
'GGA': 'G', 'GGG': 'G', },
stop_codons = [ 'TAA', 'TAG', ],
start_codons = [ 'ATG', ]
start_codons = [ 'TTG', 'ATA', 'ATG', 'GTG', ]
)
register_ncbi_table(name = 'Flatworm Mitochondrial',
register_ncbi_table(name = 'Alternative Flatworm Mitochondrial',
alt_name = None, id = 14,
table = {
'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
Expand Down Expand Up @@ -510,6 +519,82 @@ def register_ncbi_table(name, alt_name, id,
stop_codons = [ 'TAA', 'TGA', ],
start_codons = [ 'ATG', ]
)
register_ncbi_table(name = 'Chlorophycean Mitochondrial',
alt_name = None, id = 16,
table = {
'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
'TAG': 'L', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L',
'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
'GGA': 'G', 'GGG': 'G', },
stop_codons = [ 'TAA', 'TGA', ],
start_codons = [ 'ATG', ]
)
register_ncbi_table(name = 'Trematode Mitochondrial',
alt_name = None, id = 21,
table = {
'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S',
'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
'GGA': 'G', 'GGG': 'G', },
stop_codons = [ 'TAA', 'TAG', ],
start_codons = [ 'ATG', 'GTG', ]
)
register_ncbi_table(name = 'Scenedesmus obliquus Mitochondrial',
alt_name = None, id = 22,
table = {
'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
'TCC': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TAG': 'L',
'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
'GGG': 'G', },
stop_codons = [ 'TCA', 'TAA', 'TGA', ],
start_codons = [ 'ATG', ]
)
register_ncbi_table(name = 'Thraustochytrium Mitochondrial',
alt_name = None, id = 23,
table = {
'TTT': 'F', 'TTC': 'F', 'TTG': 'L', 'TCT': 'S', 'TCC': 'S',
'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TGT': 'C',
'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L',
'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R',
'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 'ATC': 'I',
'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 'ACA': 'T',
'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 'GTT': 'V',
'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A',
'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E',
'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', },
stop_codons = [ 'TTA', 'TAA', 'TAG', 'TGA', ],
start_codons = [ 'ATT', 'ATG', 'GTG', ]
)


######### Deal with ambiguous forward translations

Expand Down Expand Up @@ -786,7 +871,9 @@ def _sort(x, y, table = self.ambiguous_protein):
for n in ambiguous_generic_by_id.keys():
assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V"
assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V"
assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X" #F or L
if n != 23 :
#For table 23, UUN = F, L or stop.
assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X" #F or L
#R = A or G, so URR = UAA or UGA / TRA = TAA or TGA = stop codons
if "UAA" in unambiguous_rna_by_id[n].stop_codons \
and "UGA" in unambiguous_rna_by_id[n].stop_codons:
Expand All @@ -802,4 +889,6 @@ def _sort(x, y, table = self.ambiguous_protein):
del n
assert ambiguous_generic_by_id[1].stop_codons == ambiguous_generic_by_name["Standard"].stop_codons
assert ambiguous_generic_by_id[4].stop_codons == ambiguous_generic_by_name["SGC3"].stop_codons
assert ambiguous_generic_by_id[11].stop_codons == ambiguous_generic_by_name["Bacterial"].stop_codons
assert ambiguous_generic_by_id[11].stop_codons == ambiguous_generic_by_name["Plant Plastid"].stop_codons
assert ambiguous_generic_by_id[15].stop_codons == ambiguous_generic_by_name['Blepharisma Macronuclear'].stop_codons
6 changes: 6 additions & 0 deletions NEWS
Expand Up @@ -37,6 +37,12 @@ nucleotide sequence for features in GenBank or EMBL files.
SeqRecord objects now support addition, giving a new SeqRecord with the
combined sequence, all the SeqFeatures, and any common annotation.

The NCBI codon tables have been updated from version 3.4 to 3.9, which adds
a few extra start codons, and a few new tables (Tables 16, 21, 22 and 23).
Note that Table 14 which used to be called "Flatworm Mitochondrial" is now
called "Alternative Flatworm Mitochondrial", and "Flatworm Mitochondrial" is
now an alias for Table 9 ("Echinoderm Mitochondrial").

===================================================================

September 22, 2009: Biopython 1.52 released.
Expand Down
18 changes: 12 additions & 6 deletions Tests/test_CodonTable.py
Expand Up @@ -17,22 +17,28 @@
for n in ambiguous_generic_by_id.keys():
assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V"
assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V"
assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X" #F or L
if n != 23 :
assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X" #F or L

assert ambiguous_dna_by_id[n].forward_table["GTT"] == "V"
assert ambiguous_dna_by_id[n].forward_table["TTN"] == "X" #F or L
if n != 23 :
assert ambiguous_dna_by_id[n].forward_table["TTN"] == "X" #F or L
assert ambiguous_dna_by_id[n].forward_table["GTN"] == "V"

assert ambiguous_generic_by_id[n].forward_table.get("TTN") == "X"
if n != 23 :
assert ambiguous_generic_by_id[n].forward_table.get("TTN") == "X"
assert ambiguous_generic_by_id[n].forward_table["ACN"] == "T"
assert ambiguous_generic_by_id[n].forward_table["GUU"] == "V"
assert ambiguous_generic_by_id[n].forward_table["GUN"] == "V"
assert ambiguous_generic_by_id[n].forward_table["UUN"] == "X" #F or L
if n != 23 :
assert ambiguous_generic_by_id[n].forward_table["UUN"] == "X" #F or L
assert ambiguous_generic_by_id[n].forward_table["GTT"] == "V"
assert ambiguous_generic_by_id[n].forward_table["TTN"] == "X" #F or L
if n != 23 :
assert ambiguous_generic_by_id[n].forward_table["TTN"] == "X" #F or L
assert ambiguous_generic_by_id[n].forward_table["GTN"] == "V"
#And finally something evil, an RNA-DNA mixture:
assert ambiguous_generic_by_id[n].forward_table["UTN"] == "X" #F or L
if n != 23 :
assert ambiguous_generic_by_id[n].forward_table["UTN"] == "X" #F or L
assert ambiguous_generic_by_id[n].forward_table["UTU"] == "F"

#R = A or G, so URR = UAA or UGA / TRA = TAA or TGA = stop codons
Expand Down
2 changes: 1 addition & 1 deletion Tests/test_Emboss.py
Expand Up @@ -766,7 +766,7 @@ def check(self, sequence):
translation = emboss_translate(sequence)
self.assert_(check_translation(sequence, translation))

for table in [1,2,3,4,5,6,9,10,11,12,13,14,15]:
for table in [1,2,3,4,5,6,9,10,11,12,13,14,15,16,21,22,23]:
translation = emboss_translate(sequence, table)
self.assert_(check_translation(sequence, translation, table))
return True
Expand Down

0 comments on commit 74ba9d2

Please sign in to comment.