In [18]:
from Bio import Entrez
from Bio.Seq import Seq
from Bio.Data import CodonTable

Entrez.email = "qamilmirza@berkeley.edu"  # Required for NCBI queries

# Search for the species in the NCBI taxonomy database
species_name = "Hansenula saturnus"  # Change to any species of interest
handle = Entrez.esearch(db="taxonomy", term=species_name)
try:
    record = Entrez.read(handle)
finally:
    # Release the connection even when the response cannot be parsed.
    handle.close()

if record["IdList"]:
    # Only the first hit is used; any further matches are ignored.
    taxon_id = record["IdList"][0]
    print(f"Taxonomy ID: {taxon_id}")

    # Fetch taxonomy details to obtain the genetic code ID
    handle = Entrez.efetch(db="taxonomy", id=taxon_id, retmode="xml")
    try:
        records = Entrez.read(handle)
    finally:
        handle.close()

    if records:
        # Missing fields fall back to "N/A" so the report lines below still print.
        genetic_code = records[0].get("GeneticCode", {})
        gc_id = genetic_code.get("GCId", "N/A")
        gc_name = genetic_code.get("GCName", "N/A")
        print(f"Genetic Code ID: {gc_id}")
        print(f"Genetic Code Name: {gc_name}")

        # Print the associated codon table
        # str() guards against a non-string element; "N/A" fails isdigit() and is reported as invalid.
        if str(gc_id).isdigit():
            gc_id = int(gc_id)
            if gc_id in CodonTable.unambiguous_dna_by_id:
                codon_table = CodonTable.unambiguous_dna_by_id[gc_id]
                print("\nCodon Table Retrieved:")
                print(codon_table)
            else:
                print(f"Codon table for Genetic Code ID {gc_id} is not available in Biopython.")
        else:
            print("Invalid Genetic Code ID.")
    else:
        print("No records found.")
else:
    print("Species not found.")

Taxonomy ID: 907340
Genetic Code ID: 1
Genetic Code Name: Standard

Codon Table Retrieved:
Table 1 Standard, SGC0

  |  T      |  C      |  A      |  G      |
--+---------+---------+---------+---------+--
T | TTT F   | TCT S   | TAT Y   | TGT C   | T
T | TTC F   | TCC S   | TAC Y   | TGC C   | C
T | TTA L   | TCA S   | TAA Stop| TGA Stop| A
T | TTG L(s)| TCG S   | TAG Stop| TGG W   | G
--+---------+---------+---------+---------+--
C | CTT L   | CCT P   | CAT H   | CGT R   | T
C | CTC L   | CCC P   | CAC H   | CGC R   | C
C | CTA L   | CCA P   | CAA Q   | CGA R   | A
C | CTG L(s)| CCG P   | CAG Q   | CGG R   | G
--+---------+---------+---------+---------+--
A | ATT I   | ACT T   | AAT N   | AGT S   | T
A | ATC I   | ACC T   | AAC N   | AGC S   | C
A | ATA I   | ACA T   | AAA K   | AGA R   | A
A | ATG M(s)| ACG T   | AAG K   | AGG R   | G
--+---------+---------+---------+---------+--
G | GTT V   | GCT A   | GAT D   | GGT G   | T
G | GTC V   | GCC A   | GAC D   | GGC G   | C
G | GTA V   |

In [1]:
# Advanced Project Code: Species-Dependent Translator
from Bio.Data import CodonTable

def translate_sequence(seq, species="universal"):
    """
    Translate a DNA sequence using the codon table appropriate for the given species.

    The sequence is read in frame from its first base, three bases at a time.
    Stop codons are emitted as "*" and translation continues past them; codons
    that are not in the selected table (e.g. containing N) are emitted as "X".
    A trailing partial codon is ignored. Matching is case-insensitive.

    Parameters:
        seq (str): The DNA sequence (should be in 5'-to-3' orientation).
                   Any object whose str() is the sequence text is accepted.
        species (str): Either "universal" or a string indicating another species/organellar code.
                       "yeast_mito", "yeast mitochondria" and
                       "saccharomyces cerevisiae mitochondria" (any case) select table 3;
                       every other value selects table 1.

    Returns:
        str: The translated protein sequence.
    """
    # Choose the appropriate codon table.
    # (This mapping can be expanded or automated by querying NCBI taxonomy data.)
    yeast_mito_aliases = {
        "saccharomyces cerevisiae mitochondria",
        "yeast_mito",
        "yeast mitochondria",
    }
    # Table 3 is the yeast mitochondrial code (as defined by NCBI); table 1 is the universal code.
    table_id = 3 if species.lower() in yeast_mito_aliases else 1

    codon_table = CodonTable.unambiguous_dna_by_id[table_id]
    stop_codons = set(codon_table.stop_codons)
    forward_table = codon_table.forward_table

    # The codon tables are keyed by uppercase codons, so normalise the case first;
    # otherwise a lowercase sequence would translate to all "X".
    bases = str(seq).upper()

    # Drop the incomplete codon at the end, if any.
    usable_length = len(bases) - len(bases) % 3

    residues = []
    for start in range(0, usable_length, 3):
        codon = bases[start:start + 3]
        if codon in stop_codons:
            residues.append("*")
        else:
            # Unknown codons become 'X'.
            residues.append(forward_table.get(codon, "X"))
    return "".join(residues)

# Demo: translate one sequence under both supported genetic codes.
dna_seq = "ATGAAGACCTGG"  # Example sequence; adjust as needed.
protein_universal, protein_yeast = (
    translate_sequence(dna_seq, species=code_name)
    for code_name in ("universal", "yeast_mito")
)

print("Universal translation:", protein_universal)
print("Yeast mitochondrial translation:", protein_yeast)


Universal translation: MKTW
Yeast mitochondrial translation: MKTW


In [2]:
# This sequence contains ATA and TGA; the recorded output shows those two codons
# translating differently under the universal and yeast mitochondrial tables.
dna_seq = "ATGATAGTATTAGGTAAGTGA"
protein_universal, protein_yeast = [
    translate_sequence(dna_seq, species=genetic_code)
    for genetic_code in ("universal", "yeast_mito")
]

print("Universal translation:", protein_universal)
print("Yeast mitochondrial translation:", protein_yeast)


Universal translation: MIVLGK*
Yeast mitochondrial translation: MMVLGKW


In [1]:
import requests
from Bio.Data import CodonTable
from Bio import Entrez
from Bio import SeqIO
from io import StringIO
from urllib.parse import quote 

def get_species_codon_table(species_name, timeout=10.0):
    """
    Look up the genetic code table ID of a species via the ENA taxonomy REST API.

    Parameters:
        species_name (str): The name of the species to search for. It is sent to the
                            "scientific-name" endpoint after URL-encoding.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        int or None: The genetic code table ID of the first match (1 when the match
                     carries no "geneticCode" field). None when the request fails,
                     the response is not a non-empty JSON list, or the reported
                     genetic code is not a valid integer.
    """
    encoded_species = quote(species_name)
    search_url = f"https://www.ebi.ac.uk/ena/taxonomy/rest/scientific-name/{encoded_species}"

    try:
        # Without a timeout an unresponsive server would block the kernel indefinitely.
        response = requests.get(search_url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decoding error is not a RequestException subclass.
        return None

    # Echo the raw payload so an empty match ([]) is visible in the cell output.
    print(data)

    if not isinstance(data, list) or not data:
        return None

    taxon_info = data[0]
    if not isinstance(taxon_info, dict):
        return None

    try:
        return int(taxon_info.get("geneticCode", 1))  # Ensure integer
    except (TypeError, ValueError):
        # A null or non-numeric genetic code is treated as "unknown".
        return None

In [2]:
# Same species name as the NCBI taxonomy lookup cell. The recorded output shows the
# ENA endpoint returning an empty list for it, in which case the function returns None.
get_species_codon_table("Hansenula saturnus")

[]
