In [18]:
# Standard genetic code lookup: every three-base mRNA codon maps to the
# one-letter code of the amino acid it encodes. The three termination codons
# map to the marker string 'STOP' instead of an amino acid.
codon_chart = {
    # Start codon (methionine) followed by the three stop codons
    'AUG': 'M', 'UAA': 'STOP', 'UAG': 'STOP', 'UGA': 'STOP',

    # Codons whose first base is U
    'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L',
    'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S',
    'UAU': 'Y', 'UAC': 'Y', 'UGU': 'C', 'UGC': 'C', 'UGG': 'W',

    # Codons whose first base is C
    'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
    'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
    'CAU': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',

    # Codons whose first base is A (AUG is listed with the start/stop group)
    'AUU': 'I', 'AUC': 'I', 'AUA': 'I',
    'ACU': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
    'AAU': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGU': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',

    # Codons whose first base is G
    'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V',
    'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
    'GAU': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
}

# Ask the user for the DNA sequence to translate. Surrounding whitespace is
# stripped because a stray leading space or a pasted trailing newline would
# otherwise shift the three-base reading frame used during translation.
dna_seq = input('Please type your DNA sequence:').strip()
def dnaseq_aminoacidseq(dna_seq, codon_table=None):
    """Translate a DNA coding sequence into a one-letter amino-acid string.

    The sequence is read from its first base in consecutive three-base
    codons. Translation ends at the first stop codon (which is not included
    in the result) or at the end of the sequence.

    Parameters
    ----------
    dna_seq : str
        DNA sequence. Case is ignored and any whitespace (spaces, tabs,
        line breaks from pasted text) is removed before translation.
    codon_table : dict, optional
        Mapping from mRNA codon (e.g. 'AUG') to a one-letter amino-acid
        string, with stop codons mapped to 'STOP'. Defaults to the
        notebook-level ``codon_chart``.

    Returns
    -------
    str
        The translated protein sequence. A complete codon that is not in the
        table (for example one containing an ambiguous base such as N) is
        reported as 'X' so that the positions of the remaining residues are
        preserved. One or two leftover bases at the end are ignored.
    """
    if codon_table is None:
        codon_table = codon_chart

    # Normalise the input: remove all whitespace, upper-case, then convert to mRNA.
    rna_seq = ''.join(dna_seq.split()).upper().replace('T', 'U')

    # Only whole codons can be translated; drop a trailing partial codon.
    translatable_length = len(rna_seq) - len(rna_seq) % 3

    amino_acids = []
    for i in range(0, translatable_length, 3):  # every 3 nucleotides is 1 codon
        amino_acid = codon_table.get(rna_seq[i:i + 3], 'X')
        if amino_acid == 'STOP':
            break
        amino_acids.append(amino_acid)
    return ''.join(amino_acids)

# Translate the DNA sequence entered above and print the resulting protein.
# The result is kept in the notebook-level variable amino_acid_seq, which the
# BLAST cell further down passes to qblast as its query sequence.
amino_acid_seq = dnaseq_aminoacidseq(dna_seq)
print(f"Amino Acid Sequence: {amino_acid_seq}")

Please type your DNA sequence: ATGGTGAGCAAGGGCGAGGATAACATG


Amino Acid Sequence: MVSKGEDNM


In [19]:
import re

import nglview as nv
import pandas as pd
import requests
from Bio.Blast import NCBIWWW # Online version of BLAST. Must have internet connection to run
from Bio.Blast import NCBIXML

# BLAST query
# qblast takes the program, the database and the query sequence:
# - Program: blastp compares a protein query against protein sequences
#   (blastn is the nucleotide equivalent).
# - Database: nr is NCBI's non-redundant database, in which identical
#   sequences are represented by a single entry.
# The search runs on NCBI's servers, so it needs an internet connection and can
# take a while. Results are returned in XML format by default.
if not amino_acid_seq:
    # An empty query cannot be searched; fail here with a clear message
    # instead of sending a request to NCBI.
    raise ValueError("amino_acid_seq is empty; there is nothing to BLAST")
result_handle = NCBIWWW.qblast("blastp", "nr", amino_acid_seq)

# Read results
# NCBIXML.parse yields one BLAST record per query sequence (NCBIXML.read is the
# shortcut when there is exactly one record). parse returns a one-shot
# generator, so the records are stored in a list: that way blast_results can be
# looped over again in later cells instead of appearing empty after first use.
try:
    blast_results = list(NCBIXML.parse(result_handle))
finally:
    result_handle.close()  # the XML has been fully read (or parsing failed)

# Create dataframe using Pandas
# One row per HSP (High-scoring Segment Pair): a local alignment between the
# query and a database sequence that BLAST reports for a hit.
blast_hits = []
for blast_result in blast_results:
    for alignment in blast_result.alignments: # Each alignment is one database hit
        for hsp in alignment.hsps:
            blast_hits.append({'Description': alignment.title,  # hit ID followed by its description
                               'Length': alignment.length,      # length of the database sequence
                               'E-value': hsp.expect,
                               'Score': hsp.score,
                               'Identity': hsp.identities,      # number of identical positions
                               'Query': hsp.query,              # aligned part of the query
                               'Match': hsp.match})             # match line between query and hit
df_blast = pd.DataFrame(blast_hits)
df_blast.head(5)

In [None]:
# Creating list of pdb_ids
# Only hits that come from the Protein Data Bank have a structure file at RCSB.
# NCBI tags those hits as "pdb|<4-character ID>|<chain>" in the hit title, which
# the BLAST cell stored under 'Description' in blast_hits. The IDs are read from
# there rather than from blast_results, because a parser generator that has
# already been looped over yields nothing the second time, and because a
# general BLAST accession is not a PDB ID.
pdb_ids = []
for hit in blast_hits:
    for found_id in re.findall(r'pdb\|([0-9][A-Za-z0-9]{3})\|', hit['Description']):
        found_id = found_id.upper()
        if found_id not in pdb_ids: # Keep the BLAST ranking, drop duplicates
            pdb_ids.append(found_id)

view = None # Stays None when there is no structure to show
if pdb_ids: # If list is not empty
    first_pdb_id = pdb_ids[0] # Best-ranked hit that has a PDB entry
    print(f"PDB ID: {first_pdb_id}")
    # Download the PDB file from RCSB PDB (protein database)
    pdb_url = f"https://files.rcsb.org/download/{first_pdb_id}.pdb"
    response = requests.get(pdb_url, timeout=30) # Timeout so the cell cannot hang forever
    if response.status_code == 200: # 200 is the code for successful request
        with open(f"{first_pdb_id}.pdb", "w") as pdb_file: # w: write mode
            pdb_file.write(response.text) # Write as text and save PDB file
        # Visualise using nglview
        view = nv.show_file(f"{first_pdb_id}.pdb")
        view.add_representation('cartoon')
    else:
        print(f"Could not download {pdb_url} (HTTP status {response.status_code})")
else:
    print("No PDB entries were found among the BLAST hits")

# Jupyter only renders the last top-level expression of a cell, so the viewer
# has to be named here rather than inside the if-block above.
view

In [None]:
def pdb_id_from_hit_id(hit_id):
    """Return the PDB entry ID contained in a BLAST hit_id, or None.

    A hit_id is a '|'-separated string in which a database tag is followed by
    the identifier in that database, e.g. 'pdb|1EMA|A' or
    'gi|1827692|pdb|1EMA|A'. The position of the 'pdb' tag is not fixed, so
    the field that follows it is returned instead of a fixed index.

    Parameters
    ----------
    hit_id : str
        The hit identifier, e.g. ``alignment.hit_id``.

    Returns
    -------
    str or None
        The field following the 'pdb' tag, or None when the hit_id has no
        'pdb' tag (such a hit is unlikely to have a PDB file).
    """
    fields = hit_id.split('|') # Split the hit_id into a list using | as the delimiter
    for tag, value in zip(fields, fields[1:]): # Each field paired with the one after it
        if tag == 'pdb' and value:
            return value
    return None

# May or may not need this
# To add the PDB ID as an extra column, put this entry in the dictionary that is
# appended to blast_hits:
#     'PDB ID': pdb_id_from_hit_id(alignment.hit_id)