In [23]:
import matplotlib.pyplot as plt
from protein import ProteinFeatureExtractor

In [24]:
# Sample protein sequence, stored as a single string of upper-case residue letters.
# It is the input to the feature extraction in this cell and to the BLAST lookup
# (`fetch_uniprot_id(sequence)`) further down the notebook.
sequence = "MKKFFDSRREQGGSGLGSGSSGGGGSTSGLGSGYIGRVFGIGRQQVTVDEVLAEGGFAIVFLVRTSNGMKCALKRMFVNNEHDLQVCKREIQIMRDLSGHKNIVGYIDSSINNVSSGDVWEVLILMDFCRGGQVVNLMNQRLQTGFTENEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLHDRGHYVLCDFGSATNKFQNPQTEGVNAVEDEIKKYTTLSYRAPEMVNLYSGKIITTKADIWALGCLLYKLCYFTLPFGESQVAICDGNFTIPDNSRYSQDMHCLIRYMLEPDPDKRPDIYQVSYFSFKLLKKECPIPNVQNSPIPAKLPEPVKASEAAAKKTQPKARLTDPIPTTETSIAPRQRPKAGQTQPNPGILPIQPALTPRKRATVQPPPQAAGSSNQPGLLASVPQPKPQAPPSQPLPQTQAKQPQAPPTPQQTPSTQAQGLPAQAQATPQHQQQLFLKQQQQQQQPPPAQQQPAGTFYQQQQAQTQQFQAVHPATQKPAIAQFPVVSQGGSQQQLMQNFYQQQQQQQQQQQQQQLATALHQQQLMTQQAALQQKPTMAAGQQPQPQPAAAPQPAPAQEPAIQAPVRQQPKVQTTPPPAVQGQKVGSLTPPSSPKTQRAGHRRILSDVTHSAVFGVPASKSTQLLQAAAAEASLNKSKSATTTPSGSPRTSQQNVYNPSEGSTWNPFDDDNFSKLTAEELLNKDFAKLGEGKHPEKLGGSAESLIPGFQSTQGDAFATTSFSAGTAEKRKGGQTVDSGLPLLSVSDPFIPLQVPDAPEKLIEGLKSPDTSLLLPDLLPMTDPFGSTSDAVIEKADVAVESLIPGLEPPVPQRLPSQTESVTSNRTDSLTGEDSLLDCSLLSNPTTDLLEEFAPTAISAPVHKAAEDSNLISGFDVPEGSDKVAEDEFDPIPVLITKNPQGGHSRNSSGSSESSLPNLARSLLLVDQLIDL"

# Build the extractor and run it over the sample sequence in one go.
extractor = ProteinFeatureExtractor()
features = extractor.extract_features(sequence)

# Tick labels for the bar chart: the extractor's own `amino_acids` entries
# followed by five named descriptors.
# NOTE(review): presumably this order mirrors the order of values in
# `features` -- confirm against ProteinFeatureExtractor.extract_features.
feature_labels = extractor.amino_acids + [
    "Molecular Weight", "Aromaticity", "Instability Index",
    "Isoelectric Point", "Hydrophobicity",
]

In [None]:
# Bar chart with one bar per entry of `feature_labels`, drawn through the
# explicit Figure/Axes objects instead of the implicit pyplot state.
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(feature_labels, features, color="skyblue", edgecolor="black")

# `plt.xticks` acts on the current axes, which is `ax` here; the labels are
# rotated and right-aligned.
plt.xticks(rotation=45, ha="right")

ax.set_title("Protein Feature Visualization")
ax.set_xlabel("Feature")
ax.set_ylabel("Value")

fig.tight_layout()
plt.show()

In [26]:
from Bio import ExPASy
from Bio import SwissProt
import py3Dmol
import requests
import xml.etree.ElementTree as ET
import time

In [27]:
def get_pdb_ids_from_uniprot(uniprot_id: str, timeout: float = 30):
    """Fetch PDB IDs associated with a UniProt ID using the UniProt API.

    Downloads the entry's XML record and collects the ``id`` attribute of every
    ``dbReference`` element whose ``type`` attribute is ``"PDB"``.

    Args:
        uniprot_id: UniProt accession to look up.
        timeout: Seconds ``requests`` waits for the server before giving up,
            so a stalled connection cannot hang the notebook forever.

    Returns:
        A list of PDB ID strings in document order (empty if the entry has no
        PDB cross-references). References without an ``id`` are skipped.

    Raises:
        Exception: If the server answers with a status code other than 200.
        Errors raised by ``requests.get`` (including timeouts) and by the XML
        parser are not caught here and propagate to the caller.
    """
    # NOTE(review): this looks like the older www.uniprot.org URL layout;
    # confirm it still resolves (requests follows redirects by default).
    url = f"https://www.uniprot.org/uniprot/{uniprot_id}.xml"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Error fetching UniProt entry for {uniprot_id}")

    # The UniProt XML puts every element in this namespace, so the tag has to
    # be fully qualified for the search to match.
    root = ET.fromstring(response.content)
    cross_refs = root.findall(".//{http://uniprot.org/uniprot}dbReference")

    # Keep only PDB cross-references that actually carry an id; a missing id
    # would otherwise end up as None in the result.
    return [
        cross_ref.attrib["id"]
        for cross_ref in cross_refs
        if cross_ref.attrib.get("type") == "PDB" and cross_ref.attrib.get("id")
    ]

def fetch_pdb_file(pdb_id: str, timeout: float = 30):
    """Fetch the PDB file content from the PDB database.

    Args:
        pdb_id: PDB identifier; it is inserted verbatim into the download URL.
        timeout: Seconds ``requests`` waits for the server before giving up,
            so a stalled download cannot hang the notebook forever.

    Returns:
        The body of the response as text.

    Raises:
        Exception: If the server answers with a status code other than 200.
        Errors raised by ``requests.get`` (including timeouts) propagate.
    """
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Error fetching PDB file for {pdb_id}")
    return response.text

def visualize_protein_3d(uniprot_id: str, max_structures: int = 1):
    """Visualize the protein structure from a UniProt ID.

    Looks up the PDB IDs cross-referenced by the UniProt entry, downloads the
    first ``max_structures`` of them and renders each one in its own py3Dmol
    viewer (800x600, cartoon style, spectrum colouring).

    Args:
        uniprot_id: UniProt accession whose structures should be shown.
        max_structures: Upper bound on how many PDB entries are fetched and
            displayed. The default of 1 shows only the first entry; values
            below zero are treated as zero.

    Returns:
        None. Viewers and status messages are emitted as cell output.

    Any exception raised while looking up, downloading or rendering is caught
    and reported with ``print`` so the notebook keeps running.
    """
    try:
        # Fetch PDB ID(s) associated with the UniProt ID
        pdb_ids = get_pdb_ids_from_uniprot(uniprot_id)

        if not pdb_ids:
            print(f"No PDB IDs found for UniProt ID {uniprot_id}.")
            return

        # Clamp at zero so a negative limit cannot turn into an
        # "all but the last N" slice.
        for pdb_id in pdb_ids[:max(0, max_structures)]:
            pdb_data = fetch_pdb_file(pdb_id)
            print("3D model for the protein: ", pdb_id)
            view = py3Dmol.view(width=800, height=600)
            view.addModel(pdb_data, "pdb")
            view.setStyle({"cartoon": {"color": "spectrum"}})
            view.zoomTo()
            view.show()

    except Exception as e:
        print(f"Error visualizing protein: {e}")

In [None]:
# Demo: render the PDB structure linked to one UniProt accession.
# Put any other accession in `uniprot_id` to view a different protein.
uniprot_id = "Q852Q0"
visualize_protein_3d(uniprot_id=uniprot_id)

In [29]:

def fetch_uniprot_id(sequence, poll_interval=5, max_wait=600, request_timeout=30):
    """
    Use the NCBI BLAST API to find the closest UniProt ID for the given protein sequence.

    The sequence is submitted as a ``blastp`` search against the ``swissprot``
    database. The job is then polled until it stops reporting
    ``Status=WAITING``, and the XML result is scanned for the first hit whose
    ``Hit_def`` text contains ``"sp|"``.

    Args:
        sequence: Protein sequence to search for.
        poll_interval: Seconds to sleep between two status polls.
        max_wait: Maximum number of seconds to keep polling before giving up;
            ``None`` waits indefinitely.
        request_timeout: Per-request timeout in seconds passed to ``requests``.

    Returns:
        The text between the first and second ``|`` of the matching
        ``Hit_def`` (it may carry a version suffix such as ``".1"``), or
        ``None`` when submission, polling or parsing fails or no hit matches.
        Every failure path prints a short message before returning ``None``.

    Raises:
        Errors raised by ``requests`` itself (connection errors, timeouts)
        are not caught and propagate to the caller.
    """
    if not sequence:
        print("No sequence provided for BLAST search.")
        return None

    blast_url = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"

    # Step 1: Submit the sequence to BLAST
    submit_params = {
        "CMD": "Put",
        "PROGRAM": "blastp",
        "DATABASE": "swissprot",
        "QUERY": sequence,
        "FORMAT_TYPE": "XML"
    }
    response = requests.post(blast_url, data=submit_params, timeout=request_timeout)

    if response.status_code != 200:
        print("Error submitting sequence to BLAST.")
        return None

    # Parse the request ID (RID). partition() reports whether the marker was
    # found at all, so a response without an RID is rejected instead of being
    # sliced at an arbitrary offset.
    _, marker, after_marker = response.text.partition("RID = ")
    rid = after_marker.split("\n", 1)[0].strip() if marker else ""
    if not rid:
        print("Error submitting sequence to BLAST: no request ID (RID) in the response.")
        return None
    print(f"BLAST request submitted. RID: {rid}")

    # Step 2: Poll until the job is no longer waiting. The response that ends
    # the wait already is the result document, so it is kept and reused
    # rather than downloaded a second time.
    # NOTE(review): NCBI publishes usage guidelines for polling frequency;
    # confirm the default poll_interval complies with them.
    poll_params = {"CMD": "Get", "RID": rid, "FORMAT_TYPE": "XML"}
    deadline = None if max_wait is None else time.monotonic() + max_wait
    while True:
        result_response = requests.get(blast_url, params=poll_params, timeout=request_timeout)
        if result_response.status_code != 200:
            print("Error retrieving BLAST results.")
            return None
        result_text = result_response.text
        if "Status=WAITING" not in result_text:
            break
        if deadline is not None and time.monotonic() >= deadline:
            print(f"Timed out after {max_wait} seconds waiting for BLAST results.")
            return None
        print("Waiting for BLAST results...")
        time.sleep(poll_interval)

    # Step 3: A finished poll can still be a failure report rather than
    # results (FAILED / UNKNOWN are the status values listed in NCBI's URL API
    # documentation alongside WAITING and READY).
    if "Status=FAILED" in result_text or "Status=UNKNOWN" in result_text:
        print("BLAST search failed or the request ID is no longer known.")
        return None

    # Step 4: Parse the XML results to extract UniProt ID
    try:
        root = ET.fromstring(result_text)
    except ET.ParseError as e:
        print(f"Error parsing BLAST results: {e}")
    else:
        for hit in root.findall(".//Hit"):
            # findtext() plus `or ""` tolerates a missing or empty Hit_def, so
            # one malformed hit no longer aborts the scan of the others.
            hit_def = hit.findtext("Hit_def") or ""
            if "sp|" in hit_def:  # Check for SwissProt entries
                return hit_def.split("|")[1]  # Extract UniProt ID
        # NOTE(review): only Hit_def is inspected; if the accession is carried
        # in Hit_id / Hit_accession for some hits, those hits are skipped --
        # confirm against real BLAST XML output.

    print("No matching UniProt ID found.")
    return None

def fetch_pdb_from_alphafold(uniprot_id, model_version=4, timeout=30):
    """
    Check if a UniProt ID exists in the AlphaFold database and fetch the PDB file if available.

    Args:
        uniprot_id: UniProt accession; anything from the first ``.`` onwards
            (e.g. a ``".1"`` version suffix) is dropped before the lookup.
        model_version: Number placed after ``model_v`` in the requested file
            name. Defaults to 4.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        The file content as text when the server answers with status 200,
        otherwise ``None``.

    Raises:
        Errors raised by ``requests.get`` (including timeouts) propagate.
    """
    # Strip version suffix (e.g., ".1") if present
    clean_uniprot_id = uniprot_id.split('.')[0]

    # The "F1" part of the file name is fixed, so only that file is requested.
    alphafold_url = (
        f"https://alphafold.ebi.ac.uk/files/AF-{clean_uniprot_id}-F1-model_v{model_version}.pdb"
    )
    response = requests.get(alphafold_url, timeout=timeout)

    return response.text if response.status_code == 200 else None

def visualize_pdb(pdb_data):
    """
    Render PDB-format text in a py3Dmol viewer.

    Opens an 800x600 viewer, loads `pdb_data` as a "pdb" model, applies the
    spectrum-coloured cartoon style, zooms to the model and displays it.
    Returns nothing.
    """
    cartoon_style = {"cartoon": {"color": "spectrum"}}

    viewer = py3Dmol.view(width=800, height=600)
    viewer.addModel(pdb_data, "pdb")
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()
    viewer.show()

In [None]:
# Resolve the sample sequence to a UniProt accession via BLAST, then show a
# structure for it: AlphaFold is tried first, the PDB cross-references of the
# UniProt entry are the fallback.
uniprot_id = fetch_uniprot_id(sequence)

if not uniprot_id:
    print("No matching UniProt ID found.")
else:
    print(f"UniProt ID found: {uniprot_id}")
    print("Fetching structure from AlphaFold...")
    pdb_data = fetch_pdb_from_alphafold(uniprot_id)

    if not pdb_data:
        # No AlphaFold file came back, so fall back to the PDB lookup.
        print("No structure found in AlphaFold database for this sequence.")
        print("Checking for pdb data in Uniprot.....\n")
        visualize_protein_3d(uniprot_id=uniprot_id)
    else:
        print("Structure found in AlphaFold! Visualizing 3D model...")
        visualize_pdb(pdb_data)