In [1]:
!pip install biopython


Installing collected packages: biopython
Successfully installed biopython-1.83


In [None]:
from Bio import SeqIO
from Bio import pairwise2


In [3]:
# Reference (wild-type) sequence as FASTA text: one '>' header line followed by
# the residues wrapped at 60 characters per line.
normal_fasta = """>sp|O43708|MAAI_HUMAN Maleylacetoacetate isomerase OS=Homo sapiens OX=9606 GN=GSTZ1 PE=1 SV=3
MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVP
TLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASVRMISDLIAGGIQPLQNLSVLK
QVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDL
TPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA
"""

# Mutated sequence. It differs from the reference at one residue on the second
# line (position 99, V -> M). Note that this text has no '>' header line, so it
# is bare sequence rather than a complete FASTA record.
mutated_fasta = """MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVP
TLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASMRMISDLIAGGIQPLQNLSVLK
QVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDL
TPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA
"""


In [6]:
from Bio import SeqIO
from Bio import pairwise2
from io import StringIO

# Reference (wild-type) sequence as a complete FASTA record: a '>' header line
# followed by the residues wrapped at 60 characters per line.
normal_fasta = """>sp|O43708|MAAI_HUMAN Maleylacetoacetate isomerase OS=Homo sapiens OX=9606 GN=GSTZ1 PE=1 SV=3
MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVP
TLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASVRMISDLIAGGIQPLQNLSVLK
QVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDL
TPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA
"""

# Mutated sequence as a FASTA record with a placeholder '> Mutated' header.
# It differs from the reference at one residue on the second sequence line
# (position 99, V -> M).
mutated_fasta = """> Mutated
MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVP
TLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASMRMISDLIAGGIQPLQNLSVLK
QVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDL
TPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA
"""

# Extract sequences
def get_sequence_from_fasta(fasta_string):
    """Parse FASTA text holding exactly one record and return its residues.

    Args:
        fasta_string: FASTA-formatted text containing a single record.

    Returns:
        The record's sequence as a plain ``str``.
    """
    # SeqIO.read consumes a file-like handle, so wrap the text in memory.
    handle = StringIO(fasta_string)
    return str(SeqIO.read(handle, "fasta").seq)

normal_sequence = get_sequence_from_fasta(normal_fasta)

# The FASTA parser needs a '>' header line; prepend a placeholder when the
# mutated text was pasted as bare sequence.
if mutated_fasta[:1] != ">":
    mutated_fasta = "> Mutated\n" + mutated_fasta

mutated_sequence = get_sequence_from_fasta(mutated_fasta)

# Global alignment, keeping only a single best-scoring alignment.
# NOTE(review): globalxx applies no mismatch or gap penalty, so a substitution
# may be reported as a pair of gaps rather than as a mismatch - confirm this
# scoring is what is wanted.
alignments = pairwise2.align.globalxx(
    normal_sequence,
    mutated_sequence,
    one_alignment_only=True,
)
best_alignment = alignments[0]
alignment_score = best_alignment.score

# Report the score and both gapped sequences.
print(f"Alignment Score: {alignment_score}")
print(f"Aligned Sequence 1: {best_alignment.seqA}")
print(f"Aligned Sequence 2: {best_alignment.seqB}")


Alignment Score: 215.0
Aligned Sequence 1: MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASV-RMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA
Aligned Sequence 2: MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRAS-MRMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA


In [None]:
!pip install biopython nglview


In [None]:
!pip install nglview --upgrade
!pip install ipywidgets --upgrade
!jupyter nbextension enable --py --sys-prefix widgetsnbextension

In [13]:
!pip install py3Dmol


Collecting py3Dmol
  Downloading py3Dmol-2.0.4-py2.py3-none-any.whl (12 kB)
Installing collected packages: py3Dmol
Successfully installed py3Dmol-2.0.4


In [16]:
import py3Dmol

from pathlib import Path

# These files are written by the create_pdb_from_fasta(...) calls in a later
# cell of this notebook, so that cell has to be run before this one.
# Path.read_text opens, reads and closes the file; the previous
# open(...).read() calls left the handles open.
normal_structure = Path('normal_structure.pdb').read_text()
mutated_structure = Path('mutated_structure.pdb').read_text()

# Overlay both structures in one viewer, drawn as sticks.
view = py3Dmol.view(width=800, height=400)
view.addModel(normal_structure, 'pdb')
view.setStyle({'stick': {}})
view.addModel(mutated_structure, 'pdb')
# setStyle without a selection restyles every atom loaded so far, so it is
# repeated after the second model is added.
view.setStyle({'stick': {}})
view.zoomTo()
view.show()


In [15]:
from Bio import SeqIO
from Bio.SeqUtils import seq3
import nglview

# Function to create a PDB file from a FASTA sequence
def create_pdb_from_fasta(fasta_string, pdb_file):
    """Write a placeholder C-alpha trace in PDB format for a protein sequence.

    This does NOT predict a structure: it emits one ``CA`` atom per residue,
    laid out on a straight line, so that viewers have something to load.

    Args:
        fasta_string: FASTA text (``>`` header lines are skipped) or a bare
            one-letter amino-acid sequence; whitespace is ignored.
        pdb_file: Path of the PDB file to create or overwrite.
    """
    # Keep residues only. Previously the header text and newline characters
    # were iterated too and written out as bogus residues.
    sequence = "".join(
        "".join(line.split())
        for line in fasta_string.splitlines()
        if not line.lstrip().startswith(">")
    )

    with open(pdb_file, 'w') as f:
        for i, aa in enumerate(sequence, start=1):
            # PDB residue names are upper-case three-letter codes.
            residue_name = seq3(aa).upper()
            # Space consecutive CA atoms 3.8 A apart along x so they do not
            # all collapse onto the origin.
            x = (i - 1) * 3.8
            # Fixed-column ATOM record: serial (7-11), atom name (13-16),
            # residue name (18-20), chain (22), residue number (23-26),
            # x/y/z (31-54), occupancy (55-60), B-factor (61-66), element (77-78).
            f.write(
                f"ATOM  {i:5d}  CA  {residue_name:>3} A{i:4d}    "
                f"{x:8.3f}{0.0:8.3f}{0.0:8.3f}{1.0:6.2f}{20.0:6.2f}"
                f"          {'C':>2}\n"
            )
        f.write("END\n")

# Define the protein sequences as FASTA text. The backslash right after the
# opening quotes suppresses the leading newline, so each string starts directly
# with its '>' header line.
normal_fasta = """\
>sp|O43708|MAAI_HUMAN Maleylacetoacetate isomerase OS=Homo sapiens OX=9606 GN=GSTZ1 PE=1 SV=3
MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVP
TLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASVRMISDLIAGGIQPLQNLSVLK
QVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDL
TPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA
"""

# Same protein with a single substitution on the second sequence line
# (position 99, V -> M).
mutated_fasta = """\
> Mutated
MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVP
TLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASMRMISDLIAGGIQPLQNLSVLK
QVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDL
TPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA
"""

# Write one PDB file per sequence into the current working directory. The
# complete FASTA text, header line included, is what gets passed in.
create_pdb_from_fasta(normal_fasta, 'normal_structure.pdb')
create_pdb_from_fasta(mutated_fasta, 'mutated_structure.pdb')

# Load the reference structure into an nglview widget
view = nglview.show_structure_file('normal_structure.pdb')

# Add the mutated structure to the same widget as a second component
view.add_component('mutated_structure.pdb')

# Leave the widget as the cell's last expression so the notebook renders it
view


NGLWidget()

In [19]:
import py3Dmol

def visualize_proteins(normal_pdb, mutated_pdb):
    """Show two PDB structures together as sticks in one py3Dmol viewer.

    Args:
        normal_pdb: PDB-format text of the reference structure.
        mutated_pdb: PDB-format text of the mutated structure.
    """
    viewer = py3Dmol.view(width=800, height=400)

    # (PDB text, label caption, x coordinate of the label), in display order.
    overlays = (
        (normal_pdb, 'Normal', 1),
        (mutated_pdb, 'Mutated', -1),
    )
    for pdb_text, caption, label_x in overlays:
        viewer.addModel(pdb_text, format='pdb')
        # No selection is given, so this restyles every atom loaded so far.
        viewer.setStyle({'stick': {}})
        viewer.addLabel(caption, {'fontSize': 15, 'position': {'x': label_x, 'y': 5, 'z': 0}})

    viewer.zoomTo()
    viewer.show()

from pathlib import Path

# Read PDB structures. Path.read_text closes each file once it has been read;
# the previous open(...).read() calls left the handles open.
normal_pdb = Path('normal_structure.pdb').read_text()
mutated_pdb = Path('mutated_structure.pdb').read_text()

# Visualize structures
visualize_proteins(normal_pdb, mutated_pdb)


In [21]:
import py3Dmol
import requests

def visualize_proteins_by_ids(normal_pdb_id, mutated_pdb_id):
    """Download two RCSB PDB entries and overlay them as sticks in one viewer.

    Args:
        normal_pdb_id: PDB ID of the reference structure.
        mutated_pdb_id: PDB ID of the mutated structure.

    Raises:
        requests.HTTPError: If RCSB answers with an error status for an ID.
    """
    def fetch_pdb_data(pdb_id):
        """Return the PDB-format text of ``pdb_id`` downloaded from RCSB."""
        pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
        response = requests.get(pdb_url, timeout=30)
        # Fail loudly on an unknown ID instead of handing an error page to the
        # viewer as if it were structure data.
        response.raise_for_status()
        return response.text

    viewer = py3Dmol.view(width=800, height=400)

    # Add normal protein structure
    normal_pdb_data = fetch_pdb_data(normal_pdb_id)
    viewer.addModel(normal_pdb_data, format='pdb')
    viewer.setStyle({'stick': {}})
    viewer.addLabel('Normal', {'fontSize': 15, 'position': {'x': 1, 'y': 5, 'z': 0}})

    # Add mutated protein structure
    mutated_pdb_data = fetch_pdb_data(mutated_pdb_id)
    viewer.addModel(mutated_pdb_data, format='pdb')
    # setStyle without a selection restyles every atom, so repeat it now that
    # the second model is loaded.
    viewer.setStyle({'stick': {}})
    viewer.addLabel('Mutated', {'fontSize': 15, 'position': {'x': -1, 'y': 5, 'z': 0}})

    viewer.zoomTo()
    viewer.show()

# RCSB PDB entry IDs to compare; edit these two values to view other structures
normal_pdb_id = '1FW1'
mutated_pdb_id = '8E8P'

# Download both entries and display them overlaid
visualize_proteins_by_ids(normal_pdb_id, mutated_pdb_id)


In [None]:
import py3Dmol
import requests

def visualize_proteins_by_ids(normal_pdb_id, mutated_pdb_id, mutation_positions):
    """Overlay two RCSB PDB entries and highlight residues of the mutated one.

    Args:
        normal_pdb_id: PDB ID of the reference structure.
        mutated_pdb_id: PDB ID of the mutated structure.
        mutation_positions: Residue numbers (as numbered in the PDB file) to
            draw in orange on the mutated structure.

    Raises:
        requests.HTTPError: If RCSB answers with an error status for an ID.
    """
    def fetch_pdb_data(pdb_id):
        """Return the PDB-format text of ``pdb_id`` downloaded from RCSB."""
        pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
        response = requests.get(pdb_url, timeout=30)
        # Fail loudly instead of passing an error page on as structure data.
        response.raise_for_status()
        return response.text

    def color_mutation(model, mutation_positions):
        """Draw the given residues of the mutated model (index 1) as orange sticks."""
        for position in mutation_positions:
            # The colour belongs inside the 'stick' spec; a top-level 'color'
            # key is not part of a 3Dmol style.
            model.setStyle({'model': 1, 'resi': position}, {'stick': {'color': 'orange'}})

    viewer = py3Dmol.view(width=800, height=400)

    # Add normal protein structure (model 0)
    normal_pdb_data = fetch_pdb_data(normal_pdb_id)
    viewer.addModel(normal_pdb_data, format='pdb')
    viewer.setStyle({'model': 0}, {'stick': {}})
    viewer.addLabel('Normal', {'fontSize': 15, 'position': {'x': 1, 'y': 5, 'z': 0}})

    # Add mutated protein structure (model 1)
    mutated_pdb_data = fetch_pdb_data(mutated_pdb_id)
    viewer.addModel(mutated_pdb_data, format='pdb')
    viewer.setStyle({'model': 1}, {'stick': {}})
    # Highlight only after the base style is set. Previously a selection-less
    # setStyle ran after the highlight and wiped it out.
    color_mutation(viewer, mutation_positions)
    viewer.addLabel('Mutated', {'fontSize': 15, 'position': {'x': -1, 'y': 5, 'z': 0}})

    viewer.zoomTo()
    viewer.show()

# RCSB PDB entry IDs to compare; edit these two values to view other structures
normal_pdb_id = '1FW1'
mutated_pdb_id = '8E8P'

# Residue numbers to highlight. These are placeholder values: replace them with
# the actual positions of the mutated residues.
mutation_positions = [1, 5, 10]

# Download both entries and display them with the listed residues highlighted
visualize_proteins_by_ids(normal_pdb_id, mutated_pdb_id, mutation_positions)


In [22]:
import py3Dmol
import requests

def visualize_proteins_by_ids(normal_pdb_id, mutated_pdb_id,
                              normal_sequence=None, mutated_sequence=None):
    """Overlay two RCSB PDB entries as sticks and highlight mutated residues.

    Args:
        normal_pdb_id: PDB ID of the reference structure.
        mutated_pdb_id: PDB ID of the mutated structure.
        normal_sequence: Optional one-letter sequence of the reference protein.
        mutated_sequence: Optional one-letter sequence of the mutated protein.
            When both sequences are given, every position where they differ is
            drawn in orange on the mutated structure.

    Raises:
        requests.HTTPError: If RCSB answers with an error status for an ID.
    """
    def fetch_pdb_data(pdb_id):
        """Return the PDB-format text of ``pdb_id`` downloaded from RCSB."""
        pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
        response = requests.get(pdb_url, timeout=30)
        # Fail loudly instead of passing an error page on as structure data.
        response.raise_for_status()
        return response.text

    def find_mutated_positions(normal_sequence, mutated_sequence):
        """Return the 1-based positions at which the two sequences differ."""
        mutated_positions = []
        for i, (normal_aa, mutated_aa) in enumerate(zip(normal_sequence, mutated_sequence), start=1):
            if normal_aa != mutated_aa:
                mutated_positions.append(i)
        return mutated_positions

    def color_mutation(model, mutation_positions):
        """Draw the given residues of the mutated model (index 1) as orange sticks."""
        for position in mutation_positions:
            # The colour belongs inside the 'stick' spec; a top-level 'color'
            # key is not part of a 3Dmol style.
            model.setStyle({'model': 1, 'resi': position}, {'stick': {'color': 'orange'}})

    viewer = py3Dmol.view(width=800, height=400)

    # Fetch PDB data for the IDs the caller asked for. Previously the
    # arguments were overwritten by hard-coded IDs.
    normal_pdb_data = fetch_pdb_data(normal_pdb_id)
    mutated_pdb_data = fetch_pdb_data(mutated_pdb_id)

    # Add normal protein structure (model 0)
    viewer.addModel(normal_pdb_data, format='pdb')
    viewer.setStyle({'model': 0}, {'stick': {}})
    viewer.addLabel('Normal', {'fontSize': 15, 'position': {'x': 1, 'y': 5, 'z': 0}})

    # Add mutated protein structure (model 1)
    viewer.addModel(mutated_pdb_data, format='pdb')
    viewer.setStyle({'model': 1}, {'stick': {}})
    viewer.addLabel('Mutated', {'fontSize': 15, 'position': {'x': -1, 'y': 5, 'z': 0}})

    # Highlight last, so that no later setStyle call overwrites it.
    # NOTE(review): sequence positions are used directly as PDB residue
    # numbers; confirm that the entries number residues from 1 in sequence order.
    if normal_sequence is not None and mutated_sequence is not None:
        color_mutation(viewer, find_mutated_positions(normal_sequence, mutated_sequence))

    viewer.zoomTo()
    viewer.show()

# Visualize structures: real PDB IDs plus the full-length sequences (the
# earlier example strings were truncated with "..." and therefore identical).
visualize_proteins_by_ids(
    '1FW1',
    '8E8P',
    normal_sequence="MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASVRMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA",
    mutated_sequence="MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASMRMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA",
)


In [23]:
import py3Dmol
import requests

def visualize_proteins_by_ids(normal_pdb_id, mutated_pdb_id,
                              normal_sequence=None, mutated_sequence=None):
    """Overlay two RCSB PDB entries as cartoons and highlight mutated residues.

    The reference structure is drawn white and the mutated structure orange.

    Args:
        normal_pdb_id: PDB ID of the reference structure.
        mutated_pdb_id: PDB ID of the mutated structure.
        normal_sequence: Optional one-letter sequence of the reference protein.
        mutated_sequence: Optional one-letter sequence of the mutated protein.
            When both sequences are given, every position where they differ
            additionally gets orange sticks on the mutated structure.

    Raises:
        requests.HTTPError: If RCSB answers with an error status for an ID.
    """
    def fetch_pdb_data(pdb_id):
        """Return the PDB-format text of ``pdb_id`` downloaded from RCSB."""
        pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
        response = requests.get(pdb_url, timeout=30)
        # Fail loudly instead of passing an error page on as structure data.
        response.raise_for_status()
        return response.text

    def find_mutated_positions(normal_sequence, mutated_sequence):
        """Return the 1-based positions at which the two sequences differ."""
        mutated_positions = []
        for i, (normal_aa, mutated_aa) in enumerate(zip(normal_sequence, mutated_sequence), start=1):
            if normal_aa != mutated_aa:
                mutated_positions.append(i)
        return mutated_positions

    def color_mutation(model, mutation_positions):
        """Mark the given residues of the mutated model (index 1) in orange."""
        for position in mutation_positions:
            # Sticks are added to the cartoon because the surrounding cartoon
            # is orange too and the residue would otherwise not stand out.
            model.setStyle(
                {'model': 1, 'resi': position},
                {'cartoon': {'color': 'orange'}, 'stick': {'color': 'orange'}},
            )

    viewer = py3Dmol.view(width=800, height=400)

    # Fetch PDB data for the IDs the caller asked for. Previously the
    # arguments were overwritten by hard-coded IDs.
    normal_pdb_data = fetch_pdb_data(normal_pdb_id)
    mutated_pdb_data = fetch_pdb_data(mutated_pdb_id)

    # Add normal protein structure (model 0)
    viewer.addModel(normal_pdb_data, format='pdb')
    viewer.setStyle({'model': 0}, {'cartoon': {'color': 'white'}})
    viewer.addLabel('Normal', {'fontSize': 15, 'position': {'x': 1, 'y': 5, 'z': 0}})

    # Add mutated protein structure (model 1). Styling it by model keeps the
    # reference white; a selection-less setStyle used to recolour both models.
    viewer.addModel(mutated_pdb_data, format='pdb')
    viewer.setStyle({'model': 1}, {'cartoon': {'color': 'orange'}})
    viewer.addLabel('Mutated', {'fontSize': 15, 'position': {'x': -1, 'y': 5, 'z': 0}})

    # Highlight last, so that no later setStyle call overwrites it.
    # NOTE(review): sequence positions are used directly as PDB residue
    # numbers; confirm that the entries number residues from 1 in sequence order.
    if normal_sequence is not None and mutated_sequence is not None:
        color_mutation(viewer, find_mutated_positions(normal_sequence, mutated_sequence))

    viewer.zoomTo()
    viewer.show()

# Visualize structures: real PDB IDs plus the full-length sequences (the
# earlier example strings were truncated with "..." and therefore identical).
visualize_proteins_by_ids(
    '1FW1',
    '8E8P',
    normal_sequence="MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASVRMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA",
    mutated_sequence="MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASMRMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA",
)


In [25]:
import py3Dmol
import requests

def visualize_proteins_by_ids(normal_pdb_id, mutated_pdb_id,
                              normal_sequence=None, mutated_sequence=None):
    """Overlay two RCSB PDB entries as cartoons and highlight mutated residues.

    The reference structure is drawn white and the mutated structure red.

    Args:
        normal_pdb_id: PDB ID of the reference structure.
        mutated_pdb_id: PDB ID of the mutated structure.
        normal_sequence: Optional one-letter sequence of the reference protein.
        mutated_sequence: Optional one-letter sequence of the mutated protein.
            When both sequences are given, every position where they differ
            additionally gets red sticks on the mutated structure.

    Raises:
        requests.HTTPError: If RCSB answers with an error status for an ID.
    """
    def fetch_pdb_data(pdb_id):
        """Return the PDB-format text of ``pdb_id`` downloaded from RCSB."""
        pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
        response = requests.get(pdb_url, timeout=30)
        # Fail loudly instead of passing an error page on as structure data.
        response.raise_for_status()
        return response.text

    def find_mutated_positions(normal_sequence, mutated_sequence):
        """Return the 1-based positions at which the two sequences differ."""
        mutated_positions = []
        for i, (normal_aa, mutated_aa) in enumerate(zip(normal_sequence, mutated_sequence), start=1):
            if normal_aa != mutated_aa:
                mutated_positions.append(i)
        return mutated_positions

    def color_mutation(model, mutation_positions):
        """Mark the given residues of the mutated model (index 1) in red."""
        for position in mutation_positions:
            # Sticks are added to the cartoon because the surrounding cartoon
            # is red too and the residue would otherwise not stand out.
            model.setStyle(
                {'model': 1, 'resi': position},
                {'cartoon': {'color': 'red'}, 'stick': {'color': 'red'}},
            )

    viewer = py3Dmol.view(width=800, height=400)

    # Fetch PDB data for the IDs the caller asked for. Previously the
    # arguments were overwritten by hard-coded IDs.
    normal_pdb_data = fetch_pdb_data(normal_pdb_id)
    mutated_pdb_data = fetch_pdb_data(mutated_pdb_id)

    # Add normal protein structure (model 0)
    viewer.addModel(normal_pdb_data, format='pdb')
    viewer.setStyle({'model': 0}, {'cartoon': {'color': 'white'}})
    viewer.addLabel('Normal', {'fontSize': 15, 'position': {'x': 1, 'y': 5, 'z': 0}})

    # Add mutated protein structure (model 1). Styling it by model keeps the
    # reference white; a selection-less setStyle used to recolour both models.
    viewer.addModel(mutated_pdb_data, format='pdb')
    viewer.setStyle({'model': 1}, {'cartoon': {'color': 'red'}})
    viewer.addLabel('Mutated', {'fontSize': 15, 'position': {'x': -1, 'y': 5, 'z': 0}})

    # Highlight last, so that no later setStyle call overwrites it.
    # NOTE(review): sequence positions are used directly as PDB residue
    # numbers; confirm that the entries number residues from 1 in sequence order.
    if normal_sequence is not None and mutated_sequence is not None:
        color_mutation(viewer, find_mutated_positions(normal_sequence, mutated_sequence))

    viewer.zoomTo()
    viewer.show()

# Visualize structures: real PDB IDs (the call used to pass placeholder
# strings) together with the two sequences to compare.
visualize_proteins_by_ids(
    '1FW1',
    '8E8P',
    normal_sequence="MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASVRMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA",
    mutated_sequence="MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASMRMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA",
)


In [26]:
import py3Dmol
import requests

def visualize_proteins_by_ids(normal_pdb_id, mutated_pdb_id,
                              normal_sequence=None, mutated_sequence=None):
    """Overlay two RCSB PDB entries as cartoons and highlight mutated residues.

    The reference structure is drawn white and the mutated structure red.

    Args:
        normal_pdb_id: PDB ID of the reference structure.
        mutated_pdb_id: PDB ID of the mutated structure.
        normal_sequence: Optional one-letter sequence of the reference protein.
        mutated_sequence: Optional one-letter sequence of the mutated protein.
            When both sequences are given, every position where they differ
            additionally gets red sticks on the mutated structure.

    Raises:
        requests.HTTPError: If RCSB answers with an error status for an ID.
    """
    def fetch_pdb_data(pdb_id):
        """Return the PDB-format text of ``pdb_id`` downloaded from RCSB."""
        pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
        response = requests.get(pdb_url, timeout=30)
        # Fail loudly instead of passing an error page on as structure data.
        response.raise_for_status()
        return response.text

    def find_mutated_positions(normal_sequence, mutated_sequence):
        """Return the 1-based positions at which the two sequences differ."""
        mutated_positions = []
        for i, (normal_aa, mutated_aa) in enumerate(zip(normal_sequence, mutated_sequence), start=1):
            if normal_aa != mutated_aa:
                mutated_positions.append(i)
        return mutated_positions

    def color_mutation(model, mutation_positions):
        """Mark the given residues of the mutated model (index 1) in red."""
        # Nothing to highlight: skip the call rather than send an empty
        # residue list as a selection.
        if not mutation_positions:
            return
        # Sticks are added to the cartoon because the surrounding cartoon is
        # red too and the residues would otherwise not stand out.
        model.setStyle(
            {'model': 1, 'resi': mutation_positions},
            {'cartoon': {'color': 'red'}, 'stick': {'color': 'red'}},
        )

    viewer = py3Dmol.view(width=800, height=400)

    # Fetch PDB data for the IDs the caller asked for. Previously the
    # arguments were overwritten by hard-coded IDs.
    normal_pdb_data = fetch_pdb_data(normal_pdb_id)
    mutated_pdb_data = fetch_pdb_data(mutated_pdb_id)

    # Add normal protein structure (model 0)
    viewer.addModel(normal_pdb_data, format='pdb')
    viewer.setStyle({'model': 0}, {'cartoon': {'color': 'white'}})
    viewer.addLabel('Normal', {'fontSize': 15, 'position': {'x': 1, 'y': 5, 'z': 0}})

    # Add mutated protein structure (model 1). Styling it by model keeps the
    # reference white; a selection-less setStyle used to recolour both models.
    viewer.addModel(mutated_pdb_data, format='pdb')
    viewer.setStyle({'model': 1}, {'cartoon': {'color': 'red'}})
    viewer.addLabel('Mutated', {'fontSize': 15, 'position': {'x': -1, 'y': 5, 'z': 0}})

    # Highlight last, so that no later setStyle call overwrites it.
    # NOTE(review): sequence positions are used directly as PDB residue
    # numbers; confirm that the entries number residues from 1 in sequence order.
    if normal_sequence is not None and mutated_sequence is not None:
        color_mutation(viewer, find_mutated_positions(normal_sequence, mutated_sequence))

    viewer.zoomTo()
    viewer.show()

# Visualize structures: real PDB IDs (the call used to pass placeholder
# strings) together with the two sequences to compare.
visualize_proteins_by_ids(
    '1FW1',
    '8E8P',
    normal_sequence="MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASVRMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA",
    mutated_sequence="MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASMRMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA",
)


In [28]:
import py3Dmol
import requests

def visualize_proteins_by_ids(normal_pdb_id, mutated_pdb_id,
                              normal_sequence=None, mutated_sequence=None):
    """Overlay two RCSB PDB entries as cartoons and highlight mutated residues.

    The reference structure is drawn white and the mutated structure red.

    Args:
        normal_pdb_id: PDB ID of the reference structure.
        mutated_pdb_id: PDB ID of the mutated structure.
        normal_sequence: Optional one-letter sequence of the reference protein.
        mutated_sequence: Optional one-letter sequence of the mutated protein.
            When both sequences are given, every position where they differ
            additionally gets red sticks on the mutated structure.

    Raises:
        ValueError: If both sequences are given but differ in length.
        requests.HTTPError: If RCSB answers with an error status for an ID.
    """
    def fetch_pdb_data(pdb_id):
        """Return the PDB-format text of ``pdb_id`` downloaded from RCSB."""
        pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
        response = requests.get(pdb_url, timeout=30)
        # Fail loudly instead of passing an error page on as structure data.
        response.raise_for_status()
        return response.text

    def find_mutated_positions(normal_sequence, mutated_sequence):
        """Return the 1-based positions at which two equal-length sequences differ."""
        mutated_positions = []

        # A position-by-position comparison is only meaningful when the
        # sequences line up one to one.
        if len(normal_sequence) != len(mutated_sequence):
            raise ValueError("Sequences have different lengths")

        for i, (normal_aa, mutated_aa) in enumerate(zip(normal_sequence, mutated_sequence), start=1):
            if normal_aa != mutated_aa:
                mutated_positions.append(i)

        return mutated_positions

    def color_mutation(model, mutation_positions):
        """Mark the given residues of the mutated model (index 1) in red."""
        # Nothing to highlight: skip the call rather than send an empty
        # residue list as a selection.
        if not mutation_positions:
            return
        # Sticks are added to the cartoon because the surrounding cartoon is
        # red too and the residues would otherwise not stand out.
        model.setStyle(
            {'model': 1, 'resi': mutation_positions},
            {'cartoon': {'color': 'red'}, 'stick': {'color': 'red'}},
        )

    # Validate the sequences before doing any network work.
    if normal_sequence is not None and mutated_sequence is not None:
        mutated_positions = find_mutated_positions(normal_sequence, mutated_sequence)
    else:
        mutated_positions = []

    viewer = py3Dmol.view(width=800, height=400)

    # Fetch PDB data for the IDs the caller asked for. Previously the
    # arguments were overwritten by hard-coded IDs.
    normal_pdb_data = fetch_pdb_data(normal_pdb_id)
    mutated_pdb_data = fetch_pdb_data(mutated_pdb_id)

    # Add normal protein structure (model 0)
    viewer.addModel(normal_pdb_data, format='pdb')
    viewer.setStyle({'model': 0}, {'cartoon': {'color': 'white'}})
    viewer.addLabel('Normal', {'fontSize': 15, 'position': {'x': 1, 'y': 5, 'z': 0}})

    # Add mutated protein structure (model 1). Styling it by model keeps the
    # reference white; a selection-less setStyle used to recolour both models.
    viewer.addModel(mutated_pdb_data, format='pdb')
    viewer.setStyle({'model': 1}, {'cartoon': {'color': 'red'}})
    viewer.addLabel('Mutated', {'fontSize': 15, 'position': {'x': -1, 'y': 5, 'z': 0}})

    # Highlight last, so that no later setStyle call overwrites it.
    # NOTE(review): sequence positions are used directly as PDB residue
    # numbers; confirm that the entries number residues from 1 in sequence order.
    color_mutation(viewer, mutated_positions)

    viewer.zoomTo()
    viewer.show()

# Visualize structures: real PDB IDs (the call used to pass placeholder
# strings) together with the two sequences to compare.
visualize_proteins_by_ids(
    '1FW1',
    '8E8P',
    normal_sequence="MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASVRMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA",
    mutated_sequence="MQAGKPILYSYFRSSCSWRVRIALALKGIDYKTVPINLIKDRGQQFSKDFQALNPMKQVPTLKIDGITIHQSLAIIEYLEEMRPTPRLLPQDPKKRASMRMISDLIAGGIQPLQNLSVLKQVGEEMQLTWAQNAITCGFNALEQILQSTAGIYCVGDEVTMADLCLVPQVANAERFKVDLTPYPTISSINKRLLVLEAFQVSHPCRQPDTPTELRA",
)


In [33]:
import requests

def fetch_fasta_from_pdb_id(pdb_id):
    """Download the FASTA text of a PDB entry from RCSB.

    Args:
        pdb_id: PDB entry ID, e.g. ``'3CYO'``.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Construct the URL for the RCSB PDB web service
    url = f'https://www.rcsb.org/fasta/entry/{pdb_id}'

    # A timeout keeps the notebook from hanging on an unresponsive server.
    response = requests.get(url, timeout=30)

    # Raise on 4xx/5xx. Any response that gets past this returns its text; the
    # earlier `== 200` branch fell through and returned None for other
    # non-error statuses.
    response.raise_for_status()
    return response.text

# Example usage: fetch and print the FASTA record of each entry in turn.
# Replace the IDs with your own PDB IDs.
for pdb_id in ('3CYO', '3CP1'):
    fasta_sequence = fetch_fasta_from_pdb_id(pdb_id)
    print(f'FASTA sequence for {pdb_id}:\n{fasta_sequence}')

FASTA sequence for 3CYO:
>3CYO_1|Chain A|Transmembrane protein|Human immunodeficiency virus type 1
TLTVQARQLLSGIVQQQNDLLRAIEAQQHLLQLTVWGIKQLQARSGGRGGWMEWDREINNYTSLIHSLIEKSQNQQEKNEQELLEL

FASTA sequence for 3CP1:
>3CP1_1|Chain A|Transmembrane Protein|Human immunodeficiency virus 1 (11676)
TLTVQARQLLSGIVQQQNDLLRAIEAQQHLLQLTVWGIKQLQARSGGRGGWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLEL



In [34]:
def find_mutations(sequence1, sequence2):
    """List the substitutions between two equal-length sequences.

    Args:
        sequence1: Reference sequence.
        sequence2: Sequence compared against the reference.

    Returns:
        A list of ``(position, residue1, residue2)`` tuples, one per differing
        position, with positions counted from 1.

    Raises:
        ValueError: If the sequences differ in length.
    """
    # A position-by-position comparison needs the sequences to line up.
    if len(sequence1) != len(sequence2):
        raise ValueError("Sequences have different lengths")

    return [
        (index, left, right)
        for index, (left, right) in enumerate(zip(sequence1, sequence2), start=1)
        if left != right
    ]

# Example usage: the two sequences printed by the FASTA download cell.
sequence1 = "TLTVQARQLLSGIVQQQNDLLRAIEAQQHLLQLTVWGIKQLQARSGGRGGWMEWDREINNYTSLIHSLIEKSQNQQEKNEQELLEL"
sequence2 = "TLTVQARQLLSGIVQQQNDLLRAIEAQQHLLQLTVWGIKQLQARSGGRGGWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLEL"

mutations = find_mutations(sequence1, sequence2)

# One line per substitution: 1-based position, original residue, new residue.
print("Mutations:")
for position, aa1, aa2 in mutations:
    print("Position {}: {} -> {}".format(position, aa1, aa2))


Mutations:
Position 71: K -> E


In [39]:
import py3Dmol
import requests

def visualize_proteins_by_ids(normal_pdb_id, mutated_pdb_id,
                              normal_sequence=None, mutated_sequence=None):
    """Overlay two RCSB PDB entries as sticks and highlight mutated residues.

    Args:
        normal_pdb_id: PDB ID of the reference structure.
        mutated_pdb_id: PDB ID of the mutated structure.
        normal_sequence: Optional one-letter sequence of the reference protein.
        mutated_sequence: Optional one-letter sequence of the mutated protein.
            When both sequences are given, every position where they differ is
            drawn in orange on the mutated structure.

    Raises:
        requests.HTTPError: If RCSB answers with an error status for an ID.
    """
    def fetch_pdb_data(pdb_id):
        """Return the PDB-format text of ``pdb_id`` downloaded from RCSB."""
        pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
        response = requests.get(pdb_url, timeout=30)
        # Fail loudly instead of passing an error page on as structure data.
        response.raise_for_status()
        return response.text

    def find_mutated_positions(normal_sequence, mutated_sequence):
        """Return the 1-based positions at which the two sequences differ."""
        mutated_positions = []
        for i, (normal_aa, mutated_aa) in enumerate(zip(normal_sequence, mutated_sequence), start=1):
            if normal_aa != mutated_aa:
                mutated_positions.append(i)
        return mutated_positions

    def color_mutation(model, mutation_positions):
        """Draw the given residues of the mutated model (index 1) as orange sticks."""
        for position in mutation_positions:
            # The colour belongs inside the 'stick' spec; a top-level 'color'
            # key is not part of a 3Dmol style.
            model.setStyle({'model': 1, 'resi': position}, {'stick': {'color': 'orange'}})

    viewer = py3Dmol.view(width=800, height=400)

    # Fetch PDB data for the IDs the caller asked for. Previously the
    # arguments were overwritten by hard-coded IDs.
    normal_pdb_data = fetch_pdb_data(normal_pdb_id)
    mutated_pdb_data = fetch_pdb_data(mutated_pdb_id)

    # Add normal protein structure (model 0)
    viewer.addModel(normal_pdb_data, format='pdb')
    viewer.setStyle({'model': 0}, {'stick': {}})
    viewer.addLabel('Normal', {'fontSize': 15, 'position': {'x': 1, 'y': 5, 'z': 0}})

    # Add mutated protein structure (model 1)
    viewer.addModel(mutated_pdb_data, format='pdb')
    viewer.setStyle({'model': 1}, {'stick': {}})
    viewer.addLabel('Mutated', {'fontSize': 15, 'position': {'x': -1, 'y': 5, 'z': 0}})

    # Highlight last, so that no later setStyle call overwrites it.
    # NOTE(review): sequence positions are used directly as PDB residue
    # numbers; confirm that the entries number residues from 1 in sequence order.
    if normal_sequence is not None and mutated_sequence is not None:
        color_mutation(viewer, find_mutated_positions(normal_sequence, mutated_sequence))

    viewer.zoomTo()
    viewer.show()

# Visualize structures: real PDB IDs (the call used to pass placeholder
# strings) together with the two sequences to compare.
visualize_proteins_by_ids(
    '3CYO',
    '3CP1',
    normal_sequence="TLTVQARQLLSGIVQQQNDLLRAIEAQQHLLQLTVWGIKQLQARSGGRGGWMEWDREINNYTSLIHSLIEKSQNQQEKNEQELLEL",
    mutated_sequence="TLTVQARQLLSGIVQQQNDLLRAIEAQQHLLQLTVWGIKQLQARSGGRGGWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLEL",
)


In [40]:
import nglview
from Bio.PDB import PDBParser

def find_differences_in_fasta(normal_sequence, mutated_sequence):
    """Return the 1-based positions where two equal-length sequences differ.

    Args:
        normal_sequence: Reference sequence.
        mutated_sequence: Sequence compared against the reference.

    Returns:
        A list of positions in ascending order; empty when the sequences match.

    Raises:
        ValueError: If the sequences differ in length.
    """
    # Guard clause: the comparison below pairs residues one to one.
    if len(normal_sequence) != len(mutated_sequence):
        raise ValueError("Sequences have different lengths")

    residue_pairs = enumerate(zip(normal_sequence, mutated_sequence), start=1)
    return [index for index, (before, after) in residue_pairs if before != after]

def visualize_differences(structure_path, differences):
    """Show a structure file in nglview with selected residues in red spacefill.

    Args:
        structure_path: Path of the structure file to load.
        differences: Residue numbers to highlight.

    Returns:
        The nglview widget. Leave the call as the last expression of a cell
        (or pass the result to ``display``) to render it.
    """
    # Load the structure using nglview
    view = nglview.show_structure_file(structure_path)

    # Color the differing positions in red
    for position in differences:
        # NGL selects a residue by its bare number; the previous '$<number>'
        # form is not a residue selector in NGL's selection language.
        view.add_representation('spacefill', selection=str(position), color='red')

    view.center_view()
    view._remote_call('setSize', target='Widget', args=['100%', '400px'])
    # Return the widget so the notebook can render it; the function used to
    # end with view.show() and return nothing.
    return view

# Example usage: compare the two sequences and mark the differences on 3CYO.
normal_sequence = "TLTVQARQLLSGIVQQQNDLLRAIEAQQHLLQLTVWGIKQLQARSGGRGGWMEWDREINNYTSLIHSLIEKSQNQQEKNEQELLEL"
mutated_sequence = "TLTVQARQLLSGIVQQQNDLLRAIEAQQHLLQLTVWGIKQLQARSGGRGGWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLEL"
structure_path = '/content/3cyo.pdb'  # Update with the actual path

differences = find_differences_in_fasta(normal_sequence, mutated_sequence)

# Heading and position list on separate lines.
print("Differing positions:", differences, sep="\n")

# Visualize differing positions on the 3D structure
visualize_differences(structure_path, differences)




Differing positions:
[71]


In [43]:
import py3Dmol
from Bio.PDB import PDBParser

def find_differences_in_fasta(normal_sequence, mutated_sequence):
    """Compare two sequences residue by residue.

    Args:
        normal_sequence: Reference sequence.
        mutated_sequence: Sequence compared against the reference.

    Returns:
        The positions (counted from 1) at which the residues differ.

    Raises:
        ValueError: If the sequences differ in length.
    """
    same_length = len(normal_sequence) == len(mutated_sequence)
    if not same_length:
        raise ValueError("Sequences have different lengths")

    paired = zip(normal_sequence, mutated_sequence)
    return [pos for pos, pair in enumerate(paired, start=1) if pair[0] != pair[1]]

def visualize_differences_py3dmol(structure_path, differences):
    """Show one PDB file as a cartoon with selected residues as red sticks.

    Args:
        structure_path: Path of the PDB file to display.
        differences: Residue numbers to highlight.
    """
    # Read the file once and close it straight away. The same path used to be
    # opened twice, without closing, and loaded as two identical models.
    with open(structure_path) as pdb_handle:
        pdb_text = pdb_handle.read()

    # Initialize the viewer
    viewer = py3Dmol.view(width=800, height=400)

    # Add the structure and draw it as a cartoon
    viewer.addModel(pdb_text, format='pdb')
    viewer.setStyle({'cartoon': {}})

    # Color differing positions in red; addStyle layers sticks on top of the
    # cartoon instead of replacing it.
    for position in differences:
        viewer.addStyle({'resi': str(position)}, {'stick': {'color': 'red'}})

    # Zoom to fit the structure
    viewer.zoomTo()

    # Show the viewer
    viewer.show()

# Example usage: compare the two sequences and mark the differences on 3CYO.
normal_sequence = "TLTVQARQLLSGIVQQQNDLLRAIEAQQHLLQLTVWGIKQLQARSGGRGGWMEWDREINNYTSLIHSLIEKSQNQQEKNEQELLEL"
mutated_sequence = "TLTVQARQLLSGIVQQQNDLLRAIEAQQHLLQLTVWGIKQLQARSGGRGGWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLEL"
structure_path = '/content/3cyo.pdb'  # Update with the actual path

differences = find_differences_in_fasta(normal_sequence, mutated_sequence)

# Heading and position list on separate lines.
print("Differing positions:", differences, sep="\n")

# Visualize differing positions on the 3D structure using Py3Dmol
visualize_differences_py3dmol(structure_path, differences)


Differing positions:
[71]


In [44]:
import py3Dmol

def visualize_differences_py3dmol(normal_pdb, mutated_pdb, differences):
    """Show two structures in separate viewers with the same residues in red.

    Args:
        normal_pdb: PDB-format text of the reference structure.
        mutated_pdb: PDB-format text of the mutated structure.
        differences: Residue numbers to draw as red sticks in both viewers.
    """
    def _cartoon_view(pdb_text):
        # Build one 800x400 viewer holding a single cartoon-styled model.
        panel = py3Dmol.view(width=800, height=400)
        panel.addModel(pdb_text, format='pdb')
        panel.setStyle({'cartoon': {}})
        panel.zoomTo()
        return panel

    viewer_normal = _cartoon_view(normal_pdb)
    viewer_mutated = _cartoon_view(mutated_pdb)

    # addStyle layers red sticks over the cartoon at each listed residue.
    highlight = {'stick': {'color': 'red'}}
    for position in differences:
        residue = {'resi': str(position)}
        viewer_normal.addStyle(residue, highlight)
        viewer_mutated.addStyle(residue, highlight)

    # Reference viewer is rendered first, mutated viewer second.
    viewer_normal.show()
    viewer_mutated.show()

from pathlib import Path

# Example usage: Path.read_text closes each file once it has been read; the
# previous open(...).read() calls left the handles open.
normal_pdb = Path('/content/3cyo.pdb').read_text()
mutated_pdb = Path('/content/3cp1.pdb').read_text()
differences = [71]  # Replace with your differing positions

# Visualize differing positions on both 3D structures using Py3Dmol
visualize_differences_py3dmol(normal_pdb, mutated_pdb, differences)
