# Scop3P

A comprehensive database of human phosphosites within their full context. Scop3P integrates sequences (UniProtKB/Swiss-Prot), structures (PDB), and uniformly reprocessed phosphoproteomics data (PRIDE) to annotate all known human phosphosites. 

Scop3P, available at https://iomics.ugent.be/scop3p, presents a unique resource for visualization and analysis of phosphosites and for understanding of phosphosite structure–function relationships.

Please cite: https://doi.org/10.1021/acs.jproteome.0c00306

## Install Dependencies

In [4]:
%%capture
!jupyter labextension install jupyterlab_3dmol
!pip install pandas matplotlib py3Dmol b2btools==3.0.7b2 pyvis ipycytoscape


In [58]:
%%capture
import requests, tempfile,json
import pandas as pd 
from b2bTools import SingleSeq, constants
import py3Dmol

## Target protein

In [59]:
TARGET_PROTEIN_ID = "P07949" # Write here the Protein ID of your protein of interest
PDB_ID = "2IVT" # Write here the PDB ID of your protein of interest

## API Request

This function makes a GET request to the Scop3P API endpoint for a given protein accession ID and returns the parsed JSON response (protein metadata plus its list of modifications), or `None` if the request does not succeed.

In [60]:
def fetch_protein_modifications(accession):
    """
    Fetches protein modifications for a given UniProt ID from the Scop3P API.

    Parameters:
    accession (str): UniProt ID of the protein.

    Returns:
    dict or None: The decoded JSON response when the server answers with
    HTTP 200, otherwise None.

    Raises:
    requests.exceptions.RequestException: if the request cannot be completed
    (connection problem or timeout).
    """
    BASE_URL = "https://iomics.ugent.be/scop3p/api/modifications"
    headers = {'accept': 'application/json'}
    # Let requests build (and URL-encode) the query string, and never wait
    # forever on an unresponsive server.
    response = requests.get(
        BASE_URL,
        params={'accession': accession},
        headers=headers,
        timeout=60,
    )
    if response.status_code != 200:
        return None
    return response.json()

## Data parsing

For parsing the JSON as a table, we'll use pandas library.

In [61]:
# Query Scop3P for the target protein and print a short summary.
scop3P_results = fetch_protein_modifications(TARGET_PROTEIN_ID)

# fetch_protein_modifications() returns None on a non-200 answer; fail here
# with a clear message instead of a "NoneType is not subscriptable" error.
if scop3P_results is None:
    raise RuntimeError(
        f"Scop3P returned no data for accession {TARGET_PROTEIN_ID!r}; "
        "check the accession and that the service is reachable."
    )

protein_name = scop3P_results['proteinName']
entry_name = scop3P_results['entryName']
accession = scop3P_results['accession']
url = scop3P_results['url']
modifications = scop3P_results['modifications']

print("""
--------------------------------------------------------------------------------
Scop3P: A Comprehensive Resource of Human Phosphosites within Their Full Context
--------------------------------------------------------------------------------

{0}:{1}

Phospho-sites found: {3} entries.

Full entry available on SCOP3P website: {2}
""".format(entry_name, protein_name, url, len(modifications)))


--------------------------------------------------------------------------------
Scop3P: A Comprehensive Resource of Human Phosphosites within Their Full Context
--------------------------------------------------------------------------------

RET_HUMAN:Proto-oncogene tyrosine-protein kinase receptor Ret (EC 2.7.10.1) (Cadherin family member 12) (Proto-oncogene c-Ret) [Cleaved into: Soluble RET kinase fragment; Extracellular cell-membrane anchored RET cadherin 120 kDa fragment]

Phospho-sites found: 9 entries.

Full entry available on SCOP3P website: https://iomics.ugent.be/scop3p/index?protein=P07949



In [104]:
def get_modification_table(modifications):
    """
    Builds a pandas DataFrame from the protein modifications.

    Only the columns of interest are kept, in a fixed order. Columns that are
    absent from the input (including the case of an empty list) are still
    present in the result and filled with missing values, so downstream code
    can always rely on them being there.

    Parameters:
    modifications (list): A list of dictionaries, each representing a protein modification.

    Returns:
    pandas.DataFrame: One row per modification.
    """
    columns = [
        'residue',
        'name',
        'evidence',
        'position',
        'source',
        'reference',
        'functionalScore',
        'specificSinglyPhosphorylated',
    ]
    # reindex() selects/reorders like df[columns] but does not raise KeyError
    # when a column is missing.
    return pd.DataFrame(modifications).reindex(columns=columns)
    
# Build the table from the Scop3P modifications and render it in the notebook.
modification_table = get_modification_table(modifications)
display(modification_table)

### Get Disease causing variants from UniProt using API

In [63]:
import requests
import pandas as pd

# Use the protein selected at the top of the notebook rather than a second,
# hard-coded accession that could silently drift out of sync.
ACC = TARGET_PROTEIN_ID
url = f"https://www.ebi.ac.uk/proteins/api/variation/{ACC}"
headers = {"Accept": "application/json"}

r = requests.get(url, headers=headers, timeout=60)
r.raise_for_status()
data = r.json()

# Column order of the resulting table; also used when no variant is found so
# that mut_table["position"] exists even for an empty result.
mut_columns = [
    "ACC_ID",
    "position",
    "WT",
    "MT",
    "consequence",
    "disease_name",
    "disease_description",
    "source_type",
]

rows = []

for feat in data.get("features", []):
    if feat.get("type") != "VARIANT":
        continue

    # Skip variants whose start coordinate is missing or not a plain integer.
    try:
        position = int(feat.get("begin"))
    except (TypeError, ValueError):
        continue

    for assoc in feat.get("association", []):
        # Keep only associations explicitly flagged as disease-related.
        if assoc.get("disease") is not True:
            continue

        rows.append({
            "ACC_ID": ACC,
            "position": position,
            "WT": feat.get("wildType"),
            "MT": feat.get("mutatedType"),
            "consequence": feat.get("consequenceType"),
            "disease_name": assoc.get("name"),
            "disease_description": assoc.get("description"),
            "source_type": feat.get("sourceType")
            # "somatic_status": feat.get("somaticStatus"),
            # "genomic_location": ";".join(feat.get("genomicLocation", [])),
            # "cytoband": feat.get("cytogeneticBand")
        })

mut_table = pd.DataFrame(rows, columns=mut_columns)


In [103]:
# Show the disease-associated variants collected in the previous cell.
mut_table

## Rendering results

For visualizing the 3D structure, we'll use py3Dmol library.

## Let's get AlphaFold model for the target protein


In [65]:
# Download the AlphaFold model of the protein and store it as "<accession>.pdb"
# in the current working directory.
import urllib.request

AFurl = "https://alphafold.ebi.ac.uk/files/AF-"
modelurl = f"{AFurl}{accession}-F1-model_v6.pdb"
AFmodel = urllib.request.urlretrieve(modelurl, f"{accession}.pdb")


## Rendering results

For visualizing the 3D structure, we'll use py3Dmol library.

In [51]:
import py3Dmol

def display_3D(modification_table, accession):
    """
    Render the local structure file "<accession>.pdb" with the modified
    residues highlighted.

    The whole structure is drawn as a silver cartoon under a translucent
    surface. Every row of modification_table adds sticks and spheres on the
    residue number given in its 'position' column, coloured by the 'residue'
    column (TYR, SER, THR; anything else gets a fallback colour). Hovering any
    atom shows a label with residue name, number and chain.

    Parameters:
    modification_table (pandas.DataFrame): needs 'position' and 'residue' columns.
    accession (str): file stem of the PDB file to read.

    Returns:
    py3Dmol.view: the configured viewer.
    """
    residue_colors = {
        'TYR': '#2CA02C',
        'SER': '#1F77B4',
        'THR': '#FF7F0E',
    }
    fallback_color = '#7B241C'

    # Read through a context manager so the file handle is always closed.
    with open(accession + '.pdb', 'r') as pdb_file:
        pdb_text = pdb_file.read()

    view = py3Dmol.view(width=700, height=500)
    view.addModel(pdb_text, 'pdb')

    view.setStyle({}, {'cartoon': {'color': 'silver'}})
    view.addSurface(py3Dmol.VDW, {'opacity': 0.35, 'color': 'white'}, {})

    # --- Color phosphosites
    for position, residue in zip(modification_table['position'], modification_table['residue']):
        # Normalize residue label to avoid mismatches (e.g. trailing spaces)
        color = residue_colors.get(str(residue).strip(), fallback_color)

        sel = {'resi': str(position)}  # add a 'chain' key if needed

        view.addStyle(sel, {'stick': {'color': color}})
        view.addStyle(sel, {'sphere': {'color': color, 'radius': 0.9}})

    # --- Hover for ALL amino acids (all atoms) ---
    view.setHoverable(
        {}, True,
        """
        function(atom, viewer, event, container) {
            if(!atom.label) {
                atom.label = viewer.addLabel(
                    atom.resn + " " + atom.resi + (atom.chain ? (" : " + atom.chain) : ""),
                    {position: atom, backgroundColor: 'mintcream', fontColor: 'black'}
                );
            }
        }
        """,
        """
        function(atom, viewer) {
            if(atom.label) {
                viewer.removeLabel(atom.label);
                delete atom.label;
            }
        }
        """
    )

    view.zoomTo()
    view.render()
    return view


In [84]:
# Show the downloaded "<accession>.pdb" model with the Scop3P sites highlighted.
display_3D(modification_table,accession)

### PDB structure mapping

In [56]:
def display_3D(modification_table, pdb_id, chain=None):
    """
    Show the PDB entry `pdb_id` (loaded by py3Dmol through a "pdb:<id>" query)
    and highlight the residues listed in modification_table.

    Each table row contributes sticks and spheres on the residue number found
    in 'position', coloured by the 'residue' label. When `chain` is given the
    highlight is limited to that chain. Hovering any atom shows a label with
    residue name, number and chain.

    Returns the py3Dmol view.
    """
    palette = {'TYR': '#2CA02C', 'SER': '#1F77B4', 'THR': '#FF7F0E'}
    other_color = '#7B241C'

    view = py3Dmol.view(query=f"pdb:{pdb_id}")

    # Whole structure as cartoon, wrapped in one translucent surface.
    view.setStyle({}, {'cartoon': {'color': 'skyblue'}})
    view.addSurface(py3Dmol.VDW, {'opacity': 0.6, 'color': 'white'}, {})

    # Highlight every modified residue with atom-based styles.
    for _, record in modification_table.iterrows():
        label = str(record['residue']).strip()
        shade = palette.get(label, other_color)

        selection = {'resi': str(record['position'])}
        if chain:
            selection['chain'] = chain

        for style_name, style_options in (
            ('stick', {'color': shade}),
            ('sphere', {'color': shade, 'radius': 0.9}),
        ):
            view.addStyle(selection, {style_name: style_options})

    # Residue labels on hover, for every atom.
    view.setHoverable(
        {}, True,
        """
        function(atom, viewer, event, container) {
            if (!atom.label) {
                atom.label = viewer.addLabel(
                    atom.resn + " " + atom.resi + (atom.chain ? (" : " + atom.chain) : ""),
                    {position: atom, backgroundColor: 'mintcream', fontColor: 'black'}
                );
            }
        }
        """,
        """
        function(atom, viewer) {
            if (atom.label) {
                viewer.removeLabel(atom.label);
                delete atom.label;
            }
        }
        """
    )

    view.zoomTo()
    view.render()
    return view


In [85]:
# Use the PDB entry configured in the "Target protein" cell instead of a
# hard-coded identifier, so changing PDB_ID actually changes what is shown.
display_3D(modification_table, pdb_id=PDB_ID, chain="A")

## Mapping Biophysical properties on AF models

For visualizing the 3D structure, we'll use py3Dmol library.

### Predict dynamic properties from Bio2Byte tools

We will fetch the amino acid sequence from UniProtKB API

In [66]:
def fetch_sequence_aminoacids(accession):
    """
    Download the FASTA record of a UniProtKB entry and split it into header
    and sequence.

    Parameters:
    accession (str): UniProt accession of the protein.

    Returns:
    tuple[str, str]: (header line including the leading '>', amino-acid
    sequence with line breaks removed). Both are empty strings when the
    server does not answer with HTTP 200.

    Raises:
    requests.exceptions.RequestException: if the request cannot be completed
    (connection problem or timeout).
    """
    # Query the UniProt REST host directly over HTTPS; the accession is part
    # of the path, so no additional query string is needed.
    url = f"https://rest.uniprot.org/uniprotkb/{accession}.fasta"
    response = requests.get(url, timeout=60)
    if response.status_code != 200:
        return "", ""

    lines = response.content.decode("utf-8").splitlines()
    if not lines:
        return "", ""

    protein_id, *sequence_lines = lines
    amino_acids = "".join(line.strip() for line in sequence_lines)

    return protein_id, amino_acids

#### Predict Dynamic properties

In [67]:
# Fetch the FASTA header and the amino-acid sequence of the target protein.
_protein_id, sequence = fetch_sequence_aminoacids(TARGET_PROTEIN_ID)

In [68]:
%%capture
def predict_biophysical_features(accession, sequence):
    """
    Run the b2bTools single-sequence predictors on one protein.

    The sequence is written as a one-record FASTA (header ">accession") to a
    temporary file, which is handed to SingleSeq with the DynaMine, DisoMine
    and EFoldMine tools selected. The temporary file is removed when the
    `with` block exits.

    Returns whatever `get_all_predictions()` returns for that run.
    """
    requested_tools = [
        constants.TOOL_DYNAMINE,
        constants.TOOL_DISOMINE,
        constants.TOOL_EFOLDMINE,
    ]
    fasta_record = f">{accession}\n{sequence}\n"

    with tempfile.NamedTemporaryFile(prefix="seq_", suffix=".fasta", mode="w") as fasta_file:
        fasta_file.write(fasta_record)
        # Make sure the record is on disk before the predictor opens the path.
        fasta_file.flush()
        fasta_file.seek(0)

        predictor = SingleSeq(fasta_file.name)
        predicted = predictor.predict(tools=requested_tools)
        return predicted.get_all_predictions()


# Run the predictors for the target protein.
biophysical_features = predict_biophysical_features(TARGET_PROTEIN_ID, sequence)

# Keep only the entry of the target protein; it is looked up under the same
# identifier that was written as the FASTA header.
biophysical_features_target_protein = biophysical_features['proteins'][TARGET_PROTEIN_ID]

In [69]:
# Backbone dynamics classes (BD_label):
#   1: value above 1.0   -> membrane spanning regions
#   2: value above 0.8   -> rigid conformations
#   3: value above 0.69  -> 'context' dependent, either rigid or flexible
#   4: anything else     -> flexible regions
# DO_label is 1 when the disorder propensity is above 0.5, otherwise 0.
# EF_label is 1 when the early folding value is above 0.169, otherwise 0.

# Start from empty label lists; one entry per residue is appended below.
for label_key in ('BD_label', 'DO_label', 'EF_label'):
    biophysical_features_target_protein[label_key] = []
# (disabled) biophysical_features_target_protein['psites'] = []

for index, _residue in enumerate(biophysical_features_target_protein['seq']):
    backbone_value = biophysical_features_target_protein['backbone'][index]
    if backbone_value > 1:
        backbone_class = 1
    elif backbone_value > 0.8:
        backbone_class = 2
    elif backbone_value > 0.69:
        backbone_class = 3
    else:
        backbone_class = 4
    biophysical_features_target_protein['BD_label'].append(backbone_class)

    disorder_value = biophysical_features_target_protein['disoMine'][index]
    biophysical_features_target_protein['DO_label'].append(int(disorder_value > 0.5))

    early_folding_value = biophysical_features_target_protein['earlyFolding'][index]
    biophysical_features_target_protein['EF_label'].append(int(early_folding_value > 0.169))

    # (disabled) flag residues whose 1-based position is a known phosphosite:
    # biophysical_features_target_protein['psites'].append(1 if index + 1 in phosphorylated else 0)

### Get the dynamic properties in a pandas DataFrame object

In [70]:
# One row per residue, one column per entry of the prediction dictionary.
dynamic_properties=pd.DataFrame(biophysical_features_target_protein)
# 1-based sequence position of every row.
dynamic_properties['seqpos']=range(1,len(dynamic_properties)+1)

### Map onto AF models and visualize

For visualizing the 3D structure, we'll use py3Dmol library.

In [71]:
import colorsys


def pseudocolor(minval, maxval,val):
    """ Convert a value in the range min.....max to a colour on the
        Green...Yellow...Red scale (green at minval, red at maxval).

        The hue runs from 120 degrees (green) down to 0 degrees (red) in the
        HSV colorspace, at full saturation and brightness.

        Values outside [minval, maxval] are clamped to the nearest end of the
        scale. If minval equals maxval there is no range to scale over and the
        midpoint of the scale is returned.

        Returns the colour as a '0xRRGGBB' string.
    """
    minval, maxval, val = float(minval), float(maxval), float(val)
    span = maxval - minval
    if span == 0:
        # Degenerate range: avoid a division by zero.
        fraction = 0.5
    else:
        fraction = (maxval - val) / span
        # Clamp so that out-of-range values cannot wrap around the hue circle.
        fraction = min(1.0, max(0.0, fraction))
    h = fraction * 120
    r, g, b = colorsys.hsv_to_rgb(h/360, 1., 1.)
    return '0x%02x%02x%02x' % tuple(int(255 * channel) for channel in (r, g, b))

In [72]:
def remap(df):
    """
    Turn the per-residue predictions into per-residue colours.

    Each of the columns 'backbone', 'disoMine' and 'earlyFolding' is scaled
    between its OWN minimum and maximum and converted with pseudocolor().
    (Previously the disorder values were scaled with the early-folding
    range, which produced wrong colours.)

    Parameters:
    df (pandas.DataFrame): needs the columns 'backbone', 'disoMine' and
    'earlyFolding', one row per residue in sequence order.

    Returns:
    tuple[dict, dict, dict]: (BDcolor, EFcolor, DOcolor), each mapping the
    1-based row number to a '0xRRGGBB' colour string. Note the order:
    backbone, early folding, disorder.
    """
    def _column_colors(column):
        # Colours for one column, keyed by 1-based sequence position.
        values = [float(value) for value in df[column]]
        if not values:
            return {}
        low, high = min(values), max(values)
        return {
            seqpos: pseudocolor(low, high, value)
            for seqpos, value in enumerate(values, start=1)
        }

    BDcolor = _column_colors('backbone')
    DOcolor = _column_colors('disoMine')
    EFcolor = _column_colors('earlyFolding')

    return BDcolor,EFcolor,DOcolor
        
        

In [81]:
def display_3D(dynamic_properties):
    """
    Show the model "<accession>.pdb" in a 2x2 grid, one colouring per panel.

    Panels:
      (0,0) cartoon coloured by the B-factor column (red-white-blue, 0-100)
      (0,1) cartoon coloured by backbone dynamics
      (1,0) cartoon coloured by disorder propensity
      (1,1) cartoon coloured by early folding

    Residues listed in the notebook-level `modification_table` additionally
    get an opaque surface and a faint sphere in every panel. Hovering any atom
    shows a label with residue name, number and chain.

    Relies on the notebook globals `modification_table` and `accession`.

    Parameters:
    dynamic_properties (pandas.DataFrame): table accepted by remap().

    Returns:
    py3Dmol.view: the configured viewer.
    """
    BDcolor, EFcolor, DOcolor = remap(dynamic_properties)
    panels = [(0,0), (0,1), (1,0), (1,1)]

    # Read through a context manager so the file handle is always closed.
    with open(accession + '.pdb', 'r') as pdb_file:
        pdb_text = pdb_file.read()

    view = py3Dmol.view(viewergrid=(2,2))
    view.addModel(pdb_text, 'pdb')

    # IMPORTANT: setStyle(selection, style)
    view.setStyle({}, {'cartoon': {'colorscheme': {'prop':'b','gradient':'rwb','min':0.0,'max':100.0}}}, viewer=(0,0))
    view.setStyle({}, {'cartoon': {'colorscheme': {'prop':'resi','map':BDcolor}}}, viewer=(0,1))
    view.setStyle({}, {'cartoon': {'colorscheme': {'prop':'resi','map':DOcolor}}}, viewer=(1,0))
    view.setStyle({}, {'cartoon': {'colorscheme': {'prop':'resi','map':EFcolor}}}, viewer=(1,1))

    # Surface highlight + pickable overlay on mod residues
    for raw_position in modification_table.position.tolist():
        # Positions may arrive as floats or be missing; normalise to int.
        try:
            mod = int(raw_position)
        except (TypeError, ValueError):
            continue
        # A site outside the predicted sequence has no colour; skip it rather
        # than abort the whole rendering with a KeyError.
        if mod not in BDcolor or mod not in DOcolor or mod not in EFcolor:
            continue

        sel = {'resi': str(mod)}

        view.addSurface(py3Dmol.VDW, {'opacity': 1.0}, sel, viewer=(0,0))
        view.addSurface(py3Dmol.VDW, {'opacity': 1.0, 'color': BDcolor[mod]}, sel, viewer=(0,1))
        view.addSurface(py3Dmol.VDW, {'opacity': 1.0, 'color': DOcolor[mod]}, sel, viewer=(1,0))
        view.addSurface(py3Dmol.VDW, {'opacity': 1.0, 'color': EFcolor[mod]}, sel, viewer=(1,1))

        # MAKE IT PICKABLE: opacity must be > 0
        for panel in panels:
            view.addStyle(sel, {'sphere': {'radius': 0.8, 'opacity': 0.15}}, viewer=panel)
            # optional: stick helps pickability even more
            # view.addStyle(sel, {'stick': {'opacity': 0.15}}, viewer=panel)

    # Background + hover everywhere (per panel)
    for panel in panels:
        view.setBackgroundColor('white', viewer=panel)

        view.setHoverable(
            {},  # hover everywhere
            True,
            """
            function(atom, viewer, event, container) {
                if (!atom.label) {
                    atom.label = viewer.addLabel(
                        atom.resn + " " + atom.resi + (atom.chain ? (" : " + atom.chain) : ""),
                        {position: atom, backgroundColor: 'mintcream', fontColor:'black'}
                    );
                }
            }
            """,
            """
            function(atom, viewer) {
                if (atom.label) {
                    viewer.removeLabel(atom.label);
                    delete atom.label;
                }
            }
            """,
            viewer=panel
        )

    view.zoomTo()
    view.render()
    return view


## Visualize the 3D structure

In [86]:
# Render the four-panel view for the predictions computed above.
display_3D(dynamic_properties)

### Let's convert the 3D protein structure into a Residue Interaction Network (2.5D map)

Every amino acid is a node, and an edge is drawn between two amino acids if they are close (≤ 8 Ångström) in 3D space.

In [68]:
import numpy as np
import networkx as nx
from scipy.spatial import KDTree
from Bio.PDB import PDBParser

def build_geometry_graph_from_pdb(pdb_path, chain="A", cutoff=8.0, atom_name="CA"):
    """
    Build a residue interaction network from one chain of a PDB file.

    Each standard residue is represented by the coordinates of `atom_name`;
    if the residue has no such atom its CA is used instead, and residues with
    neither are skipped. Only the first model of the file is read. If `chain`
    is not present in that model, the first chain of the model is used.

    Nodes: (chain_id, residue_number) tuples with attributes
           Chain, Residue, ResName.
    Edges: between residues whose representative atoms are <= cutoff apart,
           with attributes distance, weight (1/distance) and resistance
           (the distance). Distances are floored at 1e-6 for these two
           derived attributes to avoid a division by zero.

    Parameters:
    pdb_path (str): path of the PDB file.
    chain (str): chain identifier to use.
    cutoff (float): distance threshold, in the units of the PDB coordinates.
    atom_name (str): name of the representative atom.

    Returns:
    tuple: (networkx.Graph, list of per-residue dicts in chain order).

    Raises:
    ValueError: if no residue yields a coordinate.
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("af", pdb_path)

    # Use first model
    model = next(structure.get_models())

    # Pick chain, falling back to the first chain in the model
    if chain in model:
        chain_obj = model[chain]
    else:
        chain_obj = next(model.get_chains())
        chain = chain_obj.id

    coords = []
    meta = []

    for res in chain_obj:
        # standard residues only (blank hetero flag)
        if res.id[0] != " ":
            continue

        # Representative atom: the requested one, else CA, else skip.
        if atom_name in res:
            atom = res[atom_name]
        elif "CA" in res:
            atom = res["CA"]
        else:
            continue

        coords.append(atom.coord.astype(float))
        meta.append({"Chain": chain, "Residue": int(res.id[1]), "ResName": res.resname})

    if not coords:
        raise ValueError("No residue coordinates found. Check chain/atom_name.")
    coords = np.asarray(coords, dtype=float)

    nodes = [(m["Chain"], int(m["Residue"])) for m in meta]
    tree = KDTree(coords)

    G = nx.Graph(layer=f"geometry:{atom_name}_cut{cutoff}", chain=chain, pdb=pdb_path)

    for n, m in zip(nodes, meta):
        G.add_node(n, **m)

    for i, point in enumerate(coords):
        for j in tree.query_ball_point(point, cutoff):
            # Each unordered pair once; this also drops the point itself.
            if j <= i:
                continue
            d = float(np.linalg.norm(point - coords[j]))
            floored = max(d, 1e-6)
            G.add_edge(nodes[i], nodes[j], weight=1.0 / floored, distance=d, resistance=floored)

    return G, meta


In [96]:
from pyvis.network import Network
from IPython.display import IFrame, display

def nx_rin_to_pyvis_default(
    G,
    ptm_positions=None,
    mutation_positions=None,
    out_html="rin_pyvis.html",
    height="600px",
    width="100%",
    default_color="#B0B0B0",   # light grey
    ptm_color="#1f77b4",       # blue
    mut_color="#d62728",       # red
    both_color="#9467bd",      # purple
    node_size=30,
    ptm_size=35,
    mut_size=35,
    both_size=40,
    select_menu=True,
    filter_menu=True
):
    """
    Render a residue interaction network as an interactive pyvis page.

    Every node of G is put into one of four groups, "PTM", "Mutation",
    "PTM+Mutation" or "Other", depending on whether its residue number is in
    `ptm_positions` and/or `mutation_positions`. Colour and size of a group
    come from the corresponding *_color / *_size parameters; the same colours
    are used for the vis.js group definitions, so node colours and group
    colours can no longer disagree.

    The page is written to `out_html` and shown inline in an IFrame.

    Parameters:
    G (networkx.Graph): nodes are (chain, residue_number) tuples; node
        attribute "ResName" and edge attribute "distance" are used if present.
    ptm_positions, mutation_positions (iterable of int-like, optional)
    out_html (str): output file path.
    height, width (str): size passed to the pyvis Network.
    default_color, ptm_color, mut_color, both_color (str): group colours.
    node_size, ptm_size, mut_size, both_size (int): group node sizes.
    select_menu, filter_menu (bool): passed to the pyvis Network.

    Returns:
    str: `out_html`.
    """
    import json  # local import so this cell does not depend on earlier ones

    ptm_set = {int(x) for x in (ptm_positions or [])}
    mut_set = {int(x) for x in (mutation_positions or [])}

    # Single source of truth for the look of each group: (colour, size).
    group_styles = {
        "PTM": (ptm_color, ptm_size),
        "Mutation": (mut_color, mut_size),
        "PTM+Mutation": (both_color, both_size),
        "Other": (default_color, node_size),
    }

    net = Network(
        height=height,
        width=width,
        directed=False,
        notebook=True,
        cdn_resources="in_line",
        select_menu=select_menu,
        filter_menu=filter_menu
    )

    # Group definitions derived from the colour parameters.
    group_options = {
        name: {
            "color": {
                "background": colour,
                "border": colour,
                "highlight": {"background": colour, "border": colour},
                "hover": {"background": colour, "border": colour},
            }
        }
        for name, (colour, _size) in group_styles.items()
    }
    net.set_options(json.dumps({"groups": group_options}))

    # ---- Nodes ----
    for (ch, resi), attrs in G.nodes(data=True):
        resi = int(resi)
        resn = attrs.get("ResName", "")

        is_ptm = resi in ptm_set
        is_mut = resi in mut_set

        if is_ptm and is_mut:
            group = "PTM+Mutation"
        elif is_mut:
            group = "Mutation"
        elif is_ptm:
            group = "PTM"
        else:
            group = "Other"
        bg, size = group_styles[group]

        node_id = f"{ch}:{resi}"

        net.add_node(
            node_id,
            label=f"{resn} {resi}" if resn else str(resi),
            title=f"Chain: {ch}<br>Residue: {resn}<br>Position: {resi}<br>Group: {group}",
            color={
                "background": bg,
                "border": "#333333",
                "highlight": {"background": bg, "border": "#000000"},
                "hover": {"background": bg, "border": "#000000"}
            },
            size=size,
            group=group,
            font={"size": 12}
        )

    # ---- Edges ----
    for (ch1, r1), (ch2, r2), eattrs in G.edges(data=True):
        a = f"{ch1}:{int(r1)}"
        b = f"{ch2}:{int(r2)}"
        dist = eattrs.get("distance", None)

        net.add_edge(
            a, b,
            color="#A9A9A9",
            title=f"distance: {dist:.2f} Å" if dist is not None else ""
        )

    html = net.generate_html()
    with open(out_html, "w", encoding="utf-8") as f:
        f.write(html)

    display(IFrame(out_html, width=900, height=650))
    return out_html


In [97]:
# Build RIN from AF model
# Chain A of the downloaded "<accession>.pdb" file, CA atoms, 8.0 cutoff.
G, meta = build_geometry_graph_from_pdb(f"{accession}.pdb", chain="A", cutoff=8.0, atom_name="CA")

In [102]:
# Quick sanity check of the network that was just built.
print("Directed:", G.is_directed())
print("Nodes:", G.number_of_nodes())
print("Edges:", G.number_of_edges())

### Visualize the RIN (Residue Interaction Network)

1. Phospho sites are colored red
2. Mutations are colored blue
3. Mutations + Phosphorylation colored Green

In [100]:
# Unique residue numbers of the phosphosites reported by Scop3P.
ptm_positions = list(set(modification_table["position"].dropna().astype(int).tolist()))

# Unique residue numbers of the disease-associated variants.
mutation_positions = list(set(mut_table["position"].dropna().astype(int).tolist()))

# Pass the colours explicitly so the picture matches the legend given above:
# phosphosites red, mutations blue, residues that are both green.
nx_rin_to_pyvis_default(
    G,
    ptm_positions=ptm_positions,
    mutation_positions=mutation_positions,
    out_html="rin_pyvis.html",
    ptm_color="#d62728",
    mut_color="#1f77b4",
    both_color="#2ca02c",
    select_menu=True,
    filter_menu=False
)


### Let's align two protein structures using TM-align

### Requires TM-align to align structures and NGLview for visualizing the aligned structures
1. TM-align
2. NGL viewer (better than py3Dmol for aligned structures)

In [10]:
#!pip install -U nglview ipywidgets
##check if installed
# import nglview as nv
# nv.demo()



In [None]:
## TM align installation (Terminal)
# git clone https://github.com/kad-ecoli/TMalign.git
# cd TMalign
# g++ -O3 -ffast-math -lm TMalign.cpp -o TM-align
# sudo mv TM-align /usr/local/bin/
# TM-align

##conda installation
# conda install -c bioconda tmalign
# TM-align

## precompiled binary
# wget https://zhanggroup.org/TM-align/TMalign
# chmod +x TMalign
# sudo mv TMalign /usr/local/bin/TM-align


In [53]:
import os
import subprocess
import tempfile
from Bio.PDB import PDBParser, PDBIO, Select
import nglview as nv
import ipywidgets as widgets
from IPython.display import display, clear_output

def save_upload(upload_widget, out_dir):
    """
    Write the first uploaded file of a FileUpload widget into `out_dir`.

    Two layouts of `upload_widget.value` are supported: a tuple/list of
    file-info mappings (each with "name" and "content") and a dict mapping
    file name to file-info.

    Only the final path component of the uploaded name is used, so a name
    containing directory parts (e.g. "../x.pdb") cannot place the file
    outside `out_dir`. An unusable name falls back to "upload.pdb".

    Parameters:
    upload_widget: object whose `.value` holds the upload(s).
    out_dir (str): target directory; created if necessary.

    Returns:
    str: path of the written file.

    Raises:
    ValueError: if nothing was uploaded.
    TypeError: if `upload_widget.value` has an unsupported type.
    """
    os.makedirs(out_dir, exist_ok=True)
    v = upload_widget.value
    if not v:
        raise ValueError("No file uploaded")

    # Newer ipywidgets: tuple/list of dicts
    if isinstance(v, (tuple, list)):
        fileinfo = v[0]
        name = fileinfo.get("name", "upload.pdb")

    # Older ipywidgets: dict name -> fileinfo
    elif isinstance(v, dict):
        name, fileinfo = next(iter(v.items()))

    else:
        raise TypeError(f"Unexpected upload_widget.value type: {type(v)}")

    content = fileinfo["content"]

    # The name is supplied by the browser: strip any directory part
    # (both separator styles) before using it as a file name.
    safe_name = os.path.basename(str(name).replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdb"

    path = os.path.join(out_dir, safe_name)
    with open(path, "wb") as f:
        f.write(content)
    return path



def chain_range_from_pdb(pdb_path, chain_id):
    """
    Return (lowest, highest) residue number found for `chain_id` in a PDB file.

    All models of the file are scanned; only residues with a blank hetero
    flag are counted. Raises ValueError when the chain has no such residue.
    """
    structure = PDBParser(QUIET=True).get_structure("X", pdb_path)

    numbers = []
    for model in structure:
        for chain in model:
            if chain.id != chain_id:
                continue
            for res in chain:
                if res.id[0] == " ":
                    numbers.append(res.id[1])

    if len(numbers) == 0:
        raise ValueError(f"No residues found for chain {chain_id}")
    return min(numbers), max(numbers)


class ChainRangeSelect(Select):
    """Selection that keeps one chain and an inclusive residue-number range."""

    def __init__(self, chain_id, start, end):
        # Chain identifier and the first/last residue number to keep.
        self.chain_id, self.start, self.end = chain_id, start, end

    def accept_chain(self, chain):
        """Keep only the chain whose id equals `chain_id`."""
        wanted = self.chain_id
        return chain.id == wanted

    def accept_residue(self, residue):
        """Keep residues numbered from `start` to `end`, both included."""
        number = residue.id[1]
        return self.start <= number and number <= self.end

def run_tmalign_write(pdb1, pdb2, out_dir, out_name):
    """
    Run the `TM-align` executable on two PDB files and locate its output file.

    The program is started inside `out_dir` with "-o out_name". Afterwards
    the first existing file among "out_name", "out_name.pdb" and
    "TM_sup.pdb" (all inside `out_dir`) is taken as the result.

    Returns (path of that file, captured standard output).
    Raises subprocess.CalledProcessError on a non-zero exit status and
    RuntimeError when none of the expected files exists.
    """
    os.makedirs(out_dir, exist_ok=True)

    command = [
        "TM-align",
        os.path.abspath(pdb1),
        os.path.abspath(pdb2),
        "-o",
        out_name,
    ]
    completed = subprocess.run(command, cwd=out_dir, capture_output=True, text=True, check=True)

    # Probe the possible output names in order of preference.
    for filename in (out_name, out_name + ".pdb", "TM_sup.pdb"):
        candidate = os.path.join(out_dir, filename)
        if os.path.exists(candidate):
            return candidate, completed.stdout

    raise RuntimeError(f"No TM-align output found. Files: {os.listdir(out_dir)}")

import nglview as nv

def visualize_ngl(pdb_ref, pdb_aligned, selection="protein"):
    """
    Show two PDB files together in one NGL widget.

    Component 0 (`pdb_ref`) is drawn as a blue cartoon with opacity 0.7,
    component 1 (`pdb_aligned`) as a red cartoon with opacity 1.0; both are
    restricted to `selection`. Returns the widget.
    """
    view = nv.NGLWidget()

    # Load both files without the default representation, so that only the
    # cartoons added below are visible.
    for structure_path in (pdb_ref, pdb_aligned):
        view.add_component(structure_path, ext="pdb", defaultRepresentation=False)

    view.clear_representations()

    # (colour, opacity) per component: reference first, aligned second.
    cartoon_styles = (("blue", 0.7), ("red", 1.0))
    for component_index, (color_value, opacity) in enumerate(cartoon_styles):
        view.add_cartoon(
            component=component_index,
            selection=selection,
            colorScheme="uniform",
            colorValue=color_value,
            opacity=opacity
        )

    view.center()
    return view


In [54]:
# One upload control per structure; each accepts a single .pdb file.
upload1 = widgets.FileUpload(accept=".pdb", multiple=False, description="Upload PDB 1")
upload2 = widgets.FileUpload(accept=".pdb", multiple=False, description="Upload PDB 2")

# Chain identifier to use from each structure (defaults to "A").
chain1 = widgets.Text(value="A", description="Chain 1")
chain2 = widgets.Text(value="A", description="Chain 2")

# Residue range to keep from each structure.
start1 = widgets.IntText(description="Start 1")
end1   = widgets.IntText(description="End 1")
start2 = widgets.IntText(description="Start 2")
end2   = widgets.IntText(description="End 2")

btn_range = widgets.Button(description="Auto-fill ranges")
btn_run = widgets.Button(description="Align + Visualize", button_style="primary")

# Shared output area for messages and the viewer, and the directory where
# uploads and generated files are written.
out = widgets.Output()
workdir = "./tmalign_upload_tool"


In [55]:
def autofill_ranges(_):
    """
    Button callback: store both uploads in `workdir` and set the start/end
    fields to the lowest and highest residue number of the chosen chains.

    Messages, including any error, are printed into the `out` widget.
    """
    out.clear_output()
    with out:
        try:
            first_path = save_upload(upload1, workdir)
            second_path = save_upload(upload2, workdir)

            # Determine both ranges before touching any field, so a failure
            # on the second file leaves the form unchanged.
            first_range = chain_range_from_pdb(first_path, chain1.value)
            second_range = chain_range_from_pdb(second_path, chain2.value)

            start1.value, end1.value = first_range
            start2.value, end2.value = second_range

            print("Ranges auto-filled.")
        except Exception as e:
            print("ERROR:", e)


btn_range.on_click(autofill_ranges)


In [56]:
def run_align(_):
    """
    Button callback: cut the selected chain/range out of both uploads, align
    the two segments with TM-align and show the result.

    Steps:
      1. store both uploads in `workdir`;
      2. write "seg1.pdb" / "seg2.pdb" containing only the chosen chain and
         residue range of each structure;
      3. run TM-align on the two segments;
      4. print the alignment summary and display segment 2 together with
         the structure file written by TM-align.

    Messages, including any error, are printed into the `out` widget.
    """
    out.clear_output()
    with out:
        try:
            pdb1 = save_upload(upload1, workdir)
            pdb2 = save_upload(upload2, workdir)

            seg1 = os.path.join(workdir, "seg1.pdb")
            seg2 = os.path.join(workdir, "seg2.pdb")

            parser = PDBParser(QUIET=True)
            io = PDBIO()

            io.set_structure(parser.get_structure("X", pdb1))
            io.save(seg1, select=ChainRangeSelect(chain1.value, start1.value, end1.value))

            io.set_structure(parser.get_structure("Y", pdb2))
            io.save(seg2, select=ChainRangeSelect(chain2.value, start2.value, end2.value))

            aligned_pdb, stdout = run_tmalign_write(
                seg1, seg2, out_dir=workdir, out_name="aligned"
            )

            # Report the result lines rather than blindly the first output
            # line (which raised IndexError on empty output).
            # NOTE(review): the prefixes below follow the usual TM-align
            # report format; if none matches, the full output is shown.
            summary = [
                line
                for line in stdout.splitlines()
                if line.startswith(("Aligned length", "TM-score"))
            ]
            print("\n".join(summary) if summary else stdout.strip())
            display(visualize_ngl(seg2, aligned_pdb))

        except subprocess.CalledProcessError as e:
            # Show what the program itself reported, when available.
            print("ERROR: TM-align failed:", (e.stderr or "").strip() or e)
        except Exception as e:
            print("ERROR:", e)

btn_run.on_click(run_align)


## Download Structures in .pdb format from RCSB.org or predicted structures in .pdb format
1. Upload your structures one by one
2. Specify chains A or B etc..
3. Auto-fill ranges to fill start and end range of structures
4. If you want to align specific domains, fill in those ranges; otherwise leave the auto-filled values as they are.
5. Align+visualize

In [101]:
# Lay out the form: uploads, one row per structure (chain + range),
# the two action buttons, and the shared output area underneath.
display(
    widgets.VBox([
        widgets.HBox([upload1, upload2]),
        widgets.HBox([chain1, start1, end1]),
        widgets.HBox([chain2, start2, end2]),
        widgets.HBox([btn_range, btn_run]),
        out,
    ])
)
