# Scop3P

A comprehensive database of human phosphosites within their full context. Scop3P integrates sequences (UniProtKB/Swiss-Prot), structures (PDB), and uniformly reprocessed phosphoproteomics data (PRIDE) to annotate all known human phosphosites. 

Scop3P, available at https://iomics.ugent.be/scop3p, presents a unique resource for visualization and analysis of phosphosites and for understanding of phosphosite structure–function relationships.

## Install Dependencies

In [109]:
%%capture
!jupyter labextension install jupyterlab_3dmol
!pip install pandas matplotlib py3Dmol b2btools==3.0.7b2

In [3]:
%%capture
import requests, tempfile
import pandas as pd 
from b2bTools import SingleSeq, constants
import py3Dmol

## Target protein

In [4]:
# Accession passed to the Scop3P and UniProtKB requests further down.
TARGET_PROTEIN_ID = "O00571" # Write here the Protein ID of your protein of interest
# PDB entry loaded by the first 3D view (queried as 'pdb:<PDB_ID>').
PDB_ID = "4PXA" # Write here the PDB ID of your protein of interest

## API Request

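This function makes a GET request to the Scop3P API endpoint for a given protein accession ID and returns the protein's modifications (phosphosites) as parsed JSON, i.e. a Python dictionary, or `None` if the server does not answer with status 200.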

In [5]:
def fetch_protein_modifications(accession, timeout=30):
    """
    Fetches protein modifications for a given UniProt ID from the Scop3P API.

    Parameters:
    accession (str): UniProt ID of the protein.
    timeout (float): Seconds to wait for the server before giving up.
        Without a timeout, requests may block indefinitely.

    Returns:
    dict or None: The decoded JSON response, or None when the server
    answers with any status other than 200.

    Raises:
    requests.exceptions.RequestException: If the request cannot be
    completed (connection error, timeout, ...).
    """
    BASE_URL = "https://iomics.ugent.be/scop3p/api/modifications"
    headers = {'accept': 'application/json'}
    # Let requests build (and URL-encode) the query string instead of
    # concatenating the accession into the URL by hand.
    response = requests.get(
        BASE_URL,
        params={'accession': accession},
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        return None
    return response.json()

## Data parsing

To parse the JSON response into a table, we'll use the pandas library.

In [110]:
scop3P_results = fetch_protein_modifications(TARGET_PROTEIN_ID)

# fetch_protein_modifications() returns None when the API does not answer
# with HTTP 200; fail with an explicit message instead of the opaque
# "'NoneType' object is not subscriptable" raised by the lookups below.
if scop3P_results is None:
    raise RuntimeError(
        f"Scop3P returned no data for accession {TARGET_PROTEIN_ID!r}"
    )

# Top-level fields of the Scop3P response that the rest of the notebook uses.
protein_name = scop3P_results['proteinName']
entry_name = scop3P_results['entryName']
accession = scop3P_results['accession']
url = scop3P_results['url']
modifications = scop3P_results['modifications']

# Short text summary of the entry.
print(f"""
--------------------------------------------------------------------------------
Scop3P: A Comprehensive Resource of Human Phosphosites within Their Full Context
--------------------------------------------------------------------------------

{entry_name}:{protein_name}

Phospho-sites found: {len(modifications)} entries.

Full entry available on SCOP3P website: {url}
""")

In [111]:
def get_modification_table(modifications):
    """
    Builds a pandas DataFrame from the protein modifications.

    Only the columns listed below are kept, in this order. A column that is
    absent from the input (including the case of an empty list, e.g. a
    protein without any annotated phosphosite) is still present in the
    result, filled with NaN, instead of raising a KeyError.

    Parameters:
    modifications (list): A list of dictionaries, each representing a protein modification.

    Returns:
    pandas.DataFrame: One row per modification.
    """
    columns = [
        'residue',
        'name',
        'evidence',
        'position',
        'source',
        'reference',
        'functionalScore',
        'specificSinglyPhosphorylated',
    ]
    # reindex() selects/orders the columns like df[columns] would, but
    # tolerates missing ones.
    return pd.DataFrame(modifications).reindex(columns=columns)
    
# Keep the table in a notebook-level variable: the 3D rendering cells reuse it.
modification_table = get_modification_table(modifications)
# display() is the IPython/Jupyter helper that renders the DataFrame as a rich table.
display(modification_table)

## Rendering results

To visualize the 3D structure, we'll use the py3Dmol library.

In [116]:
def display_3D(modification_table):
    """
    Renders the PDB entry PDB_ID and highlights every modified residue.

    The structure is drawn as a cartoon under a translucent white surface.
    Each row of the table adds an opaque surface patch on its residue,
    coloured by modification type, plus a hover label for that residue.

    Parameters:
    modification_table (pandas.DataFrame): Table with at least the
        'position' and 'residue' columns.

    Returns:
    py3Dmol.view: The configured viewer.
    """
    # Patch colour per modification type; anything else uses the fallback.
    residue_colors = {
        'PhosphoY': '#2CA02C',
        'PhosphoS': '#1F77B4',
        'PhosphoT': '#FF7F0E',
    }
    fallback_color = '#7B241C'

    # JavaScript callbacks handed to 3Dmol.js. The first one is a
    # str.format template ({0} = modification type, {1} = position).
    hover_template = """
            function(atom, viewer, event, container) {{
                if (!atom.label) {{
                    atom.label = viewer.addLabel(
                        "{0} " + atom.resn + {1}, {{ 'position': atom, backgroundColor: 'mintcream', fontColor:'black' }}
                    );
                }}
            }}
            """
    unhover_callback = """
            function(atom,viewer) {
                if(atom.label) {
                    viewer.removeLabel(atom.label);
                    delete atom.label;
                }
            }
            """

    view = py3Dmol.view(query=f'pdb:{PDB_ID}')

    view.setStyle({'cartoon': {'color': 'skyblue'}})
    view.addSurface(py3Dmol.VDW, {'opacity': 0.60, 'color': 'white'})

    for position, residue in zip(modification_table['position'],
                                 modification_table['residue']):
        site = {'resi': [str(position)]}
        color = residue_colors.get(residue, fallback_color)

        view.addSurface(py3Dmol.VDW, {'opacity': 1.0, 'color': color}, site)
        # Register the hover label on this residue only. With an empty
        # selection every iteration re-registered the callback on ALL
        # atoms, so the whole structure ended up labelled with the last
        # row's modification type and position.
        view.setHoverable(
            site,
            True,
            hover_template.format(residue, position),
            unhover_callback,
        )

    view.zoomTo()
    return view

In [117]:
# Last expression of the cell: the notebook renders the returned py3Dmol view.
display_3D(modification_table)

## Let's get the AlphaFold model for the target protein


In [10]:
# Download the AlphaFold model of the protein (fragment F1, model version 4)
# and store it in the working directory as "<accession>.pdb".
import urllib.request

AFurl = "https://alphafold.ebi.ac.uk/files/AF-"
modelurl = f"{AFurl}{accession}-F1-model_v4.pdb"
# urlretrieve returns a (local filename, response headers) tuple.
AFmodel = urllib.request.urlretrieve(modelurl, f"{accession}.pdb")


## Rendering results

To visualize the downloaded AlphaFold model, we'll again use the py3Dmol library.

In [125]:
def display_3D(modification_table):
    """
    Renders the downloaded model "<accession>.pdb" and highlights every
    modified residue.

    The model is drawn as a cartoon under a translucent white surface.
    Each row of the table adds an opaque surface patch on its residue,
    coloured by modification type, plus a hover label for that residue.

    Parameters:
    modification_table (pandas.DataFrame): Table with at least the
        'position' and 'residue' columns.

    Returns:
    py3Dmol.view: The configured viewer.
    """
    # Patch colour per modification type; anything else uses the fallback.
    residue_colors = {
        'PhosphoY': '#2CA02C',
        'PhosphoS': '#1F77B4',
        'PhosphoT': '#FF7F0E',
    }
    fallback_color = '#7B241C'

    # JavaScript callbacks handed to 3Dmol.js. The first one is a
    # str.format template ({0} = modification type, {1} = position).
    hover_template = """
            function(atom, viewer, event, container) {{
                if (!atom.label) {{
                    atom.label = viewer.addLabel(
                        "{0} " + atom.resn + {1}, {{ 'position': atom, backgroundColor: 'mintcream', fontColor:'black' }}
                    );
                }}
            }}
            """
    unhover_callback = """
            function(atom,viewer) {
                if(atom.label) {
                    viewer.removeLabel(atom.label);
                    delete atom.label;
                }
            }
            """

    # Read the model through a context manager so the file handle is closed.
    with open(accession + '.pdb', 'r') as pdb_file:
        pdb_text = pdb_file.read()

    view = py3Dmol.view()
    view.addModel(pdb_text, 'pdb')

    view.setStyle({'cartoon': {'color': 'silver'}})
    view.addSurface(py3Dmol.VDW, {'opacity': 0.60, 'color': 'white'})

    for position, residue in zip(modification_table['position'],
                                 modification_table['residue']):
        site = {'resi': [str(position)]}
        color = residue_colors.get(residue, fallback_color)

        view.addSurface(py3Dmol.VDW, {'opacity': 1.0, 'color': color}, site)
        # Register the hover label on this residue only. With an empty
        # selection every iteration re-registered the callback on ALL
        # atoms, so the whole model ended up labelled with the last row's
        # modification type and position.
        view.setHoverable(
            site,
            True,
            hover_template.format(residue, position),
            unhover_callback,
        )

    view.zoomTo()
    return view

In [127]:
# Last expression of the cell: the notebook renders the returned py3Dmol view.
display_3D(modification_table)

## Mapping Biophysical properties on AF models

To visualize the 3D structure, we'll use the py3Dmol library.

### Predict dynamic properties from Bio2Byte tools

We will fetch the amino acid sequence from the UniProtKB API.

In [13]:
def fetch_sequence_aminoacids(accession, timeout=30):
    """
    Downloads the FASTA record of a UniProtKB entry.

    Parameters:
    accession (str): UniProt ID of the protein.
    timeout (float): Seconds to wait for the server before giving up.

    Returns:
    tuple: (header, sequence). `header` is the first line of the FASTA
    record exactly as served (including the leading '>'); `sequence` is the
    remaining lines joined into one string. Both are empty strings when the
    server answers with any status other than 200.

    Raises:
    requests.exceptions.RequestException: If the request cannot be
    completed (connection error, timeout, ...).
    """
    # Query the REST host directly over HTTPS. The entry is addressed by the
    # path, so no extra "?accession=" query parameter is needed.
    url = f"https://rest.uniprot.org/uniprotkb/{accession}.fasta"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return "", ""

    raw_fasta_sequence = response.content.decode("utf-8")
    # splitlines() copes with both "\n" and "\r\n" line endings, so no stray
    # carriage returns end up inside the sequence.
    lines = raw_fasta_sequence.splitlines() or [""]
    protein_id = lines[0]
    amino_acids = "".join(line.strip() for line in lines[1:])

    return protein_id, amino_acids

#### Predict Dynamic properties

In [14]:
# Only the sequence is used below; the FASTA header goes to an underscore-prefixed name.
_protein_id, sequence = fetch_sequence_aminoacids(TARGET_PROTEIN_ID)

In [15]:
%%capture
def predict_biophysical_features(accession, sequence):
    """
    Runs the b2bTools single-sequence predictors on one protein.

    The sequence is written to a temporary FASTA file (header = accession),
    which is handed to SingleSeq by path; the DynaMine, DisoMine and
    EFoldMine tools are requested.

    Parameters:
    accession (str): Identifier used as the FASTA header.
    sequence (str): Amino acid sequence of the protein.

    Returns:
    The value returned by get_all_predictions().
    """
    fasta_record = f">{accession}\n{sequence}\n"
    requested_tools = [
        constants.TOOL_DYNAMINE,
        constants.TOOL_DISOMINE,
        constants.TOOL_EFOLDMINE,
    ]

    with tempfile.NamedTemporaryFile(prefix="seq_", suffix=".fasta", mode="w") as fasta_file:
        fasta_file.write(fasta_record)
        # Push the content to disk before the predictor opens the file by name.
        fasta_file.flush()
        fasta_file.seek(0)

        predictor = SingleSeq(fasta_file.name)
        predicted = predictor.predict(tools=requested_tools)
        all_predictions = predicted.get_all_predictions()

    return all_predictions


# Run the predictors for the target protein (the accession doubles as FASTA header).
biophysical_features = predict_biophysical_features(TARGET_PROTEIN_ID, sequence)

# Pick this protein's entry out of the result, keyed by the same accession.
biophysical_features_target_protein = biophysical_features['proteins'][TARGET_PROTEIN_ID]

In [16]:
# Turn the raw per-residue predictions into discrete class labels.
#
# BD_label (from 'backbone'):
#   1: value above 1.0 -> membrane spanning regions
#   2: value above 0.8 -> rigid conformations
#   3: value above 0.69 -> 'context' dependent, capable of being either
#      rigid or flexible
#   4: anything else -> flexible regions
# DO_label (from 'disoMine'): 1 when the value is above 0.5, otherwise 0.
# EF_label (from 'earlyFolding'): 1 when the value is above 0.169, otherwise 0.
#
# NOTE: a per-residue 'psites' flag (1 when position index + 1 is listed in
# the modification table's 'position' column) was sketched here but is
# currently disabled.

for label_key in ('BD_label', 'DO_label', 'EF_label'):
    biophysical_features_target_protein[label_key] = []

for position in range(len(biophysical_features_target_protein['seq'])):
    backbone_value = biophysical_features_target_protein['backbone'][position]
    if backbone_value > 1:
        backbone_class = 1
    elif backbone_value > 0.8:
        backbone_class = 2
    elif backbone_value > 0.69:
        backbone_class = 3
    else:
        backbone_class = 4
    biophysical_features_target_protein['BD_label'].append(backbone_class)

    disorder_value = biophysical_features_target_protein['disoMine'][position]
    biophysical_features_target_protein['DO_label'].append(int(disorder_value > 0.5))

    early_folding_value = biophysical_features_target_protein['earlyFolding'][position]
    biophysical_features_target_protein['EF_label'].append(int(early_folding_value > 0.169))

### Get the dynamic properties in a pandas DataFrame object

In [17]:
# Convert the prediction dict (including the label lists added above) to a DataFrame.
dynamic_properties=pd.DataFrame(biophysical_features_target_protein)
# 1-based running number for each row, stored as 'seqpos'.
dynamic_properties['seqpos']=range(1,len(dynamic_properties)+1)

### Map onto AF models and visualize

To visualize the predictions on the 3D structure, we'll use the py3Dmol library.

In [18]:
import colorsys


def pseudocolor(minval, maxval, val):
    """ Convert a value in the range minval...maxval to a colour on the
        Green...Yellow...Red ramp (hue 120 down to 0 in the HSV colorspace).

        val == minval gives green, val == maxval gives red. Values outside
        the range are clamped to the nearest end of the ramp. When minval
        equals maxval there is nothing to scale against, so the middle of
        the ramp is returned instead of dividing by zero.

        Returns the colour as a '0xRRGGBB' string.
    """
    minval, maxval, val = float(minval), float(maxval), float(val)
    span = maxval - minval
    if span == 0:
        fraction = 0.5
    else:
        # 1.0 at minval, 0.0 at maxval; clamped so the hue stays within
        # the 0-120 degree green-to-red segment.
        fraction = min(1.0, max(0.0, (maxval - val) / span))
    hue_degrees = fraction * 120
    r, g, b = colorsys.hsv_to_rgb(hue_degrees / 360, 1., 1.)
    return '0x%02x%02x%02x' % tuple(int(255 * channel) for channel in (r, g, b))

In [40]:
def remap(df):
    """
    Builds per-residue colour maps for the three predicted properties.

    Each value is scaled between the minimum and maximum of its own column
    and converted to a colour with pseudocolor().

    Parameters:
    df (pandas.DataFrame): Table with 'backbone', 'disoMine' and
        'earlyFolding' columns, one row per residue in sequence order.

    Returns:
    tuple: (BDcolor, EFcolor, DOcolor) - note the order. Each is a dict
    mapping the 1-based row number to a colour string. All three are empty
    when df has no rows.
    """
    BDcolor, EFcolor, DOcolor = {}, {}, {}
    if len(df) == 0:
        # min()/max() of an empty column would raise ValueError.
        return BDcolor, EFcolor, DOcolor

    min_BD, max_BD = min(df.backbone), max(df.backbone)
    min_DO, max_DO = min(df.disoMine), max(df.disoMine)
    min_EF, max_EF = min(df.earlyFolding), max(df.earlyFolding)

    per_residue = zip(df.backbone, df.disoMine, df.earlyFolding)
    for seqpos, (backbone, disorder, early_folding) in enumerate(per_residue, start=1):
        BDcolor[seqpos] = pseudocolor(min_BD, max_BD, float(backbone))
        # Scale disorder against its OWN range; this previously used the
        # earlyFolding min/max by mistake.
        DOcolor[seqpos] = pseudocolor(min_DO, max_DO, float(disorder))
        EFcolor[seqpos] = pseudocolor(min_EF, max_EF, float(early_folding))
    return BDcolor, EFcolor, DOcolor
        
        

In [106]:
def display_3D(dynamic_properties):
    """
    Shows the downloaded model "<accession>.pdb" in a 2x2 grid of viewers,
    each coloured by a different per-residue property.

    Grid layout (row, column):
      (0, 0) B-factor column of the PDB file, 'rwb' gradient from 0 to 100
             (presumably the AlphaFold pLDDT confidence - confirm)
      (0, 1) backbone dynamics colours
      (1, 0) disorder colours
      (1, 1) early folding colours

    Parameters:
    dynamic_properties (pandas.DataFrame): Table accepted by remap().

    Returns:
    py3Dmol.view: The configured viewer grid.
    """
    BDcolor, EFcolor, DOcolor = remap(dynamic_properties)

    # Read the model through a context manager so the file handle is closed.
    with open(accession + '.pdb', 'r') as pdb_file:
        pdb_text = pdb_file.read()

    view = py3Dmol.view(viewergrid=(2, 2))
    view.addModel(pdb_text, 'pdb')

    view.setStyle({'cartoon': {'colorscheme': {'prop':'b','gradient': 'rwb','min':0.0,'max':100.0}}}, viewer=(0, 0))
    view.setStyle({'cartoon': {'colorscheme': {'prop': 'resi', 'map': BDcolor}}}, viewer=(0, 1))
    view.setStyle({'cartoon': {'colorscheme': {'prop': 'resi', 'map': DOcolor}}}, viewer=(1, 0))
    view.setStyle({'cartoon': {'colorscheme': {'prop': 'resi', 'map': EFcolor}}}, viewer=(1, 1))

    # Same light-grey background in all four panels.
    for panel in ((0, 0), (0, 1), (1, 0), (1, 1)):
        view.setBackgroundColor('#e6e6e6', viewer=panel)

    view.zoomTo()
    view.render()
    return view

In [128]:
# Last expression of the cell: the notebook renders the returned 2x2 py3Dmol view.
display_3D(dynamic_properties)