# PDBe API Training

### PDBe Interactions

This tutorial will guide you through searching PDBe programmatically to find macromolecular interactions.


First, we will import the code that will do the work.
Run the cell below by pressing the green play button.

In [1]:
import pandas as pd
from pprint import pprint
import sys
sys.path.insert(0,'..')
from python_modules.api_modules import run_sequence_search, explode_dataset, get_macromolecule_interaction_data


In [2]:

# UniProt accession of the protein whose interactions are looked up in the cells that follow.
uniprot_accession = "P22303"

Get the macromolecules which interact with the UniProt accession

In [3]:
# Ask the project helper for the interaction data of the chosen accession.
# The cell output shows a PDBe graph-API URL (uniprot/interface_residues/<accession>)
# for this call — presumably the endpoint the helper queries; confirm in api_modules.
interaction_data = get_macromolecule_interaction_data(uniprot_accession=uniprot_accession)

https://www.ebi.ac.uk/pdbe/graph-api/uniprot/interface_residues/P22303


In [4]:
# Pretty-print the raw result to inspect its structure (a list of dicts, judging by the
# printed output). NOTE(review): this prints the whole result, which is long.
pprint(interaction_data)

[{'allPDBEntries': ['6ntk',
                    '6wvp',
                    '1vzj',
                    '6u37',
                    '6wuv',
                    '7rb5',
                    '6wuy',
                    '4m0e',
                    '6ntm',
                    '7p1n',
                    '8dt2',
                    '7d9p',
                    '4pqe',
                    '4ey4',
                    '7xn1',
                    '4ey5',
                    '6o66',
                    '6wvo',
                    '6wuz',
                    '6nth',
                    '7rb6',
                    '1b41',
                    '7e3d',
                    '6o5v',
                    '5hf6',
                    '6wv1',
                    '7p1p',
                    '6ntn',
                    '6o4w',
                    '6o5s',
                    '5hq3',
                    '6cqx',
                    '6cqv',
                    '6f25',
                    '6o4x',
                    

In [5]:
# Turn the raw result into a DataFrame via the project helper, expanding the
# 'interactingPDBEntries' column — presumably one row per interacting PDB entry,
# as the displayed frame suggests; confirm against explode_dataset.
df2 = explode_dataset(result=interaction_data, column_to_explode='interactingPDBEntries')

In [6]:
# Show the frame; a bare last expression uses the notebook's rich display.
df2

Unnamed: 0,startIndex,endIndex,startCode,endCode,indexType,interactingPDBEntries,allPDBEntries,interaction_accession,interaction_name,length,uniprot_accession,interaction_accession_type,interaction_ratio
0,56,56,PRO,PRO,UNIPROT,"{'pdbId': '7p1n', 'entityId': 2, 'chainIds': '...","[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986
1,56,56,PRO,PRO,UNIPROT,"{'pdbId': '7p1p', 'entityId': 2, 'chainIds': '...","[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986
2,74,74,GLY,GLY,UNIPROT,"{'pdbId': '7p1p', 'entityId': 2, 'chainIds': '...","[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986
3,74,74,GLY,GLY,UNIPROT,"{'pdbId': '7p1n', 'entityId': 2, 'chainIds': '...","[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986
4,75,75,PRO,PRO,UNIPROT,"{'pdbId': '7p1p', 'entityId': 2, 'chainIds': '...","[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1895,595,595,MET,MET,UNIPROT,"{'pdbId': '1vzj', 'entityId': 2, 'chainIds': '...",[1vzj],Q9Y215,Acetylcholinesterase collagenic tail peptide,614,P22303,UNP,1.000000
1896,598,598,TRP,TRP,UNIPROT,"{'pdbId': '1vzj', 'entityId': 2, 'chainIds': '...",[1vzj],Q9Y215,Acetylcholinesterase collagenic tail peptide,614,P22303,UNP,1.000000
1897,601,601,GLN,GLN,UNIPROT,"{'pdbId': '1vzj', 'entityId': 2, 'chainIds': '...",[1vzj],Q9Y215,Acetylcholinesterase collagenic tail peptide,614,P22303,UNP,1.000000
1898,602,602,PHE,PHE,UNIPROT,"{'pdbId': '1vzj', 'entityId': 2, 'chainIds': '...",[1vzj],Q9Y215,Acetylcholinesterase collagenic tail peptide,614,P22303,UNP,1.000000


Get all the macromolecules interacting with our protein

In [7]:
# Distinct values of the interaction_accession column, i.e. one entry per interacting macromolecule.
df2['interaction_accession'].unique()

array(['P22303', 'P0C1Z0', 'Q9Y215'], dtype=object)

Some post-processing is required to split interactingPDBEntries into separate columns.

In [8]:
# Flatten each interactingPDBEntries dict into its own columns
# (pdbId, entityId and chainIds in the frame displayed afterwards).
data = pd.json_normalize(df2['interactingPDBEntries'])

# Remove the original dict column, then attach the flattened columns.
# join() aligns rows on the index, so this relies on df2 and the normalized
# frame sharing the same 0..n-1 index (df2's display suggests it does — verify
# if explode_dataset ever changes how it indexes its result).
df3 = df2.drop(columns='interactingPDBEntries').join(data)

In [9]:
# Show the flattened frame; a bare last expression uses the notebook's rich display.
df3

Unnamed: 0,startIndex,endIndex,startCode,endCode,indexType,allPDBEntries,interaction_accession,interaction_name,length,uniprot_accession,interaction_accession_type,interaction_ratio,pdbId,entityId,chainIds
0,56,56,PRO,PRO,UNIPROT,"[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986,7p1n,2,"B,A"
1,56,56,PRO,PRO,UNIPROT,"[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986,7p1p,2,"B,A"
2,74,74,GLY,GLY,UNIPROT,"[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986,7p1p,2,"B,A"
3,74,74,GLY,GLY,UNIPROT,"[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986,7p1n,2,"B,A"
4,75,75,PRO,PRO,UNIPROT,"[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986,7p1p,2,"B,A"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1895,595,595,MET,MET,UNIPROT,[1vzj],Q9Y215,Acetylcholinesterase collagenic tail peptide,614,P22303,UNP,1.000000,1vzj,2,"J,I"
1896,598,598,TRP,TRP,UNIPROT,[1vzj],Q9Y215,Acetylcholinesterase collagenic tail peptide,614,P22303,UNP,1.000000,1vzj,2,"J,I"
1897,601,601,GLN,GLN,UNIPROT,[1vzj],Q9Y215,Acetylcholinesterase collagenic tail peptide,614,P22303,UNP,1.000000,1vzj,2,"J,I"
1898,602,602,PHE,PHE,UNIPROT,[1vzj],Q9Y215,Acetylcholinesterase collagenic tail peptide,614,P22303,UNP,1.000000,1vzj,2,"J,I"


In [10]:
# Add two derived columns without mutating the existing frame object in place:
# assign() returns a new DataFrame, which is then bound back to df3.
#   residue_number - copy of startIndex
#   count          - copy of pdbId
# NOTE(review): 'count' holds PDB IDs at this point; presumably it is meant to be
# aggregated (counted) per residue later — confirm against the downstream cells.
df3 = df3.assign(
    residue_number=df3['startIndex'],
    count=df3['pdbId'],
)


Now we are ready to use the data.

In [11]:
# Preview the first five rows (head()'s default) to check the prepared frame.
df3.head()

Unnamed: 0,startIndex,endIndex,startCode,endCode,indexType,allPDBEntries,interaction_accession,interaction_name,length,uniprot_accession,interaction_accession_type,interaction_ratio,pdbId,entityId,chainIds,residue_number,count
0,56,56,PRO,PRO,UNIPROT,"[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986,7p1n,2,"B,A",56,7p1n
1,56,56,PRO,PRO,UNIPROT,"[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986,7p1p,2,"B,A",56,7p1p
2,74,74,GLY,GLY,UNIPROT,"[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986,7p1p,2,"B,A",74,7p1p
3,74,74,GLY,GLY,UNIPROT,"[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986,7p1n,2,"B,A",74,7p1n
4,75,75,PRO,PRO,UNIPROT,"[6ntk, 6wvp, 1vzj, 6u37, 6wuv, 7rb5, 6wuy, 4m0...",P22303,Acetylcholinesterase,614,P22303,UNP,0.028986,7p1p,2,"B,A",75,7p1p
