# PDBe API Training

### PDBe Interactions

This tutorial will guide you through searching PDBe programmatically.


First we will import the code which will do the work.
Run the cell below by pressing the green play button.

In [1]:
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
from IPython.display import SVG, display
from pprint import pprint
import sys
sys.path.insert(0,'..')
from python_modules.api_modules import run_sequence_search, explode_dataset, get_macromolecule_interaction_data


Now we are ready to actually run the sequence search we did in the last module

We will search with an example sequence from UniProt P08659 -
Luciferin 4-monooxygenase (firefly luciferase)

In [2]:
# Query sequence, kept as a multi-line literal exactly as it is handed to the
# search helper (line breaks included).
sequence_to_search = """
MEDAKNIKKGPAPFYPLEDGTAGEQLHKAMKRYALVPGTIAFTDAHIEVNITYAEYFEMS
VRLAEAMKRYGLNTNHRIVVCSENSLQFFMPVLGALFIGVAVAPANDIYNERELLNSMNI
SQPTVVFVSKKGLQKILNVQKKLPIIQKIIIMDSKTDYQGFQSMYTFVTSHLPPGFNEYD
FVPESFDRDKTIALIMNSSGSTGLPKGVALPHRTACVRFSHARDPIFGNQIIPDTAILSV
VPFHHGFGMFTTLGYLICGFRVVLMYRFEEELFLR
SLQDYKIQSALLVPTLFSFFAKSTL
IDKYDLSNLHEIASGGAPLSKEVGEAVAKRFHLPGIRQGYGLTETTSAILITPEGDDKPG
AVGKVVPFFEAKVVDLDTGKTLGVNQRGELCVRGPMIMSGYVNNPEATNALIDKDGWLHS
GDIAYWDEDEHFFIVDRLKSLIKYKGYQVAPAELESILLQHPNIFDAGVAGLPDDDAGEL
PAAVVVLEHGKTMTEKEIVDYVASQVTTAKKLRGGVVFVDEVPKGLTGKLDARKIREILI
KAKKGGKSKL
"""

# Terms passed to the helper as `filter_terms`
# (presumably the fields returned for each hit; confirm against run_sequence_search).
filter_list = [
    'pfam_accession',
    'pdb_id',
    'molecule_name',
    'ec_number',
    'uniprot_accession_best',
    'tax_id',
]

# Run the search through the project helper; `number_of_rows` presumably caps
# how many rows come back (confirm against run_sequence_search).
search_results = run_sequence_search(
    sequence_to_search,
    filter_terms=filter_list,
    number_of_rows=1000,
)

Number of results 315


Load the data into a DataFrame

In [3]:
# Turn the search results into a DataFrame (via the project helper) and keep
# only the rows whose percentage identity is above 80.
df = (
    explode_dataset(search_results)
    .query('percentage_identity > 80')
)

# Per UniProt accession, count the non-null values in every column and list the
# accessions with the most 'pdb_id' values first.
group_by_uniprot = (
    df.groupby('uniprot_accession_best')
    .count()
    .sort_values('pdb_id', ascending=False)
)

How many UniProt accessions were there?

In [4]:
len(group_by_uniprot)

2

Let's look at the data to see what we have.

In [5]:
group_by_uniprot.head()

Unnamed: 0_level_0,chain_id,ec_number,entity_id,entry_entity,molecule_name,pdb_id,pfam_accession,tax_id,e_value,percentage_identity,result_sequence
uniprot_accession_best,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
P08659,48,48,48,48,48,48,46,48,48,48,0
Q5UFR2,4,4,4,4,4,4,4,4,4,4,0


Get the first UniProt accession from the results.

In [6]:
# Take the accession from the first row of the filtered search results.
uniprot_accession = df['uniprot_accession_best'].iat[0]

In [7]:
# NOTE: a bare expression is only displayed when it is the last statement of a
# cell, so this line produces no output here.
uniprot_accession

# Override the accession taken from the search results: the rest of the
# notebook works with P68871 instead.
uniprot_accession = "P68871"

Get the macromolecules which interact with this UniProt accession

In [8]:
# Fetch the interaction data for the chosen accession using the project helper.
# The URL printed below the cell suggests it queries the PDBe graph-API
# `interface_residues` endpoint - confirm in python_modules.api_modules.
interaction_data = get_macromolecule_interaction_data(uniprot_accession=uniprot_accession)

https://www.ebi.ac.uk/pdbe/graph-api/uniprot/interface_residues/P68871


In [9]:
pprint(interaction_data[0])

{'allPDBEntries': ['3d17',
                   '1yff',
                   '2dn3',
                   '6kav',
                   '4mqi',
                   '5wog',
                   '1lfy',
                   '1nej',
                   '7jy0',
                   '3r5i',
                   '1cbl',
                   '4mqg',
                   '1mko',
                   '3ic2',
                   '6xe7',
                   '5vmm',
                   '1lfq',
                   '1bbb',
                   '4ni0',
                   '6xdt',
                   '1rvw',
                   '4n7n',
                   '7jy3',
                   '1aj9',
                   '7jjq',
                   '1cbm',
                   '1yzi',
                   '1m9p',
                   '6fqf',
                   '1sdl',
                   '4ij2',
                   '6l5y',
                   '5e6e',
                   '6bnr',
                   '1lft',
                   '7jxz',
                   '5e83',
 

In [10]:
# Build a DataFrame from the interaction data via the project helper.
# Presumably yields one row per item of 'interactingPDBEntries' (each cell of
# that column in the output below holds a single dict) - confirm against
# explode_dataset.
df2 = explode_dataset(result=interaction_data, column_to_explode='interactingPDBEntries')

In [11]:
df2

Unnamed: 0,startIndex,endIndex,startCode,endCode,indexType,interactingPDBEntries,allPDBEntries,interaction_accession,interaction_name,length,uniprot_accession,interaction_accession_type,interaction_ratio
0,2,2,VAL,VAL,UNIPROT,"{'pdbId': '5e6e', 'entityId': 2, 'chainIds': 'B'}","[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48
1,2,2,VAL,VAL,UNIPROT,"{'pdbId': '1yff', 'entityId': 2, 'chainIds': '...","[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48
2,2,2,VAL,VAL,UNIPROT,"{'pdbId': '4n7n', 'entityId': 2, 'chainIds': '...","[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48
3,2,2,VAL,VAL,UNIPROT,"{'pdbId': '5woh', 'entityId': 2, 'chainIds': '...","[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48
4,2,2,VAL,VAL,UNIPROT,"{'pdbId': '1mko', 'entityId': 2, 'chainIds': '...","[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3209,45,45,SER,SER,UNIPROT,"{'pdbId': '5jdo', 'entityId': 1, 'chainIds': 'A'}",[5jdo],G0UVW6,Uncharacterized protein,147,P68871,UNP,1.00
3210,91,91,GLU,GLU,UNIPROT,"{'pdbId': '5jdo', 'entityId': 1, 'chainIds': 'A'}",[5jdo],G0UVW6,Uncharacterized protein,147,P68871,UNP,1.00
3211,96,96,LYS,LYS,UNIPROT,"{'pdbId': '5jdo', 'entityId': 1, 'chainIds': 'A'}",[5jdo],G0UVW6,Uncharacterized protein,147,P68871,UNP,2.00
3212,96,96,LYS,LYS,UNIPROT,"{'pdbId': '5jdo', 'entityId': 2, 'chainIds': 'B'}",[5jdo],G0UVW6,Uncharacterized protein,147,P68871,UNP,2.00


Some post-processing is required to separate interactingPDBEntries into separate columns

In [12]:
# Expand the dicts stored in 'interactingPDBEntries' into one column per key.
# Normalising from a plain list always yields a fresh 0..n-1 index, whatever
# the pandas version does with a Series argument.
data = pd.json_normalize(df2['interactingPDBEntries'].tolist())

# DataFrame.join aligns on index labels, so give the expanded columns the same
# index as df2. Without this the join is only correct while df2 happens to
# carry a default 0..n-1 index; any other index would silently misalign rows.
data.index = df2.index

# Attach the expanded columns and drop the original dict column.
df3 = df2.join(data).drop(columns='interactingPDBEntries')

In [13]:
df3

Unnamed: 0,startIndex,endIndex,startCode,endCode,indexType,allPDBEntries,interaction_accession,interaction_name,length,uniprot_accession,interaction_accession_type,interaction_ratio,pdbId,entityId,chainIds
0,2,2,VAL,VAL,UNIPROT,"[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48,5e6e,2,B
1,2,2,VAL,VAL,UNIPROT,"[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48,1yff,2,"F,H,B,D"
2,2,2,VAL,VAL,UNIPROT,"[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48,4n7n,2,"F,J,H,B,L,D"
3,2,2,VAL,VAL,UNIPROT,"[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48,5woh,2,"B,D"
4,2,2,VAL,VAL,UNIPROT,"[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48,1mko,2,"B,D"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3209,45,45,SER,SER,UNIPROT,[5jdo],G0UVW6,Uncharacterized protein,147,P68871,UNP,1.00,5jdo,1,A
3210,91,91,GLU,GLU,UNIPROT,[5jdo],G0UVW6,Uncharacterized protein,147,P68871,UNP,1.00,5jdo,1,A
3211,96,96,LYS,LYS,UNIPROT,[5jdo],G0UVW6,Uncharacterized protein,147,P68871,UNP,2.00,5jdo,1,A
3212,96,96,LYS,LYS,UNIPROT,[5jdo],G0UVW6,Uncharacterized protein,147,P68871,UNP,2.00,5jdo,2,B


In [14]:
# Add two columns that are copies of existing ones:
# 'residue_number' mirrors 'startIndex' and 'count' mirrors 'pdbId'.
df3 = df3.assign(
    residue_number=df3['startIndex'],
    count=df3['pdbId'],
)


Now we are ready to use the data.

In [15]:
df3.head()

Unnamed: 0,startIndex,endIndex,startCode,endCode,indexType,allPDBEntries,interaction_accession,interaction_name,length,uniprot_accession,interaction_accession_type,interaction_ratio,pdbId,entityId,chainIds,residue_number,count
0,2,2,VAL,VAL,UNIPROT,"[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48,5e6e,2,B,2,5e6e
1,2,2,VAL,VAL,UNIPROT,"[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48,1yff,2,"F,H,B,D",2,1yff
2,2,2,VAL,VAL,UNIPROT,"[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48,4n7n,2,"F,J,H,B,L,D",2,4n7n
3,2,2,VAL,VAL,UNIPROT,"[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48,5woh,2,"B,D",2,5woh
4,2,2,VAL,VAL,UNIPROT,"[3d17, 1yff, 2dn3, 6kav, 4mqi, 5wog, 1lfy, 1ne...",P68871,Hemoglobin subunit beta,147,P68871,UNP,0.48,1mko,2,"B,D",2,1mko
