# PDBe API Training

### PDBe Interactions

This tutorial will guide you through searching PDBe programmatically.


First we will import the code which will do the work
Run the cell below - by pressing the green play button.

In [1]:
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
from IPython.display import SVG, display
import sys
sys.path.insert(0,'..')
from python_modules.api_modules import run_sequence_search, explode_dataset, get_macromolecule_interaction_data


Now we are ready to actually run the sequence search we did in the last module

We will search for a sequence with an example sequence from UniProt P24941 -
Cyclin-dependent kinase 2

In [2]:
sequence_to_search = """
MEDAKNIKKGPAPFYPLEDGTAGEQLHKAMKRYALVPGTIAFTDAHIEVNITYAEYFEMS
VRLAEAMKRYGLNTNHRIVVCSENSLQFFMPVLGALFIGVAVAPANDIYNERELLNSMNI
SQPTVVFVSKKGLQKILNVQKKLPIIQKIIIMDSKTDYQGFQSMYTFVTSHLPPGFNEYD
FVPESFDRDKTIALIMNSSGSTGLPKGVALPHRTACVRFSHARDPIFGNQIIPDTAILSV
VPFHHGFGMFTTLGYLICGFRVVLMYRFEEELFLR
SLQDYKIQSALLVPTLFSFFAKSTL
IDKYDLSNLHEIASGGAPLSKEVGEAVAKRFHLPGIRQGYGLTETTSAILITPEGDDKPG
AVGKVVPFFEAKVVDLDTGKTLGVNQRGELCVRGPMIMSGYVNNPEATNALIDKDGWLHS
GDIAYWDEDEHFFIVDRLKSLIKYKGYQVAPAELESILLQHPNIFDAGVAGLPDDDAGEL
PAAVVVLEHGKTMTEKEIVDYVASQVTTAKKLRGGVVFVDEVPKGLTGKLDARKIREILI
KAKKGGKSKL
"""
filter_list = ['pfam_accession', 'pdb_id', 'molecule_name', 'ec_number',
               'uniprot_accession_best', 'tax_id']

search_results = run_sequence_search(sequence_to_search,
                                     filter_terms=filter_list,
                                     number_of_rows=1000
                                     )

Number of results 222


Load the data into a Dataframe

In [3]:
df = explode_dataset(search_results)
df = df.query('percentage_identity > 80')
group_by_uniprot = df.groupby('uniprot_accession_best').count().sort_values('pdb_id', ascending=False)

How many UniProt accessions were there?

In [4]:
len(group_by_uniprot)

2

lets look at the data to see what we have

In [5]:
group_by_uniprot.head()

Unnamed: 0_level_0,chain_id,ec_number,entity_id,entry_entity,molecule_name,pdb_id,pfam_accession,tax_id,e_value,percentage_identity,result_sequence
uniprot_accession_best,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
P08659,48,48,48,48,48,48,46,48,48,48,0
Q5UFR2,4,4,4,4,4,4,4,4,4,4,0


get the first UniProt from the results

In [8]:
uniprot_accession = df['uniprot_accession_best'].iloc[0]

'P07550'

In [2]:
uniprot_accession = 'P07550'
uniprot_accession


'P07550'

Get macromolecules which interact with the UniProt

In [3]:
interaction_data = get_macromolecule_interaction_data(uniprot_accession=uniprot_accession)
df2 = explode_dataset(interaction_data)

https://www.ebi.ac.uk/pdbe/graph-api/uniprot/interface_residues/P07550


In [4]:
df2

Unnamed: 0,startIndex,endIndex,startCode,endCode,indexType,interactingPDBEntries,allPDBEntries,interaction_accession,interaction_name,length,uniprot_accession,interaction_accession_type,interacting_pdb_entries,interaction_ratio
0,63,63,ARG,ARG,UNIPROT,"{'pdbId': '5jqh', 'entityId': 2}",4qkx,IG-heavy chain,Camelid Antibody Fragment,,P07550,AB,"{'pdbId': '5jqh', 'entityId': 2}",0.125
1,63,63,ARG,ARG,UNIPROT,"{'pdbId': '5jqh', 'entityId': 2}",3p0g,IG-heavy chain,Camelid Antibody Fragment,,P07550,AB,"{'pdbId': '5jqh', 'entityId': 2}",0.125
2,63,63,ARG,ARG,UNIPROT,"{'pdbId': '5jqh', 'entityId': 2}",4ldo,IG-heavy chain,Camelid Antibody Fragment,,P07550,AB,"{'pdbId': '5jqh', 'entityId': 2}",0.125
3,63,63,ARG,ARG,UNIPROT,"{'pdbId': '5jqh', 'entityId': 2}",6n48,IG-heavy chain,Camelid Antibody Fragment,,P07550,AB,"{'pdbId': '5jqh', 'entityId': 2}",0.125
4,63,63,ARG,ARG,UNIPROT,"{'pdbId': '5jqh', 'entityId': 2}",4lde,IG-heavy chain,Camelid Antibody Fragment,,P07550,AB,"{'pdbId': '5jqh', 'entityId': 2}",0.125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1683,225,225,GLU,GLU,UNIPROT,"{'pdbId': '6ni3', 'entityId': 1, 'chainIds': 'A'}",6ni3,P63092,Guanine nucleotide-binding protein G(s) subuni...,,P07550,UNP,"{'pdbId': '6ni3', 'entityId': 1, 'chainIds': 'A'}",1.000
1684,229,229,GLN,GLN,UNIPROT,"{'pdbId': '6ni3', 'entityId': 1, 'chainIds': 'A'}",6ni3,P63092,Guanine nucleotide-binding protein G(s) subuni...,,P07550,UNP,"{'pdbId': '6ni3', 'entityId': 1, 'chainIds': 'A'}",1.000
1685,232,232,LYS,LYS,UNIPROT,"{'pdbId': '6ni3', 'entityId': 1, 'chainIds': 'A'}",6ni3,P63092,Guanine nucleotide-binding protein G(s) subuni...,,P07550,UNP,"{'pdbId': '6ni3', 'entityId': 1, 'chainIds': 'A'}",1.000
1686,239,239,ARG,ARG,UNIPROT,"{'pdbId': '6ni3', 'entityId': 1, 'chainIds': 'A'}",6ni3,P63092,Guanine nucleotide-binding protein G(s) subuni...,,P07550,UNP,"{'pdbId': '6ni3', 'entityId': 1, 'chainIds': 'A'}",1.000
