2

In [11]:
from plip.structure.preparation import PDBComplex
import requests
import pandas as pd


def fetch_pdb(pdb_structure_id: str)-> str:
    """This function fetches the pdb file from ePDB server as a string
    
    Args:
        pdb_structure_id (str)
    Returns:
        structure data in pdb format as string
    """
    if not pdb_structure_id:
        return ''
    
    url = f'https://www.ebi.ac.uk/pdbe/entry-files/download/pdb{pdb_structure_id}.ent'
    
    try:
        response = requests.get(url)
        data = response.text
    except ConnectionError:
        data = ''
        
    return data

# my_mol = PDBComplex()
# my_mol.load_pdb('/tmp/1EVE.pdb') # Load the PDB file into PLIP class
# print(my_mol) # Shows name of structure and ligand binding sites
# my_bsid = 'E20:A:2001' # Unique binding site identifier (HetID:Chain:Position)
# my_mol.analyze()
# my_interactions = my_mol.interaction_sets[my_bsid] # Contains all interaction data

# # Now print numbers of all residues taking part in pi-stacking
# print([pistack.resnr for pistack in my_interactions.pistacking]) # Prints [84, 129]

In [57]:
pdb_structure_id = '1ed4'
pdb_compound_id = 'IPU'

# Columns of interest:
interaction_columns = {
    'a_orig_idx': 'acceptor_index',
    'd_orig_idx': 'donor_index',
    'type': 'type',
    'resnr': 'residue_number',
    'restype': 'resitude_type', 
    'reschain': 'residue_chain',
    'restype_l': 'pdb_compound_id'
}

# Managing complex:
mol_complex = PDBComplex()

# Fetch and load structure:
pdb_string = fetch_pdb(pdb_structure_id)
mol_complex.load_pdb(pdb_string, as_string=True)

# Filtering out only the relevant ligands:
ligands_of_interest = [ligand for ligand in mol_complex.ligands if ligand.hetid == pdb_compound_id ]

# Characterizing relevant complex:
[mol_complex.characterize_complex(ligand) for ligand in ligands_of_interest]

# Extract data in the right shape:
(
    pd.DataFrame([x._asdict() for x in mol_complex.interaction_sets['AIN:A:1202'].all_itypes])
    [interaction_columns.keys()]
    .rename(columns=interaction_columns)
)

KeyError: 'AIN:A:1202'

In [88]:
interactions = []
for molecule, interaction_set in mol_complex.interaction_sets.items():
    if pdb_compound_id not in molecule:
        continue

    interactions += [interaction._asdict() for interaction in interaction_set.all_itypes]

df = (
    pd.DataFrame(interactions)
#     [interaction_columns.keys()]
#     .rename(columns=interaction_columns)
)


In [89]:
df.columns

Index(['a', 'a_orig_idx', 'd', 'd_orig_idx', 'h', 'distance_ah', 'distance_ad',
       'angle', 'type', 'protisdon', 'resnr', 'restype', 'reschain', 'resnr_l',
       'restype_l', 'reschain_l', 'sidechain', 'atype', 'dtype', 'bsatom',
       'bsatom_orig_idx', 'ligatom', 'ligatom_orig_idx', 'distance', 'water',
       'water_orig_idx', 'distance_aw', 'distance_dw', 'd_angle', 'w_angle'],
      dtype='object')

In [92]:
mol_complex.interaction_sets['IPU:A:1830'].hbonds_ldon #all_itypes[0]._asdict()

[hbond(a=<openbabel.pybel.Atom object at 0x168229700>, a_orig_idx=2354, d=<openbabel.pybel.Atom object at 0x16866afd0>, d_orig_idx=6654, h=<openbabel.pybel.Atom object at 0x16866edf0>, distance_ah=1.7314022722175801, distance_ad=2.714410801628963, angle=176.04977730747, type='strong', protisdon=False, resnr=363, restype='GLU', reschain='A', resnr_l=1830, restype_l='IPU', reschain_l='A', sidechain=True, atype='O2', dtype='Ng+')]

str