# PLIP
#### Let's try to use PLIP with DOB

**Result table from the [#1892](https://github.com/opentargets/platform/issues/1892) ticket**

|MOLECULE_CHEMBL_ID|MOLECULE_PDB_ID|        STRUCTURE_ID|
|-------------------------|---------------------|--------------------|
|      CHEMBL313405        |            ABZ                 | [1s9g, 6qbb]           |
|      CHEMBL334167        |            SG2                 | [1kwr]                       |
|      CHEMBL328910        |            DOB                | [1dod, 1doe, 1pbb]  |
|        CHEMBL1364           |            UEG                | [4c5l, 4c5n, 5eb3...|
|         CHEMBL614            |            PZA                 | [3r4x, 3r55, 5fpd... |
|      CHEMBL295698        |            KLN                 | [2jjp, 2v0m]             |
|        CHEMBL1427            |            HPA                | [1a9q, 1a9r, 1a9t...  |

The DOB entry from the table above is:

|MOLECULE_CHEMBL_ID|MOLECULE_PDB_ID|        STRUCTURE_ID|
|-------------------------|---------------------|--------------------|
|      CHEMBL328910        |            DOB                | [1dod, 1doe, 1pbb]  |


#### Let's import PLIP and its packages

In [65]:
from pathlib import Path
import time
import warnings

warnings.filterwarnings("ignore")

import pandas as pd
import nglview as nv
import openbabel
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
from plip.structure.preparation import PDBComplex
from plip.exchange.report import BindingSiteReport

from opencadd.structure.core import Structure

A short snippet to find the binding site of the molecule of interest among the molecules interacting with the structure

In [66]:
# PDB entry to analyse and the ligand (PDB HET ID) whose binding site we want.
struct_of_interest = '1DOD'
molecule_of_interest = 'DOB'

# NOTE(review): absolute, machine-specific path -- the notebook only runs on this
# machine as written. The same file is written by a later cell
# (`pdb_file.write(path)`), so on a fresh kernel it must already exist on disk.
path = '/Users/marinegirardey/Documents/opentarget_internship/OpenTargetInternship/structure_files/' + struct_of_interest +'.pdb'
structure = PDBComplex()
structure.load_pdb(path) # Load the PDB file into PLIP class

# str(structure) is split into lines; the first line is dropped (presumably the
# structure name) so that only the binding-site identifiers remain.
mol_interacting_list = str(structure).split('\n')[1:]

# Reset on every run so a stale value from a previous execution of this cell
# can never be reused silently when the ligand is not found.
my_bs_id = None
for molecule in mol_interacting_list:
    molecule_name = molecule.split(':')[0]
    if molecule_name == molecule_of_interest:
        my_bs_id = molecule # Shows name of structure and ligand binding sites
        print(my_bs_id) # Unique binding site identifier (HetID:Chain:Position)

# Fail early with an explicit message instead of a NameError/KeyError below.
if my_bs_id is None:
    raise ValueError(
        f"No binding site found for ligand {molecule_of_interest!r} in "
        f"{struct_of_interest}; available binding sites: {mol_interacting_list}"
    )

structure.analyze()
# If several sites match the ligand, the last one printed above is used.
my_interactions = structure.interaction_sets[my_bs_id] # Contains all interaction data

DOB:X:396


In [69]:
# Build an opencadd Structure for the PDB ID chosen above
# (presumably fetched from the PDB by ID -- TODO confirm it needs network access).
pdb_file = Structure.from_pdbid(struct_of_interest)

In [70]:
# Write the structure to `path`, the same file that the PLIP cell above loads
# with `structure.load_pdb(path)`.
# NOTE(review): on a fresh kernel this write has to happen before that load --
# confirm the intended cell order.
pdb_file.write(path)

In [74]:
# Create an NGL widget for the structure, looked up by its PDB ID.
ngl_viewer = nv.show_pdbid(struct_of_interest)
# Add the ligands
# (ball+stick for every hetero atom except water, per the selection string)
ngl_viewer.add_representation(repr_type="ball+stick", selection="hetero and not water")
# Center view on binding site
ngl_viewer.center("ligand")

In [81]:
# Bare last expression: renders the widget as the cell output.
ngl_viewer

NGLWidget(n_components=1)

This function, taken from the [TeachOpenCADD talktorial T016](https://projects.volkamerlab.org/teachopencadd/talktorials/T016_protein_ligand_interactions.html), builds a dictionary of the interactions between the structure and each bound molecule.

In [83]:
def retrieve_plip_interactions(pdb_file):
    """
    Run PLIP on a complex and gather the interactions of every binding site.

    Parameters
    ----------
    pdb_file :
        The PDB file of the complex; it is handed to ``PDBComplex.load_pdb``.

    Returns
    -------
    dict :
        One entry per binding site, keyed like ``interaction_sets``. Each value
        is a dictionary keyed by interaction type ("hydrophobic", "hbond", ...)
        whose value is a list: the first element holds the feature names of
        that interaction type, the remaining elements are the individual
        interaction records reported for the site.
    """
    # Interaction types for which BindingSiteReport exposes a
    # ``<type>_features`` / ``<type>_info`` attribute pair.
    interaction_types = (
        "hydrophobic",
        "hbond",
        "waterbridge",
        "saltbridge",
        "pistacking",
        "pication",
        "halogen",
        "metal",
    )

    complex_ = PDBComplex()
    complex_.load_pdb(pdb_file)
    # Characterize each detected ligand so that interaction_sets gets filled.
    for ligand in complex_.ligands:
        complex_.characterize_complex(ligand)

    site_interactions = {}
    # Visit the binding sites in sorted key order so the result is ordered too.
    for site_id in sorted(complex_.interaction_sets):
        report = BindingSiteReport(complex_.interaction_sets[site_id])
        per_type = {}
        for kind in interaction_types:
            # e.g. for hydrophobic: ('RESNR', 'RESTYPE', ..., 'LIGCOO', 'PROTCOO')
            feature_names = getattr(report, kind + "_features")
            # one record per interaction, in the same order as the feature names
            records = getattr(report, kind + "_info")
            # header first, data after -- ready to be turned into a DataFrame
            per_type[kind] = [feature_names] + records
        site_interactions[site_id] = per_type
    return site_interactions

In [84]:
interactions_by_site = retrieve_plip_interactions(path)

In [87]:
# Report how many binding sites were found for the structure and show their
# identifiers (the keys of the dictionary returned by retrieve_plip_interactions).
print(
    f"Number of binding sites detected in {struct_of_interest} : "
    f"{len(interactions_by_site)}\n"
    f"with {interactions_by_site.keys()}"
)
# NBVAL_CHECK_OUTPUT

Number of binding sites detected in 1DOD : 2
with dict_keys(['DOB:X:396', 'FAD:X:395'])


In [88]:
# Pick one binding site by position. retrieve_plip_interactions inserts the
# sites in sorted key order, so index 0 is the first identifier in that order
# (here 'DOB:X:396'); the index must be revisited for other structures.
index_of_selected_site = 0
selected_site = list(interactions_by_site.keys())[index_of_selected_site]
print(selected_site)

DOB:X:396


In [89]:
def create_df_from_binding_site(selected_site_interactions, interaction_type="hbond"):
    """
    Build a data frame for one interaction type of a binding site.

    Parameters
    ----------
    selected_site_interactions : dict
        Precalculated PLIP interactions for the selected site. Each value is a
        list whose first element holds the column names and whose remaining
        elements are the data records.
    interaction_type : str
        The interaction type of interest (default: hydrogen bond). An unknown
        type prints a warning and falls back to "hbond".

    Returns
    -------
    pd.DataFrame :
        DataFrame with one row per interaction record retrieved from PLIP.
    """
    supported_types = (
        "hydrophobic",
        "hbond",
        "waterbridge",
        "saltbridge",
        "pistacking",
        "pication",
        "halogen",
        "metal",
    )

    # Unknown interaction types are not an error: warn and use hydrogen bonds.
    if interaction_type not in supported_types:
        print("!!! Wrong interaction type specified. Hbond is chosen by default!!!\n")
        interaction_type = "hbond"

    entries = selected_site_interactions[interaction_type]
    # The first entry carries the column names, everything after it is data.
    header = entries[0]
    rows = entries[1:]
    return pd.DataFrame.from_records(rows, columns=header)

In [90]:
# Display the hydrophobic interactions of the selected binding site as a table.
create_df_from_binding_site(interactions_by_site[selected_site], interaction_type="hydrophobic")

Unnamed: 0,RESNR,RESTYPE,RESCHAIN,RESNR_LIG,RESTYPE_LIG,RESCHAIN_LIG,DIST,LIGCARBONIDX,PROTCARBONIDX,LIGCOO,PROTCOO
0,185,TRP,X,396,DOB,X,3.78,3183,1446,"(18.417, 93.341, 46.169)","(21.791, 92.701, 44.596)"
1,210,LEU,X,396,DOB,X,3.95,3185,1638,"(16.259, 93.274, 45.09)","(18.937, 93.234, 42.182)"
2,222,TYR,X,396,DOB,X,3.95,3181,1738,"(17.326, 95.447, 45.546)","(20.54, 96.847, 43.732)"
3,47,VAL,X,396,DOB,X,3.85,3186,347,"(16.265, 94.688, 45.039)","(12.941, 94.178, 46.919)"
