In [1]:
%matplotlib inline

import os
import requests
import pandas
from matplotlib import pyplot

# Optional dependency: py3Dmol provides the inline 3D structure viewer.
# The notebook still runs without it; `viewer_enabled` records whether the
# import succeeded so later cells can fall back to plain-text output.
try:
    import py3Dmol
except ImportError:
    viewer_enabled = False
else:
    viewer_enabled = True

# Base URL of the MDStudio SMARTCyp service. Defaults to a local instance;
# set the SMARTCYP_SERVICE_URL environment variable to target another
# deployment (e.g. 'http://mdstudio-smartcyp.dev.openrisknet.org') without
# editing the notebook.
# Any trailing slash is stripped because endpoint URLs are built as
# '{base}/{endpoint}' and would otherwise contain a double slash.
SERVICE_BASE_URL = os.environ.get('SMARTCYP_SERVICE_URL', 'http://localhost:8081').rstrip('/')

# Performing structure and reactivity based CYP SOM prediction

MDStudio SMARTCyp supports nearly all configuration options for PLANTS in two convenient endpoints. A flexible PLANTS docking workflow requires the combined use of two endpoints:

1. The 'plants_docking' endpoint performs the actual docking with support for most of the configuration options of  
   PLANTS. The endpoint returns the docking results as JSON file. This is the content of the PLANTS features.csv or 
   alternatively the ranking.csv file. The results have the 'PATH' column added that provides a unique pointer to the
   docking poses stored on the server for a set time, by default a maximum of 1 hour before the results are deleted.
   In addition, a structure based clustering of the docking poses is performed. The obtained cluster numbers are 
   listed in the 'CLUSTER' column.
2. Based on the results from step 1, one or more docking poses (a selection) can be retrieved as a multi-structure
   MOL2 file from the 'plants_docking_structures' endpoint using the unique pointers to the poses in the PATH 
   column. The same procedure can be used to redo the clustering for all of the solutions or a selection using the
   'plants_docking_statistics' endpoint.

### Running a simple PLANTS docking

Starting off simple with step 1: uploading the required ligand and protein structure files in MOL2 format and a target binding site center coordinate. The resulting JSON object is loaded into a Pandas DataFrame, ready for analysis and friendly visualization.

In [2]:
# Submit the ligand structure to the 'som_prediction' endpoint for CYP 3A4.
ligand_path = os.path.join('files', 'ligand.mol2')
data = {'cyp': '3A4'}

# Open the ligand in a context manager so the file handle is closed as soon
# as the upload has finished instead of leaking for the kernel's lifetime.
with open(ligand_path, 'rb') as ligand_file:
    files = {'ligand_file': ligand_file}
    response = requests.post('{0}/som_prediction'.format(SERVICE_BASE_URL), files=files, data=data)

# Surface HTTP errors here with a clear message rather than as an opaque
# JSON decoding failure on the next line.
response.raise_for_status()
response_json = response.json()

# Each top-level key of the JSON object becomes one row of the DataFrame.
response_df = pandas.DataFrame.from_dict(response_json, orient='index')

# Lift the column display limit so the transposed table is shown in full.
pandas.options.display.max_columns = None
display(response_df.T)

Unnamed: 0,C.10,C.11,C.13,C.2,C.3,C.4,C.5,C.6,C.8,C.9,N.12,O.1,S.7
Docking,0.9,1.0,0.75,0.0,0.0,0.6,0.6,0.0,0.0,0.6,0.9,0.0,0.2
SMARTCyp,0.921657,0.917123,0.914811,0.967875,0.002091,0.928713,0.927637,0.0,0.001015,0.92044,0.93205,0.005649,0.936392
docking-j52cdi2z/_entry_00001_conf_02.mol2,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
docking-j52cdi2z/_entry_00001_conf_03.mol2,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
docking-j52cdi2z/_entry_00001_conf_04.mol2,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
docking-j52cdi2z/_entry_00001_conf_05.mol2,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
docking-j52cdi2z/_entry_00001_conf_07.mol2,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
docking-j52cdi2z/_entry_00001_conf_08.mol2,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
docking-j52cdi2z/_entry_00001_conf_10.mol2,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
docking-j52cdi2z/_entry_00001_conf_11.mol2,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


### Retrieving docking poses for cluster 1

Select all poses belonging to cluster 1 from the previously obtained DataFrame and upload them to the 'plants_docking_structures' endpoint. The endpoint returns a multi-structure (combined) MOL2 file containing all the ligand poses.

If the inline 3D viewer is installed, the example below splits the multi-structure file into individual ligand poses and displays them together with the protein structure.

In [9]:
# Selection thresholds, named so they are easy to find and tune.
TOP_ROW_COUNT = 3      # number of rows with the highest 'Docking' value to keep
MIN_COLUMN_TOTAL = 2   # minimum column sum over those rows for a pose to be selected

# Keep the rows with the highest 'Docking' value, then sum every column over
# just those rows.
top_rows = response_df.sort_values('Docking', ascending=False).head(TOP_ROW_COUNT)
column_totals = top_rows.sum()

# Pose columns are recognised by their lowercase 'docking' prefix; the
# capitalised 'Docking' summary column does not match and is skipped.
poses = [
    name for name in column_totals[column_totals >= MIN_COLUMN_TOTAL].index
    if name.startswith('docking')
]
if not poses:
    # Without this guard, poses[0] below fails with an uninformative IndexError.
    raise ValueError('No docking pose satisfies the selection criteria')

structures_url = '{0}/plants_docking_structures'.format(SERVICE_BASE_URL)

# The first selected pose is requested as PDB including the protein; the
# remaining poses are requested as ligand-only MOL2.
protein_pose = requests.post(structures_url,
                             data={'paths': poses[0], 'output_format': 'pdb', 'include_protein': True})
ligand_poses = requests.post(structures_url,
                             data={'paths': poses[1:], 'output_format': 'mol2', 'include_protein': False})

# Stop on HTTP errors instead of handing an error page to the viewer.
for structure_response in (protein_pose, ligand_poses):
    structure_response.raise_for_status()

protein_pose_mol = protein_pose.text
ligand_poses_mol = ligand_poses.text

# Display ligand poses and protein structure
if viewer_enabled:
    # Split the multi-structure MOL2 text on the record header and put the
    # header back on every non-empty chunk so each is a complete MOL2 record.
    mol2_header = '@<TRIPOS>MOLECULE\n'
    mols = [
        '{0}{1}'.format(mol2_header, chunk)
        for chunk in ligand_poses_mol.split(mol2_header)
        if chunk
    ]

    view = py3Dmol.view()
    view.addModel(protein_pose_mol, 'pdb')
    view.setStyle({'cartoon': {'color': 'spectrum'}})
    for mol in mols:
        view.addModel(mol, 'mol2')

    # NOTE(review): setStyle without a selection presumably applies to every
    # model in the view, which would replace the cartoon style set on the
    # protein above - confirm that this is the intended rendering.
    view.setStyle({'stick': {}})
    display(view)
else:
    # No viewer available: show the raw MOL2 text. The original fallback
    # referenced an undefined name (`response_mol`) and raised NameError;
    # an explicit print is also required because a bare expression inside
    # an if/else block is not rendered as cell output.
    print(ligand_poses_mol)

<py3Dmol.view at 0x110bf5860>