# Import python libraries

In [10]:
# JUPYTER SPECIALS (the two commented-out magics below enable auto-reload after any change in php_lib)
# %load_ext autoreload
# %autoreload 2
%matplotlib inline
import pandas as pd
from rdkit import Chem

# Transformation costs used in our tests

In [15]:
# Load the ErG cost tables from CSV; column 0 of each file becomes the row index.
#   mutationCosts <- 'ergMutationCosts.csv' (substitution costs)
#   insDelCosts   <- 'ergInsDelCosts.csv'   (insertion and deletion costs)
mutationCosts = pd.read_csv('ergMutationCosts.csv', index_col=0)
insDelCosts = pd.read_csv('ergInsDelCosts.csv', index_col=0)

# Render both tables with a single call; they are shown in argument order.
display(mutationCosts, insDelCosts)

Unnamed: 0,[],[0],[1],[2],[3],[4],[5],[6],[7],"[0, 1]","[0, 2]","[0, 3]","[1, 2]","[1, 3]","[2, 3]","[0, 1, 2]",-,=
[],0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3
[0],2,0,2,2,2,2,2,2,2,1,1,1,2,2,2,1,2,3
[1],2,2,0,2,2,2,2,2,2,1,2,2,1,1,2,1,2,3
[2],2,2,2,0,2,2,2,2,2,2,1,2,1,2,1,1,2,3
[3],2,2,2,2,0,2,2,2,2,2,2,1,2,1,1,2,2,3
[4],2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,3
[5],2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,3
[6],2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,3
[7],2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,3
"[0, 1]",2,1,1,2,2,2,2,2,2,0,2,2,2,2,2,2,2,3


Unnamed: 0,insert,delete
[],0.5,0.5
[0],1.0,1.0
[1],1.0,1.0
[2],1.0,1.0
[3],1.0,1.0
[4],1.0,1.0
[5],1.0,1.0
[6],1.0,1.0
[7],0.5,0.5
"[0, 1]",1.0,1.0


# Node attributes legend

[]      -   non-carbon link  
[0]     -   donor  
[1]     -   acceptor  
[2]     -   posIonizable  
[3]     -   negIonizable  
[4]     -   hydrophobic  
[5]     -   aromatic  
[6]     -   aliphatic  
[7]     -   carbon link  
[0, 1]  -   donor + acceptor  
[0, 2]  -   donor + posIonizable  
[0, 3]  -   donor + negIonizable  
[1, 2]  -   acceptor + posIonizable  
[1, 3]  -   acceptor + negIonizable  
[2, 3]  -   posIonizable + negIonizable  
[0, 1, 2] - donor + acceptor + posIonizable  

# Fixed transformation costs for reduced graphs proposed by Harper et al. (2004)

![](harper_01.png)
![](harper_02.png)

# Demonstration of how to read and draw a molecule from the Skoda et al. (2016) benchmarking tool
## (using 'landscape mode' is recommended on mobile devices for viewing and manipulating 3D molecules)

In [6]:
# Dataset directory name and the target whose molecule files are loaded below.
dataset_name = '10.1021%2Fci200412p'
# NOTE(review): absolute, machine-specific path; point it at your local copy of the
# benchmarking data before running this cell.
rootPath = '/home/cj/Downloads/dataset_test/lbvs-environment/data/datasets/' + dataset_name + '/molecules/sdf/'
target = 'ACM1_Agonist'


def _load_sdf_molecules(sdf_path):
    """Read the molecules of an SDF file as editable ``Chem.RWMol`` objects.

    Parameters
    ----------
    sdf_path : str
        Path of the SDF file handed to ``Chem.SDMolSupplier``.

    Returns
    -------
    list
        One ``Chem.RWMol`` per entry yielded by the supplier, in file order;
        entries yielded as ``None`` are skipped.
    """
    # Identity test (`is not None`) rather than `!= None`: it cannot be affected by
    # an overloaded comparison operator and is the idiomatic check for the singleton.
    return [Chem.RWMol(mol) for mol in Chem.SDMolSupplier(sdf_path) if mol is not None]


# <target>_Ligands.sdf
bind_type = 'Ligands'
sdfdir = rootPath + target + '_' + bind_type + '.sdf'
mols_ligs = _load_sdf_molecules(sdfdir)
print("len(mols_ligs)", len(mols_ligs))

# <target>_Decoys.sdf (bind_type and sdfdir keep these values after the cell runs)
bind_type = 'Decoys'
sdfdir = rootPath + target + '_' + bind_type + '.sdf'
mols_decs = _load_sdf_molecules(sdfdir)
print("len(mols_decs)", len(mols_decs))

len(mols_ligs) 806
len(mols_decs) 31433


In [8]:
# 3D drawing method
import py3Dmol
def drawit(m, width=400,height=400,p=None,confId=-1):
    """Show molecule ``m`` as a stick model in a py3Dmol viewer.

    Parameters
    ----------
    m : molecule accepted by ``Chem.MolToMolBlock``
    width, height : size passed to ``py3Dmol.view`` when a new viewer is created
    p : existing viewer to reuse; when ``None`` a new one is created
    confId : conformer id forwarded to ``Chem.MolToMolBlock``

    Returns
    -------
    Whatever the viewer's ``show()`` call returns.
    """
    mol_block = Chem.MolToMolBlock(m, confId=confId)
    viewer = py3Dmol.view(width=width, height=height) if p is None else p

    # Replace whatever the viewer held before with this molecule only.
    viewer.removeAllModels()
    viewer.addModel(mol_block, 'sdf')
    viewer.setStyle({'stick': {}})
    viewer.setBackgroundColor('0xffffff')  # white background
    viewer.zoomTo()
    return viewer.show()

# Announce and draw each selected molecule in a 600x600 viewer.
# Each entry is (caption, molecule list, index); the lookup happens after the caption is printed.
winners = (
    ("Then the two winners are ligand mols_ligs[614]: ", mols_ligs, 614),
    ("and decoy mols_decs[21948]: ", mols_decs, 21948),
)
for caption, molecules, position in winners:
    print(caption)
    drawit(molecules[position], width=600, height=600)

Then the two winners are ligand mols_ligs[614]: 


and decoy mols_decs[21948]: 


# List of Available Fingerprints inside RDKit library

- **RDKit:** a Daylight-like fingerprint based on hashing molecular subgraphs
- **Atom Pairs:** JCICS 25:64-73 (1985)
- **Topological Torsions:** JCICS 27:82-5 (1987)
- **MACCS keys:** Using the 166 public keys implemented as SMARTS
- **Morgan/Circular:** Fingerprints based on the Morgan algorithm, similar to the ECFP/FCFP fingerprints JCIM 50:742-54 (2010).
- **2D Pharmacophore:** Uses topological distances between pharmacophoric points.
- **Pattern:** a topological fingerprint optimized for substructure screening
- **Extended Reduced Graphs:** Derived from the ErG fingerprint published by Stiefl et al. in JCIM 46:208–20 (2006).


Data obtained from:  
http://www.rdkit.org/docs/GettingStartedInPython.html  
Check the link for more information.

# Partial results obtained

## Results obtained with datasets built from 10 random selections

dataset = 10.1021%2Fci200412p  
target = 'ACM1_Agonist'  
recognition ratio AUC = 0.85-0.90  
difficulty level = low

![](plots01.png)
![](means01.png)

dataset = 10.1021%2Fci200412p  
target = '5HT2C_Antagonist'  
recognition ratio AUC = 0.70-0.80  
difficulty level = medium-low

![](plots02.png)
![](means02.png)

dataset = 10.1021%2Fci200412p  
target = '5HT2B_Antagonist'  
recognition ratio AUC = 0.60-0.70  
difficulty level = medium-high

![](plots03.png)
![](means03.png)

dataset = 10.1186%2F1758-2946-5-26  
target = '466'  
recognition ratio AUC = 0.50-0.60  
difficulty level = high

![](plots04.png)
![](means04.png)
