## Compute lipinsky descriptors for a Database

# Import libraries

In [1]:
import pandas as pd
import numpy as  np

from rdkit import Chem
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw, Descriptors

# RDKit

Cheminformatics toolkits (software development kits) that allow  develop custom computer applications. (Compute Descriptors, FP, visualize chemical structures)

Documentation:http://rdkit.org/docs/index.html

Jupyter Notebooks: https://github.com/rdkit/rdkit-tutorials

# Open Database

In [2]:
Data = pd.read_csv("Database_CABANA.csv", sep = ",")
#Data.head()

In [3]:
#Count elements
#Data.shape[0]

In [4]:
#Identify features
list(Data.columns)

['Unnamed: 0', 'Name', 'SMILES', 'Library']

# Compute descriptors

In [5]:
"""Select SMILES"""
smiles = list(Data["SMILES"])
#smiles[0]

In [6]:
"""Convert SMILES TO RDKit Format"""
#create an empty list to storage results
sm = list()

for i in smiles:
    sm.append(Chem.MolFromSmiles(i)) 

In [7]:
#create an empty list to storage results
HBA = list()
HBD = list()
RB = list()
LogP = list()
TPSA = list()
MW = list()
HeavyAtom = list()
RingCount = list()
FractionCSP3 = list()

In [8]:
#compute descriptors for each smile in sm
for i in sm:
    HBA.append(Descriptors.NumHAcceptors(i))
    HBD.append(Descriptors.NumHDonors(i))
    RB.append(Descriptors.NumRotatableBonds(i))
    LogP.append(Descriptors.MolLogP(i))
    TPSA.append(Descriptors.TPSA(i))
    MW.append(Descriptors.MolWt(i))
print("calcule descriptores")

calcule descriptores


In [9]:
#explore list elements
HBA[0:5]

[3, 10, 6, 2, 9]

In [10]:
#storage in a DataFrame
columns = ["SMILES", "HBA", "HBD", "RB", "LogP", "TPSA", "MW"]
data = [smiles, HBA, HBD, RB, LogP, TPSA, MW]
data = np.transpose(data, axes=None)
Database = pd.DataFrame(
            data = data,
            columns = columns)
print(Database.head())

                                              SMILES HBA HBD  RB  \
0                     CC(=O)Nc1cc(ccc1O)[As](=O)(O)O   3   4   2   
1  CC[P]([Au]SC1OC(COC(=O)C)C(C(C1OC(=O)C)OC(=O)C...  10   0  11   
2     CC(CC(B(O)O)NC(=O)C(NC(=O)c1cnccn1)Cc1ccccc1)C   6   4   9   
3                               COC(C[Hg]Cl)CNC(=O)N   2   2   5   
4   O=C(c1ccccn1)O[Cr](OC(=O)c1ccccn1)OC(=O)c1ccccn1   9   0   6   

                  LogP                TPSA                  MW  
0  -1.0884000000000005  106.86000000000001             275.092  
1   2.7925000000000004              114.43   678.4910000000001  
2   0.3606000000000009              124.44   384.2450000000001  
3   0.3243000000000002               64.35  367.19800000000015  
4               2.1056  117.57000000000001   418.3050000000001  


In [11]:
"""Merge Results"""
#Merge Database results with Name and Library (From input file)
Database["Name"] = Data["Name"]
Database["Library"] = Data["Library"]
Database.head()

Unnamed: 0,SMILES,HBA,HBD,RB,LogP,TPSA,MW,Name,Library
0,CC(=O)Nc1cc(ccc1O)[As](=O)(O)O,3,4,2,-1.0884000000000005,106.86,275.092,Acetarsol,FDA
1,CC[P]([Au]SC1OC(COC(=O)C)C(C(C1OC(=O)C)OC(=O)C...,10,0,11,2.7925000000000004,114.43,678.4910000000001,Auranofin,FDA
2,CC(CC(B(O)O)NC(=O)C(NC(=O)c1cnccn1)Cc1ccccc1)C,6,4,9,0.3606000000000009,124.44,384.2450000000001,Bortezomib,FDA
3,COC(C[Hg]Cl)CNC(=O)N,2,2,5,0.3243000000000002,64.35,367.19800000000015,Chlormerodrin,FDA
4,O=C(c1ccccn1)O[Cr](OC(=O)c1ccccn1)OC(=O)c1ccccn1,9,0,6,2.1056,117.57,418.3050000000001,Chromium picolinate,FDA


# Save Results 

In [12]:
"""Save Results as .csv File"""
Database.to_csv("Database_descriptors.csv", sep = ",")

In [13]:
#verify resutlt
d = pd.read_csv("Database_descriptors.csv")
d.head()

Unnamed: 0.1,Unnamed: 0,SMILES,HBA,HBD,RB,LogP,TPSA,MW,Name,Library
0,0,CC(=O)Nc1cc(ccc1O)[As](=O)(O)O,3,4,2,-1.0884,106.86,275.092,Acetarsol,FDA
1,1,CC[P]([Au]SC1OC(COC(=O)C)C(C(C1OC(=O)C)OC(=O)C...,10,0,11,2.7925,114.43,678.491,Auranofin,FDA
2,2,CC(CC(B(O)O)NC(=O)C(NC(=O)c1cnccn1)Cc1ccccc1)C,6,4,9,0.3606,124.44,384.245,Bortezomib,FDA
3,3,COC(C[Hg]Cl)CNC(=O)N,2,2,5,0.3243,64.35,367.198,Chlormerodrin,FDA
4,4,O=C(c1ccccn1)O[Cr](OC(=O)c1ccccn1)OC(=O)c1ccccn1,9,0,6,2.1056,117.57,418.305,Chromium picolinate,FDA
