## Compute Lipinski descriptors for a database

# Import libraries

In [None]:
import pandas as pd
import numpy as  np

from rdkit import Chem
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw, Descriptors

# RDKit

RDKit is a cheminformatics toolkit (a software development kit) that lets you develop custom computer applications, e.g. to compute descriptors and fingerprints (FP) and to visualize chemical structures.

Documentation: http://rdkit.org/docs/index.html

Jupyter Notebooks: https://github.com/rdkit/rdkit-tutorials

# Open Database

In [None]:
# Load the input compound table from a comma-separated file.
input_csv = "Database_CABANA.csv"
Data = pd.read_csv(input_csv, sep=",")
# Uncomment to preview the first rows:
# Data.head()

In [None]:
#Count elements
#Data.shape[0]

In [None]:
# Show the column names (features) available in the input table.
Data.columns.tolist()

# Compute descriptors

In [None]:
# Select SMILES: pull the "SMILES" column out of the table as a plain list.
smiles = Data["SMILES"].tolist()
# Uncomment to inspect the first entry:
# smiles[0]

In [None]:
# Convert SMILES to RDKit format: one parsed entry per SMILES string,
# kept in the same order as `smiles`.
# NOTE(review): RDKit documents that MolFromSmiles returns None for a
# string it cannot parse -- confirm the input contains no such entries.
sm = [Chem.MolFromSmiles(smi) for smi in smiles]

In [None]:
# Start every descriptor accumulator as its own independent empty list.
HBA, HBD, RB = [], [], []
LogP, TPSA, MW = [], [], []
HeavyAtom, RingCount, FractionCSP3 = [], [], []

In [None]:
# Compute the descriptors for every entry in `sm`.
# Each pair is (accumulator list, RDKit descriptor function); the pairs are
# applied in this order for each entry, so all lists stay aligned with `sm`.
descriptor_targets = (
    (HBA, Descriptors.NumHAcceptors),
    (HBD, Descriptors.NumHDonors),
    (RB, Descriptors.NumRotatableBonds),
    (LogP, Descriptors.MolLogP),
    (TPSA, Descriptors.TPSA),
    (MW, Descriptors.MolWt),
)
for mol in sm:
    for target, compute in descriptor_targets:
        target.append(compute(mol))
print("calcule descriptores")

In [None]:
# Peek at the first five computed HBA values.
HBA[:5]

In [None]:
# Store the descriptors in a DataFrame, one column per descriptor.
# The frame is built from a column dict rather than np.transpose: packing the
# SMILES strings and the numeric lists into one NumPy array coerces every
# value to a string, so the descriptor columns would hold text, not numbers.
columns = ["SMILES", "HBA", "HBD", "RB", "LogP", "TPSA", "MW"]
data = dict(zip(columns, [smiles, HBA, HBD, RB, LogP, TPSA, MW]))
Database = pd.DataFrame(data=data, columns=columns)
print(Database.head())

In [None]:
# Merge results: copy the Name and Library columns from the input table
# into the descriptor table.
for label in ("Name", "Library"):
    Database[label] = Data[label]
Database.head()

# Save Results 

In [None]:
# Save results as a .csv file (comma-separated).
output_csv = "Database_descriptors.csv"
Database.to_csv(output_csv, sep=",")

In [None]:
# Verify the result: read the saved file back and preview its first rows.
saved_csv = "Database_descriptors.csv"
d = pd.read_csv(saved_csv)
d.head()