In this notebook we compute physicochemical descriptors from SMILES.

In [1]:
import pandas as pd
import numpy as np
import os

from descriptors import compute_descriptors

"""Get data"""

In [2]:
# Read the compound table from a comma-separated file (comma is the
# default delimiter for read_csv, so it is not spelled out here).
input_file = "BIOFACQUIM.csv"
data = pd.read_csv(input_file)

In [3]:
# Preview the first five rows of the loaded table.
data.head(5)

Unnamed: 0.1,Unnamed: 0,Library,SMILES
0,0,BIOFACQUIM 2V,c1(C2OCC3C(c4cc5OCOc5cc4)OCC23)cc2OCOc2cc1
1,1,BIOFACQUIM 2V,[C@H](CCC(C(=C)C)(C)C)(C)[C@@H]1[C@]2(C)[C@](C...
2,2,BIOFACQUIM 2V,S=C=N[C@@]1(C)[C@H]2[C@@](C)(CC[C@@](OO)(C(=C)...
3,3,BIOFACQUIM 2V,S=C=N[C@@]1(C)[C@H]2[C@@](C)(CC[C@@](O)(C(=C)C...
4,4,BIOFACQUIM 2V,S(CCC(=O)N)[C@H]1c2c(OC)cccc2-c2c(OC)c(OC)cc3C...


In [4]:
# Extract the SMILES column as a plain Python list and show the first five entries.
smiles = data["SMILES"].tolist()
smiles[:5]

['c1(C2OCC3C(c4cc5OCOc5cc4)OCC23)cc2OCOc2cc1',
 '[C@H](CCC(C(=C)C)(C)C)(C)[C@@H]1[C@]2(C)[C@](C)(C3[C@](C)([C@]45O[C@H](C(C)(C)C4CC3)CC5)CC2)CC1',
 'S=C=N[C@@]1(C)[C@H]2[C@@](C)(CC[C@@](OO)(C(=C)C)C2)CCC1',
 'S=C=N[C@@]1(C)[C@H]2[C@@](C)(CC[C@@](O)(C(=C)C)C2)CCC1',
 'S(CCC(=O)N)[C@H]1c2c(OC)cccc2-c2c(OC)c(OC)cc3C(=O)ON=C1c23']

In [5]:
# Extract the Library column as a plain Python list (one label per compound row).
library = data["Library"].tolist()

"""Compute descriptors"""

In [6]:
# Run the project helper `compute_descriptors` on the SMILES list and
# unpack its seven return values, in the order the helper returns them.
(
    CanonicalSmiles,
    HBA,
    HBD,
    RB,
    LOGP,
    TPSA,
    MW,
) = compute_descriptors(smiles)

In [7]:
# Gather the library labels and the computed descriptors into one
# column-name -> values mapping (insertion order = final column order).
# NOTE(review): this rebinds `data`, which previously held the input
# DataFrame, to a plain dict. Re-running the earlier cells that index
# `data["SMILES"]` / `data["Library"]` after this cell requires reloading
# the CSV first.
data = dict(
    Library=library,
    SMILES=CanonicalSmiles,
    HBA=HBA,
    HBD=HBD,
    RB=RB,
    LOGP=LOGP,
    TPSA=TPSA,
    MW=MW,
)

"""Write results"""

In [8]:
# Build the results DataFrame from the column mapping and preview the first five rows.
DF = pd.DataFrame(data)
DF.head()

Unnamed: 0,Library,SMILES,HBA,HBD,RB,LOGP,TPSA,MW
0,BIOFACQUIM 2V,c1cc2c(cc1C1OCC3C(c4ccc5c(c4)OCO5)OCC13)OCO2,6,0,2,3.2192,55.38,354.358
1,BIOFACQUIM 2V,C=C(C)C(C)(C)CC[C@@H](C)[C@H]1CC[C@@]2(C)C3CCC...,1,0,5,9.2115,9.23,454.783
2,BIOFACQUIM 2V,C=C(C)[C@]1(OO)CC[C@@]2(C)CCC[C@@](C)(N=C=S)[C...,4,1,3,4.6427,41.82,295.448
3,BIOFACQUIM 2V,C=C(C)[C@]1(O)CC[C@@]2(C)CCC[C@@](C)(N=C=S)[C@...,3,1,2,4.1454,32.59,279.449
4,BIOFACQUIM 2V,COc1cc2c(=O)onc3c2c(c1OC)-c1cccc(OC)c1[C@@H]3S...,8,1,7,2.8922,113.88,428.466


In [9]:
# Save the descriptor table as a comma-separated file (comma is the
# default delimiter for to_csv).
# NOTE(review): with the default index=True the row index is written as an
# unnamed leading column, which shows up as "Unnamed: 0" when the file is
# read back without index_col=0 -- confirm whether downstream readers
# expect that column before switching to index=False.
output_file = "biofacquim_descriptors.csv"
DF.to_csv(output_file)