In [1]:
import sys
sys.path.append('../pkg_mod')

import db_connector as db
import chemistry_psql as cp
import importlib
importlib.reload(cp)
import psycopg2
import pandas as pd
from rdkit import Chem
from rdkit.Chem import rdchem as rdch
from rdkit.Chem import Descriptors as rdd
from rdkit.Chem import rdMolDescriptors as rdmd
#from rdkit.Chem import SaltRemover as rdsr
from rdkit.Chem import PandasTools
import plotly.graph_objects as go

In [2]:
##### Global Variables #####

molprop = ['numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd','tpsa']

### Druglikeness Boundaries ###
### Lipinski
### Veber
### Ghosh
druglike_max = [55,500,0.5,10,5,1,3,10,0.6,5,140]
druglike_min = [30,160,0,0,0,0,0,6,0.4,0,0]

In [3]:
##### Function Definitions #####

### Calculate Descriptor Space

def collection_properties(dlist):

    d_temp = []


    for i in dlist:
        smiles = i[0]
        m = i[1]
        numatoms = rdch.Mol.GetNumHeavyAtoms(m)
        mw = rdd.ExactMolWt(m)
        fsp3 = rdmd.CalcFractionCSP3(m)
        HBA = rdmd.CalcNumLipinskiHBA(m)
        HBD = rdmd.CalcNumLipinskiHBD(m)
        stereo = rdmd.CalcNumAtomStereoCenters(m)
        ring = rdmd.CalcNumRings(m)
        rotbond = rdmd.CalcNumRotatableBonds(m)
        qed = Chem.QED.qed(m)
        logd = Chem.Crippen.MolLogP(m)
        tpsa = rdmd.CalcTPSA(m)
        #print(smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd)
        a = [smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd,tpsa]
        d_temp.append(a)
    
    #df_drugs = pd.DataFrame(drug_temp, columns = ['smiles', 'numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd','tpsa'])
    d = ['smiles']
    for i in molprop:
        d.append(i)
    
    #print(d)
    df_d = pd.DataFrame(d_temp, columns = d)
    
    return df_d

### Preparing the Compound Lists

def list_prep(dlist):
    temp_list = []

    for i in range(0, len(dlist)):
        j = dfr[i]
        #print(j, type(j))
        if type(j) == str:
            j = j.split('.')
            for k in j:
                try:
                    #print(k)
                    m = Chem.MolFromSmiles(k)
                    numatoms = rdch.Mol.GetNumAtoms(m)
                    if numatoms >5:
                        #print(k, numatoms)
                        a = (k,m)
                        if a not in temp_list:
                            temp_list.append(a)
                except:
                    #print(j,k, ' - no valid SMILES/Molecule')
                    pass
            
    return temp_list
        

In [4]:
##### Read and Prepare Druglists

dfm = pd.read_excel("../Drug_Libraries/L1021-DiscoveryProbe-FDA-approved-Drug-Library.xlsx", sheet_name=1)
dfr = dfm['SMILES']

drug_list = list_prep(dfr)
#print(drug_list)
print('Druglist Prepared')

df_drugs = collection_properties(drug_list)
print('Collection Properties')
print(df_drugs)

### Calculate Statistics

drugs_mean = df_drugs[molprop].mean()
drugs_std = df_drugs[molprop].std()
drugs_min = df_drugs[molprop].min()
drugs_max = df_drugs[molprop].max()
print(drugs_mean, type(drugs_mean))
print(drugs_std)
print(drugs_min)
print(drugs_max)

Druglist Prepared
Collection Properties
                                                 smiles  numatoms  \
0     CCC(C)C(C(=O)NC(CC1=CN=CN1)C(=O)N2CCCC2C(=O)NC...        75   
1                                  CCC(C(=O)N)N1CCCC1=O        12   
2     CCCCCCCCCC(=O)NC(CC1=CNC2=CC=CC=C21)C(=O)NC(CC...       115   
3       CC1=C(C=CN=C1CS(=O)C2=NC3=CC=CC=C3N2)OCC(F)(F)F        25   
4     COC1=C(C=C(C=C1)C2=CC3=C(C=C2)C=C(C=C3)C(=O)O)...        31   
...                                                 ...       ...   
2153       C1=COC(C2C1C(C=C2CO)O)OC3C(C(C(C(O3)CO)O)O)O        24   
2154  CN1CCC2=CC3=C(C4=C2C1CC5=CC=C(C=C5)OC6=C(C=CC(...        45   
2155                         C[N+]1=CC=CC(=C1)C(=O)[O-]        10   
2156                                       C(CC(=O)O)CN         7   
2157  CC1N2C3=CC(=C(C=C3C(=O)C(=C2S1)C(=O)O)F)N4CCN(...        32   

               mw      fsp3  HBA  HBD  stereo  ring  rotbond       qed  \
0     1045.534515  0.500000   25   16       9     4      

In [5]:
##### Read and Prepare DEL2 Structures

dfm = pd.read_csv("../DEL2_enum.csv")
dfr=dfm['Structure']

del2_list = list_prep(dfr)
#print(drug_list)
print('DEL2-list Prepared')

df_del2 = collection_properties(del2_list)
print('Collection Properties')
print(df_def2)

### Calculate Statistics

del2_mean = df_del2[molprop].mean()
del2_std = df_del2[molprop].std()
del2_min = df_del2[molprop].min()
del2_max = df_del2[molprop].max()
print(del2_mean)
print(del2_std)
print(del2_min)
print(del2_max)

DEL2-list Prepared


KeyboardInterrupt: 

numatoms     23.211770
mw          332.288675
fsp3          0.414283
HBA           5.803522
HBD           2.430491
stereo        1.775255
ring          2.490269
rotbond       4.816960
qed           0.543775
logd          2.001095
tpsa         85.699018
dtype: float64 <class 'pandas.core.series.Series'>
numatoms     12.385146
mw          176.088772
fsp3          0.278539
HBA           3.993080
HBD           2.683858
stereo        3.496074
ring          1.679914
rotbond       4.114985
qed           0.207004
logd          2.608327
tpsa         64.317321
dtype: float64
numatoms     6.000000
mw          82.053098
fsp3         0.000000
HBA          0.000000
HBD          0.000000
stereo       0.000000
ring         0.000000
rotbond      0.000000
qed          0.011297
logd        -8.895300
tpsa         0.000000
dtype: float64
numatoms     115.000000
mw          1663.492352
fsp3           1.000000
HBA           43.000000
HBD           25.000000
stereo        30.000000
ring          12.000000
rot

In [18]:
val = []
ax = molprop.copy()
scale = [1,1,1,1,1,1,1,1,1,1,1]
val_width = [5,12,15,8,16,32,64,10,20,40,10]
maxval = 0

dl_max = druglike_max
dl_min = druglike_min

for i in range(0,len(drugs_mean)):
    #print(i)
    j = drugs_mean[i]
    if ax[i] in ['qed','fsp3']:
        val.append(j)
        val_width[i] = (100*drugs_std[i])
        maxval = max(j, maxval)
    else:
        amin = drugs_min[i]
        if amin > 0:
            amin = 0
        amax = drugs_max[i]
        scale[i] = amax - amin
        j = (j-amin)/scale[i]
        val.append(j)
        dl_max[i] = (druglike_max[i]-amin)/scale[i]
        dl_min[i] = (druglike_min[i]-amin)/scale[i]
        val_width[i] = (100*drugs_std[i])/scale[i]
        maxval = max(j, maxval)

print(scale)
print(dl_max)
print(dl_min)  
print(val)
print(val_width)


ax.append(ax[0])
val.append(val[0])
dl_max.append(dl_max[0])
dl_min.append(dl_min[0])

fig = go.Figure()

fig.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)',
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    line = dict(color = 'green')
  #fill='none'
))

fig.add_trace(go.Scatterpolar(name = 'Druglikeness (upper)',
  r=dl_max,
  theta=ax,
    line = dict(color = 'green'),
    opacity = .5,
    fillcolor = 'lightgreen',
  fill='tonext'
))


fig.add_trace(go.Scatterpolar(name = 'Drug Collection',
  r=val,
  theta=ax,
    line = dict(width = 5, color = 'blue'),
    marker = dict(size=val_width, sizeref = maxval, sizemode = 'diameter', color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    
  #fill='toself'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig.show()

[115.0, 1663.492351879999, 1, 43.0, 25.0, 30.0, 12.0, 35.0, 1, 26.74919999999999, 702.02]
[0.4782608695652174, 0.30057246697583195, 0.5, 0.23255813953488372, 0.2, 0.03333333333333333, 0.25, 0.2857142857142857, 0.6, 0.5194660027215775, 0.19942451782000514]
[0.2608695652173913, 0.09618318943226623, 0, 0.0, 0.0, 0.0, 0.0, 0.17142857142857143, 0.4, 0.33254452469606643, 0.0]
[0.20184147963089816, 0.19975365373933, 0.41428299194174506, 0.13496562277733473, 0.09721964782205746, 0.05917516218721038, 0.20752239728143343, 0.1376274328081557, 0.5437749113563763, 0.40735406643500555, 0.12207489474501759]
[10.769692483936716, 10.5854873473527, 27.853927003860395, 9.28623275571795, 10.735432148566328, 11.653578682089224, 13.999285765188072, 11.757099873614157, 20.70042243348997, 9.751045794057307, 9.161750469989459]


In [10]:
val_max = []
val_min = []

for i in range(0,len(val)-1):
    j = val[i]+drugs_std[i]/scale[i]
    val_max.append(j)
    j = val[i]-drugs_std[i]/scale[i]
    val_min.append(j)
    
val_min.append(val_min[0])
val_max.append(val_max[0])
    
print(val_min)
print(val_max)
    
fig2 = go.Figure()

fig2.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)', showlegend=False,
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    line = dict(color = 'green')
  #fill='none'
))

fig2.add_trace(go.Scatterpolar(name = 'Druglikeness (upper)',
  r=dl_max,
  theta=ax,
    line = dict(color = 'green'),
    opacity = .5,
    fillcolor = 'lightgreen',
  fill='tonext'
))


fig2.add_trace(go.Scatterpolar(name = 'Drug Collection', showlegend=False,
  r=val_min,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig2.add_trace(go.Scatterpolar(name = 'Drug Collection',
  r=val_max,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
  fill='tonext'
))

fig2.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig2.show()

[0.094144554791531, 0.09389878026580299, 0.1357437219031411, 0.042103295220155226, -0.010134673663605834, -0.05736062463368185, 0.06752953962955272, 0.02005643407201413, 0.33677068702147667, 0.3098436084944325, 0.030457390045123003, 0.094144554791531]
[0.3095384044702653, 0.305608527212857, 0.692822261980349, 0.22782795033451425, 0.20457396930772076, 0.1757109490081026, 0.34751525493331414, 0.2551984315442973, 0.7507791356912761, 0.5048645243755786, 0.2136923994449122, 0.3095384044702653]


In [11]:
fig.show()
fig2.show()

In [None]:
print(dir(rdmd.Mol))

In [4]:
dfm = pd.read_csv("../DEL2_enum.csv")
dfr=dfm['Structure']
print(dfr)

del2_list = []

for i in range(0, len(dfr)):
    j = dfr[i]
    #print(j, type(j))
    if type(j) == str:
        j = j.split('.')
        for k in j:
            try:
                #print(k)
                m = Chem.MolFromSmiles(k)
                numatoms = rdch.Mol.GetNumAtoms(m)
                if numatoms >5:
                    #print(k, numatoms)
                    a = (k,m)
                    if a not in drug_list:
                        del2_list.append(a)
            except:
                #print(j,k, ' - no valid SMILES/Molecule')
                pass
        
print()
print(len(del2_list))
#print()
#print(del2_list)

0         Cc1cc(O)c(C)c(C)c1S(=O)(=O)NC(=N)NCCC[C@H](N)C...
1         Cc1cc(O)c(C)c(C)c1S(=O)(=O)NC(=N)NCCC[C@H](N)C...
2         Cc1cc(O)c(C)c(C)c1S(=O)(=O)NC(=N)NCCC[C@H](N)C...
3         Cc1cc(O)c(C)c(C)c1S(=O)(=O)NC(=N)NCCC[C@H](N)C...
4         Cc1cc(O)c(C)c(C)c1S(=O)(=O)NC(=N)NCCC[C@H](N)C...
                                ...                        
453374                               O=C1CN[C@@H](C(=O)O)C1
453375                            O=C(O)[C@H]1C[C@@H](O)CN1
453376                       O=C(O)C1CC2(CCN(C(=O)O)CC2)CN1
453377                  O=C(O)CN1CN(c2ccccc2)C2(CCNCC2)C1=O
453378                              CC1(C)N[C@H](C(=O)O)CS1
Name: Structure, Length: 453379, dtype: object

453378


In [6]:
drug_temp = []


for i in del2_list:
    smiles = i[0]
    m = i[1]
    numatoms = rdch.Mol.GetNumHeavyAtoms(m)
    mw = rdd.ExactMolWt(m)
    fsp3 = rdmd.CalcFractionCSP3(m)
    HBA = rdmd.CalcNumLipinskiHBA(m)
    HBD = rdmd.CalcNumLipinskiHBD(m)
    stereo = rdmd.CalcNumAtomStereoCenters(m)
    ring = rdmd.CalcNumRings(m)
    rotbond = rdmd.CalcNumRotatableBonds(m)
    qed = Chem.QED.qed(m)
    logd = Chem.Crippen.MolLogP(m)
    tpsa = rdmd.CalcTPSA(m)
    #print(smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd)
    a = [smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd, tpsa]
    drug_temp.append(a)
    
df_del2 = pd.DataFrame(drug_temp, columns = ['smiles', 'numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd', 'tpsa'])

print(df_del2)

                                                   smiles  numatoms  \
0       Cc1cc(O)c(C)c(C)c1S(=O)(=O)NC(=N)NCCC[C@H](N)C...        73   
1       Cc1cc(O)c(C)c(C)c1S(=O)(=O)NC(=N)NCCC[C@H](N)C...        73   
2       Cc1cc(O)c(C)c(C)c1S(=O)(=O)NC(=N)NCCC[C@H](N)C...        73   
3       Cc1cc(O)c(C)c(C)c1S(=O)(=O)NC(=N)NCCC[C@H](N)C...        75   
4       Cc1cc(O)c(C)c(C)c1S(=O)(=O)NC(=N)NCCC[C@H](N)C...        73   
...                                                   ...       ...   
453373                             O=C1CN[C@@H](C(=O)O)C1         9   
453374                          O=C(O)[C@H]1C[C@@H](O)CN1         9   
453375                     O=C(O)C1CC2(CCN(C(=O)O)CC2)CN1        16   
453376                O=C(O)CN1CN(c2ccccc2)C2(CCNCC2)C1=O        21   
453377                            CC1(C)N[C@H](C(=O)O)CS1        10   

                 mw      fsp3  HBA  HBD  stereo  ring  rotbond       qed  \
0       1068.473237  0.577778   26   15       7     3       21  0.03655

In [24]:
del2_mean = df_del2[['numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd']].mean()
del2_std = df_del2[['numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd']].std()
del2_min = df_del2[['numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd']].min()
del2_max = df_del2[['numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd']].max()
print(del2_mean)
print(del2_std)
print(del2_min)
print(drugs_max)

val = []
ax = molprop.copy()
#dl_max = [55,500,0.5,10,5,1,3,10,0.6,5]
#dl_min = [30,160,0,0,0,0,0,6,0.4,0]
scale = [1,1,1,1,1,1,1,1,1,1]
val_width = [5,12,15,8,16,32,64,10,20,40]
val_min = []
val_max = []

for i in range(0,len(del2_mean)):
    val_width[i] = del2_std[i]

for i in range(0,len(del2_mean)):
    #print(i)
    j = del2_mean[i]
    if ax[i] in ['qed','fsp3']:
        val.append(j)
        jmin = j-val_width[i]
        jmax = j+val_width[i]
        val_min.append(jmin)
        val_max.append(jmax)
        print(i,j, ax[i])
        val_width[i] = del2_std[i]
    else:
        amin = min(drugs_min[i],druglike_min[i],del2_min[i])
        if amin > 0:
            amin = 0
        amax = max(drugs_max[i],druglike_max[i],del2_max[i])
        scale[i] = amax - amin
        jmin = (j-val_width[i]-amin)/scale[i]
        jmax = (j+val_width[i]-amin)/scale[i]
        val_min.append(jmin)
        val_max.append(jmax)
        j = (j-amin)/scale[i]
        val.append(j)
        dl_max[i] = (druglike_max[i]-amin)/scale[i]
        dl_min[i] = (druglike_min[i]-amin)/scale[i]
        val_width[i] = del2_std[i]/scale[i]

print()
print(scale)
print(val)
print('Likeness')
print(dl_max)
print(dl_min)  
print('Drugs')
print(val_min)
print(val_max)

ax.append(ax[0])
val.append(val[0])
val_min.append(val_min[0])
val_max.append(val_max[0])
dl_max.append(dl_max[0])
dl_min.append(dl_min[0])

fig = go.Figure()

fig.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)',
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    line = dict(color = 'green')
  #fill='none'
))

fig.add_trace(go.Scatterpolar(name = 'Druglikeness (upper)',
  r=dl_max,
  theta=ax,
    line = dict(color = 'green'),
    opacity = .5,
    fillcolor = 'silver',
  fill='tonext'
))


fig.add_trace(go.Scatterpolar(name = 'DEL2',
  r=val,
  theta=ax,
    line = dict(width = 5, color = 'blue'),
    marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    
  #fill='toself'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig.show()


##### Double Trace #####

#val_max = []

#for i in val:
#    j = i+.1
#    val_max.append(j)
    
fig2 = go.Figure()

fig2.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)', showlegend=False,
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    line = dict(color = 'green')
  #fill='none'
))

fig2.add_trace(go.Scatterpolar(name = 'Druglikeness (upper)',
  r=dl_max,
  theta=ax,
    line = dict(color = 'green'),
    opacity = .5,
    fillcolor = 'silver',
  fill='tonext'
))


fig2.add_trace(go.Scatterpolar(name = 'Del2', showlegend=False,
  r=val_min,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig2.add_trace(go.Scatterpolar(name = 'DEL2',
  r=val_max,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
  fill='tonext'
))

fig2.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig2.show()

numatoms     31.980983
mw          451.127516
fsp3          0.478996
HBA          10.120959
HBD           5.809210
stereo        3.020976
ring          2.524668
rotbond       9.466503
qed           0.343021
logd          0.233724
dtype: float64 <class 'pandas.core.series.Series'>
numatoms     6.413035
mw          86.674432
fsp3         0.162820
HBA          2.350128
HBD          1.548287
stereo       1.014620
ring         1.255691
rotbond      2.642168
qed          0.152613
logd         1.666573
dtype: float64
numatoms     6.000000
mw          89.047678
fsp3         0.000000
HBA          3.000000
HBD          2.000000
stereo       0.000000
ring         0.000000
rotbond      1.000000
qed          0.021258
logd        -7.239200
dtype: float64
numatoms      77.000000
mw          1096.431766
fsp3           0.888889
HBA           28.000000
HBD           17.000000
stereo        11.000000
ring          11.000000
rotbond       27.000000
qed            0.908471
logd           6.951500
dtype: fl

In [2]:
import sys
sys.path

['/mnt/raid1/Python_projects/Neural_Network',
 '/home/peter/anaconda3/envs/rdkit/lib/python37.zip',
 '/home/peter/anaconda3/envs/rdkit/lib/python3.7',
 '/home/peter/anaconda3/envs/rdkit/lib/python3.7/lib-dynload',
 '',
 '/home/peter/anaconda3/envs/rdkit/lib/python3.7/site-packages',
 '/home/peter/anaconda3/envs/rdkit/lib/python3.7/site-packages/IPython/extensions',
 '/home/peter/.ipython']

In [3]:
sys.path.append('../pkg_mod')

In [22]:
struct_list = []
print(struct_raw)
print()

for i in struct_raw:
    #print(i)
    a = i[0].split('.')
    #print(a)
    for j in a:
        b = (j,)
        #print(j, b)
        if b not in struct_list:
            struct_list.append(b)

print(struct_list)
        

[('C1=CC=C(C(=C1)N)SC(=C(C#N)C(=C(N)SC2=CC=CC=C2N)C#N)N',)
 ('CC1CNCCN1S(=O)(=O)C2=CC=CC3=C2C=CN=C3',)
 ('C1=CC2=C(C=CN=C2)C(=C1)S(=O)(=O)NCCN',) ('C(CC(C(=O)O)N)CN=C(N)N',)
 ('C1CN(CCN1CC=CC2=CC=CC=C2)C(C3=CC=C(C=C3)F)C4=CC=C(C=C4)F.Cl.Cl',)
 ('CC1=C(C(C(=C(N1)C)C(=O)OC)C2=CC=CC=C2[N+](=O)[O-])C(=O)OC',)
 ('CC(C)(C)C1=CC(=CC(=C1O)C(C)(C)C)C=C(C#N)C#N',)
 ('CC1=C(C(C(=C(N1)C)C(=O)OCCN(C)CC2=CC=CC=C2)C3=CC(=CC=C3)[N+](=O)[O-])C(=O)OC.Cl',)
 ('C1CN(CCC1N2C3=CC=CC=C3NC2=O)CCCC(C4=CC=C(C=C4)F)C5=CC=C(C=C5)F',)
 ('C1COCCN1C2=CC(=O)C3=C(O2)C(=CC=C3)C4=CC=CC=C4',)
 ('CN(C)C(=O)C(CCN1CCC(CC1)(C2=CC=C(C=C2)Cl)O)(C3=CC=CC=C3)C4=CC=CC=C4.Cl',)
 ('CC(=O)OC1CC2(C(CCC2=O)C3=C1C4(C(OC(=O)C5=COC(=C54)C3=O)COC)C)C',)
 ('C1=CC(=C(C=C1CN=C(N)NC(=O)C2=C(N=C(C(=N2)Cl)N)N)Cl)Cl',)
 ('CC1=CC(=O)C2=C(C3=C(C=C(C4=C3C5=C2C1=C6C(=CC(=O)C7=C(C8=C(C=C(C4=C8C5=C67)O)O)O)C)O)O)O',)
 ('C1=CC2=C(C=CN=C2)C(=C1)S(=O)(=O)NCCNCC=CC3=CC=C(C=C3)Br',)
 ('COC1=C(C=C(C=C1)Cl)C(=O)NCCC2=CC=C(C=C2)S(=O)(=O)NC(=O)NC3CCCCC3',)
 ('

In [None]:
dfm = pd.read_excel("../Drug_Libraries/L1021-DiscoveryProbe-FDA-approved-Drug-Library.xlsx", sheet_name=1)
#print(dfm)
dfr = dfm['SMILES']
#print(dfr)

drug_list = list_prep(dfr)

for i in range(0, len(dfr)):
    j = dfr[i]
    #print(j, type(j))
    if type(j) == str:
        j = j.split('.')
        for k in j:
            try:
                #print(k)
                m = Chem.MolFromSmiles(k)
                numatoms = rdch.Mol.GetNumAtoms(m)
                if numatoms >5:
                    #print(k, numatoms)
                    a = (k,m)
                    if a not in drug_list:
                        drug_list.append(a)
            except:
                #print(j,k, ' - no valid SMILES/Molecule')
                pass
        

print(len(drug_list))
print()
print(drug_list)

In [None]:
drug_temp = []


for i in drug_list:
    smiles = i[0]
    m = i[1]
    numatoms = rdch.Mol.GetNumHeavyAtoms(m)
    mw = rdd.ExactMolWt(m)
    fsp3 = rdmd.CalcFractionCSP3(m)
    HBA = rdmd.CalcNumLipinskiHBA(m)
    HBD = rdmd.CalcNumLipinskiHBD(m)
    stereo = rdmd.CalcNumAtomStereoCenters(m)
    ring = rdmd.CalcNumRings(m)
    rotbond = rdmd.CalcNumRotatableBonds(m)
    qed = Chem.QED.qed(m)
    logd = Chem.Crippen.MolLogP(m)
    tpsa = rdmd.CalcTPSA(m)
    #print(smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd)
    a = [smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd,tpsa]
    drug_temp.append(a)
    
#df_drugs = pd.DataFrame(drug_temp, columns = ['smiles', 'numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd','tpsa'])
d = ['smiles']
for i in molprop:
    d.append(i)
    
print(d)
    
df_drugs = pd.DataFrame(drug_temp, columns = d)
print(df_drugs)