In [1]:
import sys
sys.path.append('../pkg_mod')

import db_connector as db
import chemistry_psql as cp
import importlib
importlib.reload(cp)
import psycopg2
import pandas as pd
from rdkit import Chem
from rdkit.Chem import rdchem as rdch
from rdkit.Chem import Descriptors as rdd
from rdkit.Chem import rdMolDescriptors as rdmd
#from rdkit.Chem import SaltRemover as rdsr
from rdkit.Chem import PandasTools
import plotly.graph_objects as go

In [2]:
##### Global Variables #####

molprop = ['Atoms','MW', 'fSP3', 'HBA', 'HBD', 'Stereo Atoms', 'Rings', 'Rotatable Bonds', 'QED', 'logD','TPSA']

### Druglikeness Boundaries ###
### Lipinski
### Veber
### Ghosh
druglike_max = [55,500,0.5,10,5,1,3,10,0.6,5,140]
druglike_min = [30,160,0,0,0,0,0,6,0.4,0,0]

In [7]:
##### Function Definitions #####

### Calculate Descriptor Space

def collection_properties(dlist):

    d_temp = []
    line_count = 0
    d_lines = min(int(len(dlist)/50),1000)

    for i in dlist:
        smiles = i[0]
        m = i[1]
        numatoms = rdch.Mol.GetNumHeavyAtoms(m)
        mw = rdd.ExactMolWt(m)
        fsp3 = rdmd.CalcFractionCSP3(m)
        HBA = rdmd.CalcNumLipinskiHBA(m)
        HBD = rdmd.CalcNumLipinskiHBD(m)
        stereo = rdmd.CalcNumAtomStereoCenters(m)
        ring = rdmd.CalcNumRings(m)
        rotbond = rdmd.CalcNumRotatableBonds(m)
        qed = Chem.QED.qed(m)
        logd = Chem.Crippen.MolLogP(m)
        tpsa = rdmd.CalcTPSA(m)
        #print(smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd)
        a = [smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd,tpsa]
        d_temp.append(a)
        line_count = line_count+1
        if (line_count % d_lines) == 0:
            print('Lines processed : ', line_count)
    
    #df_drugs = pd.DataFrame(drug_temp, columns = ['smiles', 'numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd','tpsa'])
    d = ['smiles']
    for i in molprop:
        d.append(i)
    
    #print(d)
    df_d = pd.DataFrame(d_temp, columns = d)
    
    return df_d

### Preparing the Compound Lists

def list_prep(dlist):
    temp_list = []
    line_count = 0
    d_lines = min(int(len(dlist)/50),1000)
    print(len(dlist), d_lines)

    for i in range(0, len(dlist)):
        j = dfr[i]
        #print(j, type(j))
        #print(line_count)
        if (line_count % d_lines) == 0:
            print('Lines processed : ', line_count)
        line_count = line_count+1
        if type(j) == str:
            j = j.split('.')
            for k in j:
                try:
                    #print(line_count, k)
                    m = Chem.MolFromSmiles(k)
                    numatoms = rdch.Mol.GetNumAtoms(m)
                    if numatoms >5:
                        #print(k, numatoms)
                        a = (k,m)
                        if a not in temp_list:
                            temp_list.append(a)
                except:
                    #print(j,k, ' - no valid SMILES/Molecule')
                    pass
        
        #line_count = line_count+1
            
    return temp_list
        

In [4]:
##### Read and Prepare Druglists

dfm = pd.read_csv("../Drug_Libraries/L1021_FDA_Drugs.csv", encoding = 'UTF-8')
dfr = dfm['SMILES']
#print(dfr)

drug_list = list_prep(dfr)
#print(drug_list)
print('Druglist Prepared')

df_drugs = collection_properties(drug_list)
print('Collection Properties')
print(df_drugs)

### Calculate Statistics

drugs_mean = df_drugs[molprop].mean()
drugs_std = df_drugs[molprop].std()
drugs_min = df_drugs[molprop].min()
drugs_max = df_drugs[molprop].max()
print(drugs_mean, type(drugs_mean))
print(drugs_std)
print(drugs_min)
print(drugs_max)

df_drugs.to_csv('../Drug_Properties.csv')

0       CCC(C)C(C(=O)NC(CC1=CN=CN1)C(=O)N2CCCC2C(=O)NC...
1                                    CCC(C(=O)N)N1CCCC1=O
2       CCCCCCCCCC(=O)NC(CC1=CNC2=CC=CC=C21)C(=O)NC(CC...
3         CC1=C(C=CN=C1CS(=O)C2=NC3=CC=CC=C3N2)OCC(F)(F)F
4       COC1=C(C=C(C=C1)C2=CC3=C(C=C2)C=C(C=C3)C(=O)O)...
                              ...                        
2319                                         C(CC(=O)O)CN
2320    CC1N2C3=CC(=C(C=C3C(=O)C(=C2S1)C(=O)O)F)N4CCN(...
2321                                                  NaN
2322                                                  NaN
2323                                                  NaN
Name: SMILES, Length: 2324, dtype: object
2324 46
CCC(C)C(C(=O)NC(CC1=CN=CN1)C(=O)N2CCCC2C(=O)NC(CC3=CC=CC=C3)C(=O)O)NC(=O)C(CC4=CC=C(C=C4)O)NC(=O)C(C(C)C)NC(=O)C(CCCN=C(N)N)NC(=O)C(CC(=O)O)N.CC(=O)O <class 'str'>
Lines processed :  0
CCC(C(=O)N)N1CCCC1=O <class 'str'>
CCCCCCCCCC(=O)NC(CC1=CNC2=CC=CC=C21)C(=O)NC(CC(=O)N)C(=O)NC(CC(=O)O)C(=O)NC3C(OC(=O)C(NC(=O)

RDKit ERROR: [17:39:44] SMILES Parse Error: syntax error while parsing: C1=CN(C(=O)N=C1N)[[C@H]2C([[C@@H]([[C@H](O2)CO)O)(F)F
RDKit ERROR: [17:39:44] SMILES Parse Error: Failed parsing SMILES 'C1=CN(C(=O)N=C1N)[[C@H]2C([[C@@H]([[C@H](O2)CO)O)(F)F' for input: 'C1=CN(C(=O)N=C1N)[[C@H]2C([[C@@H]([[C@H](O2)CO)O)(F)F'
[17:39:44] SMILES Parse Error: syntax error while parsing: C1=CN(C(=O)N=C1N)[[C@H]2C([[C@@H]([[C@H](O2)CO)O)(F)F
RDKit ERROR: [17:39:44] SMILES Parse Error: syntax error while parsing: O=C1N(C([[C@H](C)NC2=C3N=CNC3=NC=N2)=CC4=C1C(Cl)=CC=C4)C5=CC=CC=C5
RDKit ERROR: [17:39:44] SMILES Parse Error: Failed parsing SMILES 'O=C1N(C([[C@H](C)NC2=C3N=CNC3=NC=N2)=CC4=C1C(Cl)=CC=C4)C5=CC=CC=C5' for input: 'O=C1N(C([[C@H](C)NC2=C3N=CNC3=NC=N2)=CC4=C1C(Cl)=CC=C4)C5=CC=CC=C5'
RDKit ERROR: [17:39:44] SMILES Parse Error: syntax error while parsing: COC1=CC=C2C3=C1O[[C@H]4([H])[[C@H]3(C=C[[C@H](O)C4)CCN(C)C2
RDKit ERROR: [17:39:44] SMILES Parse Error: Failed parsing SMILES 'COC1=CC=C2C3=C1O[[C

<class 'str'>
CC1=NC(=NC=C1)NS(=O)(=O)C2=CC=C(C=C2)N <class 'str'>
COC1=CN=C(N=C1)NS(=O)(=O)C2=CC=C(C=C2)N <class 'str'>
CC1=NN=C(S1)NS(=O)(=O)C2=CC=C(C=C2)N <class 'str'>
CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N <class 'str'>
C1=CC(=CC=C1N)S(=O)(=O)N <class 'str'>
C1=CC(=CC=C1N)S(=O)(=O)NC2=NC=CS2 <class 'str'>
CC1=C(ON=C1C)NS(=O)(=O)C2=CC=C(C=C2)N <class 'str'>
CC1=C(C2=C(C1=CC3=CC=C(C=C3)S(=O)C)C=CC(=C2)F)CC(=O)O <class 'str'>
CC(C)(C)C#CC=CCN(C)CC1=CC=CC2=CC=CC=C21.Cl <class 'str'>
CCC1C(C=C(C=CC(=O)C(CC(C(C(C(CC(=O)O1)O)C)OC2C(C(C(C(O2)C)O)N(C)C)O)CCN3CC(CC(C3)C)C)C)C)COC4C(C(C(C(O4)C)O)OC)OC <class 'str'>
CCS(=O)(=O)CCN1C(=NC=C1[N+](=O)[O-])C <class 'str'>
C1=CC(=C(C=C1Cl)Cl)C(CN2C=CN=C2)OCC3=C(SC=C3)Cl <class 'str'>
CC1=CC(=CC=C1)N(C)C(=S)OC2=CC3=CC=CC=C3C=C2 <class 'str'>
CC1=C(C=CC(=C1)N2C(=O)NC(=O)N(C2=O)C)OC3=CC=C(C=C3)SC(F)(F)F <class 'str'>
CC1=CC(=CC=C1)NC2=C(C=NC=C2)S(=O)(=O)NC(=O)NC(C)C <class 'str'>
C1=C2C(=CC(=C1Cl)S(=O)(=O)N)S(=O)(=O)NC(N2)C(Cl)Cl <class 'str'>
COC1=CC(=

[[C@H@](C([H])([H])C3([H])[H])([H])[[C@H@]4([H])[[C@H@](C([H])([H])[H])(C([H])=C5[H])C3=C([H])C5=O)([H])[[C@H]2(C([H])([H])[H])C([H])([H])[[C@H]4([H])O[H]
[17:39:44] SMILES Parse Error: Failed parsing SMILES 'O=C(C([H])([H])O[H])[[C@H@]12OC(C([H])([H])[H])(C([H])([H])[H])O[[C@H]1([H])C([H])([H])[[C@H]([[C@H@](C([H])([H])C3([H])[H])([H])[[C@H@]4([H])[[C@H@](C([H])([H])[H])(C([H])=C5[H])C3=C([H])C5=O)([H])[[C@H]2(C([H])([H])[H])C([H])([H])[[C@H]4([H])O[H]' for input: 'O=C(C([H])([H])O[H])[[C@H@]12OC(C([H])([H])[H])(C([H])([H])[H])O[[C@H]1([H])C([H])([H])[[C@H]([[C@H@](C([H])([H])C3([H])[H])([H])[[C@H@]4([H])[[C@H@](C([H])([H])[H])(C([H])=C5[H])C3=C([H])C5=O)([H])[[C@H]2(C([H])([H])[H])C([H])([H])[[C@H]4([H])O[H]'
RDKit ERROR: [17:39:44] SMILES Parse Error: syntax error while parsing: O=C(O[[C@H]1C[[C@@H]2CC[[C@H](C1)N2C)[[C@@H]3C4=C(N=C3)C=CC=C4
RDKit ERROR: [17:39:44] SMILES Parse Error: Failed parsing SMILES 'O=C(O[[C@H]1C[[C@@H]2CC[[C@H](C1)N2C)[[C@@H]3C4=C(N=C3)C=CC=C4' for input: 'O

 - no valid SMILES/Molecule
COC1=C(C=C(C=C1)C2=C(C(=O)C3=C(C(=C(C=C3O2)OC)OC)O)OC)O <class 'str'>
CC1C(CC2C1C(OC=C2C(=O)OC)OC3C(C(C(C(O3)CO)O)O)O)O <class 'str'>
C1=CC(=O)OC2=CC3=C(C=CO3)C=C21 <class 'str'>
C1=C(OC(=C1)C=O)CO <class 'str'>
C[N+]1=CC2=C3OCOC3=CC=C2C(C=C4)=C1C(C4=C5)=CC6=C5OCO6 <class 'str'>
CC1CCC2(CCC3(C(=CCC4C3(CCC5C4(CC(C(C5(C)CO)O)O)C)C)C2C1C)C)C(=O)O <class 'str'>
CC1CCC2(CCC3(C(=CCC4C3(CCC5C4(CC(C(C5(C)CO)O)O)C)C)C2C1C)C)C(=O)OC6C(C(C(C(O6)COC7C(C(C(C(O7)CO)OC8C(C(C(C(O8)C)O)O)O)O)O)O)O)O <class 'str'>
Lines processed :  2162
COC1=C(C=CC(=C1)C2=C(C(=O)C3=C(C=C(C=C3O2)O)O)O)O <class 'str'>
C=CCC1=CC(=C(C=C1)O)C2=C(C=CC(=C2)CC=C)O <class 'str'>
COC(=O)C1=COC(C2C1CC=C2CO)OC3C(C(C(C(O3)CO)O)O)O <class 'str'>
[H]C(C([H])([H])C1=C2[H])([H])[N+](C([H])=C(C(OC([H])([H])[H])=C3OC([H])([H])[H])C(C([H])=C3[H])=C4[H])=C4C1=C([H])C5=C2OC([H])([H])O5 <class 'str'>
COC1=C(C=C2C(=C1O)C3C(C(C(C(O3)CO)O)O)OC2=O)O <class 'str'>
O=C([[C@H@]1(C(N([H])C2=C([H])C([H])=C([H])C([H])=C32)=

Lines processed :  215
Lines processed :  258
Lines processed :  301
Lines processed :  344
Lines processed :  387
Lines processed :  430
Lines processed :  473
Lines processed :  516
Lines processed :  559
Lines processed :  602
Lines processed :  645
Lines processed :  688
Lines processed :  731
Lines processed :  774
Lines processed :  817
Lines processed :  860
Lines processed :  903
Lines processed :  946
Lines processed :  989
Lines processed :  1032
Lines processed :  1075
Lines processed :  1118
Lines processed :  1161
Lines processed :  1204
Lines processed :  1247
Lines processed :  1290
Lines processed :  1333
Lines processed :  1376
Lines processed :  1419
Lines processed :  1462
Lines processed :  1505
Lines processed :  1548
Lines processed :  1591
Lines processed :  1634
Lines processed :  1677
Lines processed :  1720
Lines processed :  1763
Lines processed :  1806
Lines processed :  1849
Lines processed :  1892
Lines processed :  1935
Lines processed :  1978
Lines proce

In [8]:
##### Read and Prepare DEL1 Structures

dfm = pd.read_csv("../DEL1_enum.csv")
dfr=dfm['Structure']

del1_list = list_prep(dfr)
#print(drug_list)
print('DEL1-list Prepared')

df_del1 = collection_properties(del1_list)
print('Collection Properties')
#print(df_del1)

### Calculate Statistics

del1_mean = df_del1[molprop].mean()
del1_std = df_del1[molprop].std()
del1_min = df_del1[molprop].min()
del1_max = df_del1[molprop].max()
print(del1_mean)
print(del1_std)
print(del1_min)
print(del1_max)

df_del1.to_csv('../DEL1_properties.csv')

995885 1000
Lines processed :  0
Lines processed :  1000
Lines processed :  2000
Lines processed :  3000
Lines processed :  4000
Lines processed :  5000
Lines processed :  6000
Lines processed :  7000
Lines processed :  8000
Lines processed :  9000
Lines processed :  10000
Lines processed :  11000
Lines processed :  12000
Lines processed :  13000
Lines processed :  14000
Lines processed :  15000
Lines processed :  16000
Lines processed :  17000
Lines processed :  18000
Lines processed :  19000
Lines processed :  20000
Lines processed :  21000
Lines processed :  22000
Lines processed :  23000
Lines processed :  24000
Lines processed :  25000
Lines processed :  26000
Lines processed :  27000
Lines processed :  28000
Lines processed :  29000
Lines processed :  30000
Lines processed :  31000
Lines processed :  32000
Lines processed :  33000
Lines processed :  34000
Lines processed :  35000
Lines processed :  36000
Lines processed :  37000
Lines processed :  38000
Lines processed :  39000
L

Lines processed :  319000
Lines processed :  320000
Lines processed :  321000
Lines processed :  322000
Lines processed :  323000
Lines processed :  324000
Lines processed :  325000
Lines processed :  326000
Lines processed :  327000
Lines processed :  328000
Lines processed :  329000
Lines processed :  330000
Lines processed :  331000
Lines processed :  332000
Lines processed :  333000
Lines processed :  334000
Lines processed :  335000
Lines processed :  336000
Lines processed :  337000
Lines processed :  338000
Lines processed :  339000
Lines processed :  340000
Lines processed :  341000
Lines processed :  342000
Lines processed :  343000
Lines processed :  344000
Lines processed :  345000
Lines processed :  346000
Lines processed :  347000
Lines processed :  348000
Lines processed :  349000
Lines processed :  350000
Lines processed :  351000
Lines processed :  352000
Lines processed :  353000
Lines processed :  354000
Lines processed :  355000
Lines processed :  356000
Lines proces

Lines processed :  635000
Lines processed :  636000
Lines processed :  637000
Lines processed :  638000
Lines processed :  639000
Lines processed :  640000
Lines processed :  641000
Lines processed :  642000
Lines processed :  643000
Lines processed :  644000
Lines processed :  645000
Lines processed :  646000
Lines processed :  647000
Lines processed :  648000
Lines processed :  649000
Lines processed :  650000
Lines processed :  651000
Lines processed :  652000
Lines processed :  653000
Lines processed :  654000
Lines processed :  655000
Lines processed :  656000
Lines processed :  657000
Lines processed :  658000
Lines processed :  659000
Lines processed :  660000
Lines processed :  661000
Lines processed :  662000
Lines processed :  663000
Lines processed :  664000
Lines processed :  665000
Lines processed :  666000
Lines processed :  667000
Lines processed :  668000
Lines processed :  669000
Lines processed :  670000
Lines processed :  671000
Lines processed :  672000
Lines proces

Lines processed :  951000
Lines processed :  952000
Lines processed :  953000
Lines processed :  954000
Lines processed :  955000
Lines processed :  956000
Lines processed :  957000
Lines processed :  958000
Lines processed :  959000
Lines processed :  960000
Lines processed :  961000
Lines processed :  962000
Lines processed :  963000
Lines processed :  964000
Lines processed :  965000
Lines processed :  966000
Lines processed :  967000
Lines processed :  968000
Lines processed :  969000
Lines processed :  970000
Lines processed :  971000
Lines processed :  972000
Lines processed :  973000
Lines processed :  974000
Lines processed :  975000
Lines processed :  976000
Lines processed :  977000
Lines processed :  978000
Lines processed :  979000
Lines processed :  980000
Lines processed :  981000
Lines processed :  982000
Lines processed :  983000
Lines processed :  984000
Lines processed :  985000
Lines processed :  986000
Lines processed :  987000
Lines processed :  988000
Lines proces

Lines processed :  275000
Lines processed :  276000
Lines processed :  277000
Lines processed :  278000
Lines processed :  279000
Lines processed :  280000
Lines processed :  281000
Lines processed :  282000
Lines processed :  283000
Lines processed :  284000
Lines processed :  285000
Lines processed :  286000
Lines processed :  287000
Lines processed :  288000
Lines processed :  289000
Lines processed :  290000
Lines processed :  291000
Lines processed :  292000
Lines processed :  293000
Lines processed :  294000
Lines processed :  295000
Lines processed :  296000
Lines processed :  297000
Lines processed :  298000
Lines processed :  299000
Lines processed :  300000
Lines processed :  301000
Lines processed :  302000
Lines processed :  303000
Lines processed :  304000
Lines processed :  305000
Lines processed :  306000
Lines processed :  307000
Lines processed :  308000
Lines processed :  309000
Lines processed :  310000
Lines processed :  311000
Lines processed :  312000
Lines proces

Lines processed :  591000
Lines processed :  592000
Lines processed :  593000
Lines processed :  594000
Lines processed :  595000
Lines processed :  596000
Lines processed :  597000
Lines processed :  598000
Lines processed :  599000
Lines processed :  600000
Lines processed :  601000
Lines processed :  602000
Lines processed :  603000
Lines processed :  604000
Lines processed :  605000
Lines processed :  606000
Lines processed :  607000
Lines processed :  608000
Lines processed :  609000
Lines processed :  610000
Lines processed :  611000
Lines processed :  612000
Lines processed :  613000
Lines processed :  614000
Lines processed :  615000
Lines processed :  616000
Lines processed :  617000
Lines processed :  618000
Lines processed :  619000
Lines processed :  620000
Lines processed :  621000
Lines processed :  622000
Lines processed :  623000
Lines processed :  624000
Lines processed :  625000
Lines processed :  626000
Lines processed :  627000
Lines processed :  628000
Lines proces

Lines processed :  907000
Lines processed :  908000
Lines processed :  909000
Lines processed :  910000
Lines processed :  911000
Lines processed :  912000
Lines processed :  913000
Lines processed :  914000
Lines processed :  915000
Lines processed :  916000
Lines processed :  917000
Lines processed :  918000
Lines processed :  919000
Lines processed :  920000
Lines processed :  921000
Lines processed :  922000
Lines processed :  923000
Lines processed :  924000
Lines processed :  925000
Lines processed :  926000
Lines processed :  927000
Lines processed :  928000
Lines processed :  929000
Lines processed :  930000
Lines processed :  931000
Lines processed :  932000
Lines processed :  933000
Lines processed :  934000
Lines processed :  935000
Lines processed :  936000
Lines processed :  937000
Lines processed :  938000
Lines processed :  939000
Lines processed :  940000
Lines processed :  941000
Lines processed :  942000
Lines processed :  943000
Lines processed :  944000
Lines proces

In [9]:
##### Read and Prepare DEL2 Structures

dfm = pd.read_csv("../DEL2_enum.csv")
dfr=dfm['Structure']

del2_list = list_prep(dfr)
#print(drug_list)
print('DEL2-list Prepared')

df_del2 = collection_properties(del2_list)
print('Collection Properties')
print(df_del2)

### Calculate Statistics

del2_mean = df_del2[molprop].mean()
del2_std = df_del2[molprop].std()
del2_min = df_del2[molprop].min()
del2_max = df_del2[molprop].max()
print(del2_mean)
print(del2_std)
print(del2_min)
print(del2_max)

df_del2.to_csv('../DEL2_properties.csv')

453379 1000
Lines processed :  0
Lines processed :  1000
Lines processed :  2000
Lines processed :  3000
Lines processed :  4000
Lines processed :  5000
Lines processed :  6000
Lines processed :  7000
Lines processed :  8000
Lines processed :  9000
Lines processed :  10000
Lines processed :  11000
Lines processed :  12000
Lines processed :  13000
Lines processed :  14000
Lines processed :  15000
Lines processed :  16000
Lines processed :  17000
Lines processed :  18000
Lines processed :  19000
Lines processed :  20000
Lines processed :  21000
Lines processed :  22000
Lines processed :  23000
Lines processed :  24000
Lines processed :  25000
Lines processed :  26000
Lines processed :  27000
Lines processed :  28000
Lines processed :  29000
Lines processed :  30000
Lines processed :  31000
Lines processed :  32000
Lines processed :  33000
Lines processed :  34000
Lines processed :  35000
Lines processed :  36000
Lines processed :  37000
Lines processed :  38000
Lines processed :  39000
L

Lines processed :  319000
Lines processed :  320000
Lines processed :  321000
Lines processed :  322000
Lines processed :  323000
Lines processed :  324000
Lines processed :  325000
Lines processed :  326000
Lines processed :  327000
Lines processed :  328000
Lines processed :  329000
Lines processed :  330000
Lines processed :  331000
Lines processed :  332000
Lines processed :  333000
Lines processed :  334000
Lines processed :  335000
Lines processed :  336000
Lines processed :  337000
Lines processed :  338000
Lines processed :  339000
Lines processed :  340000
Lines processed :  341000
Lines processed :  342000
Lines processed :  343000
Lines processed :  344000
Lines processed :  345000
Lines processed :  346000
Lines processed :  347000
Lines processed :  348000
Lines processed :  349000
Lines processed :  350000
Lines processed :  351000
Lines processed :  352000
Lines processed :  353000
Lines processed :  354000
Lines processed :  355000
Lines processed :  356000
Lines proces

Lines processed :  185000
Lines processed :  186000
Lines processed :  187000
Lines processed :  188000
Lines processed :  189000
Lines processed :  190000
Lines processed :  191000
Lines processed :  192000
Lines processed :  193000
Lines processed :  194000
Lines processed :  195000
Lines processed :  196000
Lines processed :  197000
Lines processed :  198000
Lines processed :  199000
Lines processed :  200000
Lines processed :  201000
Lines processed :  202000
Lines processed :  203000
Lines processed :  204000
Lines processed :  205000
Lines processed :  206000
Lines processed :  207000
Lines processed :  208000
Lines processed :  209000
Lines processed :  210000
Lines processed :  211000
Lines processed :  212000
Lines processed :  213000
Lines processed :  214000
Lines processed :  215000
Lines processed :  216000
Lines processed :  217000
Lines processed :  218000
Lines processed :  219000
Lines processed :  220000
Lines processed :  221000
Lines processed :  222000
Lines proces

In [None]:
del2_mean = df_del2[molprop].mean()
del2_std = df_del2[molprop].std()
del2_min = df_del2[molprop].min()
del2_max = df_del2[molprop].max()
print(del2_mean)
print(del2_std)
print(del2_min)
print(del2_max)

In [None]:
df_drugs.to_csv('../Drug_Properties.csv')
df_del1.to_csv('../DEL1_properties.csv')
df_del2.to_csv('../DEL2_properties.csv')

In [11]:
ax = molprop.copy()
scale = [1,1,1,1,1,1,1,1,1,1,1]

##### Calculate Axes and Scales for Plots #####

axmin = []
axmax = []

for i in range(0,len(molprop)):
    #print(i)
    if ax[i] in ['qed','fsp3']:
        axmin.append(0)
        axmax.append(1)
    else:
        amin = min(druglike_max[i], drugs_min[i], del2_min[i], del1_min[i])
        if amin > 0:
            amin = 0
        amax = max(druglike_max[i], drugs_max[i], del2_max[i], del1_max[i])
        scale[i] = amax - amin
        axmin.append(amin)
        axmax.append(amax)
                   
print('Scales : ',scale)
print()
print('Maxima : ',axmin)
print()
print('Minima : ',axmax)


Scales :  [115.0, 1663.492351879999, 1.0, 43.0, 25.0, 30.0, 13.0, 35.0, 0.9437114379351169, 26.74919999999999, 702.02]

Maxima :  [0, 0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, -8.895300000000017, 0.0]

Minima :  [115.0, 1663.492351879999, 1.0, 43.0, 25.0, 30.0, 13.0, 35.0, 0.9437114379351169, 17.853899999999975, 702.02]


In [19]:
##### Calculate trace data #####

std_fact = 2

### Druglikeness

dl_min = []
dl_max = []

for i in range(0,len(molprop)):
    dmax = (druglike_max[i]-axmin[i])/scale[i]
    dmin = (druglike_min[i]-axmin[i])/scale[i]
    dl_max.append(dmax)
    dl_min.append(dmin)

print(dl_min)
print(dl_max)
print()

### Drug Collection

drug_min = []
drug_max = []

for i in range(0,len(molprop)):
    dmax = drugs_mean[i] + std_fact*drugs_std[i]
    dmin = drugs_mean[i] - std_fact*drugs_std[i]
    #print(molprop[i],drugs_mean[i],drugs_std[i],dmax,dmin)
    dmax = (dmax-axmin[i])/scale[i]
    dmin = (dmin-axmin[i])/scale[i]
    if dmin<0:
        dmin = 0
    #print(molprop[i],drugs_mean[i],drugs_std[i],dmax,dmin)
    drug_max.append(dmax)
    drug_min.append(dmin)
    
print(drug_min)
print(drug_max)
print()

### DEL2 Collection

del2p_min = []
del2p_max = []

for i in range(0,len(molprop)):
    dmax = del2_mean[i] + std_fact*del2_std[i]
    dmin = del2_mean[i] - std_fact*del2_std[i]    
    dmax = (dmax-axmin[i])/scale[i]
    dmin = (dmin-axmin[i])/scale[i]
    if dmin<0:
        dmin = 0
    del2p_max.append(dmax)
    del2p_min.append(dmin)
    
print(del2p_min)
print(del2p_max)
print()

### DEL1 Collection

del1p_min = []
del1p_max = []

for i in range(0,len(molprop)):
    dmax = del1_mean[i] + std_fact*del1_std[i]
    dmin = del1_mean[i] - std_fact*del1_std[i]    
    dmax = (dmax-axmin[i])/scale[i]
    dmin = (dmin-axmin[i])/scale[i]
    if dmin<0:
        dmin = 0
    del1p_max.append(dmax)
    del1p_min.append(dmin)
    
print(del1p_min)
print(del1p_max)

[0.2608695652173913, 0.09618318943226623, 0.0, 0.0, 0.0, 0.0, 0.0, 0.17142857142857143, 0.42385837865356174, 0.33254452469606643, 0.0]
[0.4782608695652174, 0.30057246697583195, 0.5, 0.23255813953488372, 0.2, 0.03333333333333333, 0.23076923076923078, 0.2857142857142857, 0.6357875679803425, 0.5194660027215775, 0.19942451782000514]

[0, 0, 0, 0, 0, 0, 0, 0, 0.13704849898035934, 0.21202128684158728, 0]
[0.41752017882356696, 0.41172738538861786, 0.9692648073649767, 0.3208935348809258, 0.3122356893415111, 0.2926442992395945, 0.45004903972139626, 0.3727808349471888, 1.0155910461149467, 0.6022791095653641, 0.3055540241754811]

[0.16656445266628966, 0.16698522876846242, 0.15335544598544854, 0.1260628662197918, 0.10850540151015825, 0.03305787196237301, 0.0010220197217084986, 0.1194904857024104, 0.040050396060874384, 0.21667477802003848, 0.12468700891588404]
[0.38962655178781114, 0.3754008126954943, 0.8046372501099088, 0.3446793975127043, 0.35623139648180424, 0.16834051966668706, 0.38738846607771

In [20]:
##### Plot Radar Plots #####
ax_max = [1,1,1,1,1,1,1,1,1,1,1,1]
### Close Traces
ax.append(ax[0])
dl_max.append(dl_max[0])
dl_min.append(dl_min[0])
drug_max.append(drug_max[0])
drug_min.append(drug_min[0])
del2p_max.append(del2p_max[0])
del2p_min.append(del2p_min[0])
del1p_max.append(del1p_max[0])
del1p_min.append(del1p_min[0])

#print(drug_max)
#print(drug_min)

### Drugs & Druglikeness ###

fig_drug_like = go.Figure()

fig_drug_like.add_trace(go.Scatterpolar(name = 'leveling', showlegend=False, #visible = False,
  r=ax_max,
  theta=ax,
    line = dict(width = 0, color = 'silver'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_drug_like.add_trace(go.Scatterpolar(name = 'Drug Collection', showlegend=False,
  r=drug_min,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_drug_like.add_trace(go.Scatterpolar(name = 'Drug Collection',
  r=drug_max,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
    #opacity = .25,
  fill='tonext'#, fillcolor ='lightblue'
))

fig_drug_like.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)', showlegend=False,
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    marker = dict(size = 1),
    line = dict(color = 'red')
  #fill='none'
))

fig_drug_like.add_trace(go.Scatterpolar(name = 'Druglikeness',
  r=dl_max,
  theta=ax,
    line = dict(color = 'red'),
    opacity = .5,
    marker = dict(size = 1),
    fillcolor = 'red',
  fill='tonext'
))




fig_drug_like.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig_drug_like.show()

### DEL2 & Druglikeness ###

fig_del2_like = go.Figure()

fig_del2_like.add_trace(go.Scatterpolar(name = 'leveling', showlegend=False, #visible = False,
  r=ax_max,
  theta=ax,
    line = dict(width = 0, color = 'silver'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))

fig_del2_like.add_trace(go.Scatterpolar(name = 'DEL2 Library', showlegend=False,
  r=del2p_min,
  theta=ax,
    line = dict(width = 2, color = 'green'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_del2_like.add_trace(go.Scatterpolar(name = 'Zafrens Library 2',
  r=del2p_max,
  theta=ax,
    line = dict(width = 2, color = 'green'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
    #opacity = .25,
  fill='tonext'#, fillcolor ='lightblue'
))

fig_del2_like.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)', showlegend=False,
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    marker = dict(size = 1),
    line = dict(color = 'red')
  #fill='none'
))

fig_del2_like.add_trace(go.Scatterpolar(name = 'Druglikeness',
  r=dl_max,
  theta=ax,
    line = dict(color = 'red'),
    opacity = .5,
    marker = dict(size = 1),
    fillcolor = 'red',
  fill='tonext'
))




fig_del2_like.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig_del2_like.show()

### DEL2 & Drug ###

fig_del2_drug = go.Figure()

fig_del2_drug.add_trace(go.Scatterpolar(name = 'leveling', showlegend=False, #visible = False,
  r=ax_max,
  theta=ax,
    line = dict(width = 0, color = 'silver'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))

fig_del2_drug.add_trace(go.Scatterpolar(name = 'Drug Collection', showlegend=False,
  r=drug_min,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_del2_drug.add_trace(go.Scatterpolar(name = 'Drug Collection',
  r=drug_max,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
    #opacity = .25,
  fill='tonext'#, fillcolor ='lightblue'
))

fig_del2_drug.add_trace(go.Scatterpolar(name = 'DEL2 Library', showlegend=False,
  r=del2p_min,
  theta=ax,
    line = dict(width = 2, color = 'green'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_del2_drug.add_trace(go.Scatterpolar(name = 'Zafrens Library 2',
  r=del2p_max,
  theta=ax,
    line = dict(width = 2, color = 'green'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
    #opacity = .25,
  fill='tonext'#, fillcolor ='lightblue'
))


fig_del2_drug.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig_del2_drug.show()


### DEL2 & Drug & Druglikeness ###

fig_del2_drug_like = go.Figure()

fig_del2_drug.add_trace(go.Scatterpolar(name = 'leveling', showlegend=False, #visible = False,
  r=ax_max,
  theta=ax,
    line = dict(width = 0, color = 'silver'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))

fig_del2_drug_like.add_trace(go.Scatterpolar(name = 'Drug Collection', showlegend=False,
  r=drug_min,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_del2_drug_like.add_trace(go.Scatterpolar(name = 'Drug Collection',
  r=drug_max,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
    #opacity = .25,
  fill='tonext'#, fillcolor ='lightblue'
))

fig_del2_drug_like.add_trace(go.Scatterpolar(name = 'DEL2 Library', showlegend=False,
  r=del2p_min,
  theta=ax,
    line = dict(width = 2, color = 'green'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_del2_drug_like.add_trace(go.Scatterpolar(name = 'Zafrens Library 2',
  r=del2p_max,
  theta=ax,
    line = dict(width = 2, color = 'green'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
    #opacity = .25,
  fill='tonext'#, fillcolor ='lightblue'
))

fig_del2_drug_like.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)', showlegend=False,
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    marker = dict(size = 1),
    line = dict(color = 'red')
  #fill='none'
))

fig_del2_drug_like.add_trace(go.Scatterpolar(name = 'Druglikeness',
  r=dl_max,
  theta=ax,
    line = dict(color = 'red'),
    opacity = .5,
    marker = dict(size = 1),
    fillcolor = 'red',
  fill='tonext'
))


fig_del2_drug_like.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig_del2_drug_like.show()


### DEL1 & Druglikeness ###

fig_del1_like = go.Figure()

fig_del1_like.add_trace(go.Scatterpolar(name = 'leveling', showlegend=False, #visible = False,
  r=ax_max,
  theta=ax,
    line = dict(width = 0, color = 'silver'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))

fig_del1_like.add_trace(go.Scatterpolar(name = 'DEL2 Library', showlegend=False,
  r=del1p_min,
  theta=ax,
    line = dict(width = 2, color = 'yellow'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_del1_like.add_trace(go.Scatterpolar(name = 'Zafrens Library 1',
  r=del1p_max,
  theta=ax,
    line = dict(width = 2, color = 'yellow'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
    #opacity = .25,
  fill='tonext'#, fillcolor ='lightblue'
))

fig_del1_like.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)', showlegend=False,
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    marker = dict(size = 1),
    line = dict(color = 'red')
  #fill='none'
))

fig_del1_like.add_trace(go.Scatterpolar(name = 'Druglikeness',
  r=dl_max,
  theta=ax,
    line = dict(color = 'red'),
    opacity = .5,
    marker = dict(size = 1),
    fillcolor = 'red',
  fill='tonext'
))


fig_del1_like.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig_del1_like.show()

### DEL1 & Drug ###

fig_del1_drug = go.Figure()

fig_del1_drug.add_trace(go.Scatterpolar(name = 'leveling', showlegend=False, #visible = False,
  r=ax_max,
  theta=ax,
    line = dict(width = 0, color = 'silver'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))

fig_del1_drug.add_trace(go.Scatterpolar(name = 'Drug Collection', showlegend=False,
  r=drug_min,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_del1_drug.add_trace(go.Scatterpolar(name = 'Drug Collection',
  r=drug_max,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
    #opacity = .25,
  fill='tonext'#, fillcolor ='lightblue'
))

fig_del1_drug.add_trace(go.Scatterpolar(name = 'DEL1 Library', showlegend=False,
  r=del1p_min,
  theta=ax,
    line = dict(width = 2, color = 'yellow'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_del1_drug.add_trace(go.Scatterpolar(name = 'Zafrens Library 1',
  r=del1p_max,
  theta=ax,
    line = dict(width = 2, color = 'yellow'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
    #opacity = .25,
  fill='tonext'#, fillcolor ='lightblue'
))


fig_del1_drug.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig_del1_drug.show()

### DEL1 & DEL2 ###

fig_del1_del2 = go.Figure()

fig_del1_del2.add_trace(go.Scatterpolar(name = 'leveling', showlegend=False, #visible = False,
  r=ax_max,
  theta=ax,
    line = dict(width = 0, color = 'silver'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))

fig_del1_del2.add_trace(go.Scatterpolar(name = 'DEL2 Library', showlegend=False,
  r=del2p_min,
  theta=ax,
    line = dict(width = 2, color = 'green'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_del1_del2.add_trace(go.Scatterpolar(name = 'Zafrens Library 2',
  r=del2p_max,
  theta=ax,
    line = dict(width = 2, color = 'green'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
    #opacity = .25,
  fill='tonext'#, fillcolor ='lightblue'
))



fig_del1_del2.add_trace(go.Scatterpolar(name = 'DEL1 Library', showlegend=False,
  r=del1p_min,
  theta=ax,
    line = dict(width = 2, color = 'yellow'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig_del1_del2.add_trace(go.Scatterpolar(name = 'Zafrens Library 1',
  r=del1p_max,
  theta=ax,
    line = dict(width = 2, color = 'yellow'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
    #opacity = .25,
  fill='tonext'#, fillcolor ='lightblue'
))


fig_del1_del2.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig_del1_del2.show()



In [None]:
val = []
ax = molprop.copy()
scale = [1,1,1,1,1,1,1,1,1,1,1]
val_width = [5,12,15,8,16,32,64,10,20,40,10]
maxval = 0

dl_max = druglike_max
dl_min = druglike_min

for i in range(0,len(drugs_mean)):
        amin = drugs_min[i]
        if amin > 0:
            amin = 0
        amax = drugs_max[i]
        scale[i] = amax - amin
        j = (j-amin)/scale[i]
        val.append(j)
        dl_max[i] = (druglike_max[i]-amin)/scale[i]
        dl_min[i] = (druglike_min[i]-amin)/scale[i]
        val_width[i] = (100*drugs_std[i])/scale[i]
        maxval = max(j, maxval)

print(scale)
print(dl_max)
print(dl_min)  
print(val)
print(val_width)


ax.append(ax[0])
val.append(val[0])
dl_max.append(dl_max[0])
dl_min.append(dl_min[0])

fig = go.Figure()

fig.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)',
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    line = dict(color = 'green')
  #fill='none'
))

fig.add_trace(go.Scatterpolar(name = 'Druglikeness (upper)',
  r=dl_max,
  theta=ax,
    line = dict(color = 'green'),
    opacity = .5,
    fillcolor = 'lightgreen',
  fill='tonext'
))


fig.add_trace(go.Scatterpolar(name = 'Drug Collection',
  r=val,
  theta=ax,
    line = dict(width = 5, color = 'blue'),
    marker = dict(size=val_width, sizeref = maxval, sizemode = 'diameter', color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    
  #fill='toself'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig.show()

In [None]:
val_max = []
val_min = []

for i in range(0,len(val)-1):
    j = val[i]+drugs_std[i]/scale[i]
    val_max.append(j)
    j = val[i]-drugs_std[i]/scale[i]
    val_min.append(j)
    
val_min.append(val_min[0])
val_max.append(val_max[0])
    
print(val_min)
print(val_max)
    
fig2 = go.Figure()

fig2.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)', showlegend=False,
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    line = dict(color = 'green')
  #fill='none'
))

fig2.add_trace(go.Scatterpolar(name = 'Druglikeness (upper)',
  r=dl_max,
  theta=ax,
    line = dict(color = 'green'),
    opacity = .5,
    fillcolor = 'lightgreen',
  fill='tonext'
))


fig2.add_trace(go.Scatterpolar(name = 'Drug Collection', showlegend=False,
  r=val_min,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig2.add_trace(go.Scatterpolar(name = 'Drug Collection',
  r=val_max,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
  fill='tonext'
))

fig2.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig2.show()

In [None]:
fig.show()
fig2.show()

In [None]:
print(dir(rdmd.Mol))

In [None]:
dfm = pd.read_csv("../DEL2_enum.csv")
dfr=dfm['Structure']
print(dfr)

del2_list = []

for i in range(0, len(dfr)):
    j = dfr[i]
    #print(j, type(j))
    if type(j) == str:
        j = j.split('.')
        for k in j:
            try:
                #print(k)
                m = Chem.MolFromSmiles(k)
                numatoms = rdch.Mol.GetNumAtoms(m)
                if numatoms >5:
                    #print(k, numatoms)
                    a = (k,m)
                    if a not in drug_list:
                        del2_list.append(a)
            except:
                #print(j,k, ' - no valid SMILES/Molecule')
                pass
        
print()
print(len(del2_list))
#print()
#print(del2_list)

In [None]:
drug_temp = []


for i in del2_list:
    smiles = i[0]
    m = i[1]
    numatoms = rdch.Mol.GetNumHeavyAtoms(m)
    mw = rdd.ExactMolWt(m)
    fsp3 = rdmd.CalcFractionCSP3(m)
    HBA = rdmd.CalcNumLipinskiHBA(m)
    HBD = rdmd.CalcNumLipinskiHBD(m)
    stereo = rdmd.CalcNumAtomStereoCenters(m)
    ring = rdmd.CalcNumRings(m)
    rotbond = rdmd.CalcNumRotatableBonds(m)
    qed = Chem.QED.qed(m)
    logd = Chem.Crippen.MolLogP(m)
    tpsa = rdmd.CalcTPSA(m)
    #print(smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd)
    a = [smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd, tpsa]
    drug_temp.append(a)
    
df_del2 = pd.DataFrame(drug_temp, columns = ['smiles', 'numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd', 'tpsa'])

print(df_del2)

In [None]:
del2_mean = df_del2[['numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd']].mean()
del2_std = df_del2[['numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd']].std()
del2_min = df_del2[['numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd']].min()
del2_max = df_del2[['numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd']].max()
print(del2_mean)
print(del2_std)
print(del2_min)
print(drugs_max)

val = []
ax = molprop.copy()
#dl_max = [55,500,0.5,10,5,1,3,10,0.6,5]
#dl_min = [30,160,0,0,0,0,0,6,0.4,0]
scale = [1,1,1,1,1,1,1,1,1,1]
val_width = [5,12,15,8,16,32,64,10,20,40]
val_min = []
val_max = []

for i in range(0,len(del2_mean)):
    val_width[i] = del2_std[i]

for i in range(0,len(del2_mean)):
    #print(i)
    j = del2_mean[i]
    if ax[i] in ['qed','fsp3']:
        val.append(j)
        jmin = j-val_width[i]
        jmax = j+val_width[i]
        val_min.append(jmin)
        val_max.append(jmax)
        print(i,j, ax[i])
        val_width[i] = del2_std[i]
    else:
        amin = min(drugs_min[i],druglike_min[i],del2_min[i])
        if amin > 0:
            amin = 0
        amax = max(drugs_max[i],druglike_max[i],del2_max[i])
        scale[i] = amax - amin
        jmin = (j-val_width[i]-amin)/scale[i]
        jmax = (j+val_width[i]-amin)/scale[i]
        val_min.append(jmin)
        val_max.append(jmax)
        j = (j-amin)/scale[i]
        val.append(j)
        dl_max[i] = (druglike_max[i]-amin)/scale[i]
        dl_min[i] = (druglike_min[i]-amin)/scale[i]
        val_width[i] = del2_std[i]/scale[i]

print()
print(scale)
print(val)
print('Likeness')
print(dl_max)
print(dl_min)  
print('Drugs')
print(val_min)
print(val_max)

ax.append(ax[0])
val.append(val[0])
val_min.append(val_min[0])
val_max.append(val_max[0])
dl_max.append(dl_max[0])
dl_min.append(dl_min[0])

fig = go.Figure()

fig.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)',
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    line = dict(color = 'green')
  #fill='none'
))

fig.add_trace(go.Scatterpolar(name = 'Druglikeness (upper)',
  r=dl_max,
  theta=ax,
    line = dict(color = 'green'),
    opacity = .5,
    fillcolor = 'silver',
  fill='tonext'
))


fig.add_trace(go.Scatterpolar(name = 'DEL2',
  r=val,
  theta=ax,
    line = dict(width = 5, color = 'blue'),
    marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    
  #fill='toself'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig.show()


##### Double Trace #####

#val_max = []

#for i in val:
#    j = i+.1
#    val_max.append(j)
    
fig2 = go.Figure()

fig2.add_trace(go.Scatterpolar(
    name = 'Druglikeness (lower)', showlegend=False,
  r=dl_min,
  theta=ax,
    opacity =.25,
    #fillcolor = 'white',
    line = dict(color = 'green')
  #fill='none'
))

fig2.add_trace(go.Scatterpolar(name = 'Druglikeness (upper)',
  r=dl_max,
  theta=ax,
    line = dict(color = 'green'),
    opacity = .5,
    fillcolor = 'silver',
  fill='tonext'
))


fig2.add_trace(go.Scatterpolar(name = 'Del2', showlegend=False,
  r=val_min,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1)
    
  #fill='toself'
))


fig2.add_trace(go.Scatterpolar(name = 'DEL2',
  r=val_max,
  theta=ax,
    line = dict(width = 2, color = 'blue'),
    #marker = dict(size=val_width, color = 'blue', symbol = 'circle', gradient = dict(type = 'radial',color = 'lightblue'))
    marker = dict(size = 1),
  fill='tonext'
))

fig2.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=True
)

fig2.show()

In [None]:
import sys
sys.path

In [None]:
sys.path.append('../pkg_mod')

In [None]:
struct_list = []
print(struct_raw)
print()

for i in struct_raw:
    #print(i)
    a = i[0].split('.')
    #print(a)
    for j in a:
        b = (j,)
        #print(j, b)
        if b not in struct_list:
            struct_list.append(b)

print(struct_list)
        

In [None]:
dfm = pd.read_excel("../Drug_Libraries/L1021-DiscoveryProbe-FDA-approved-Drug-Library.xlsx", sheet_name=1)
#print(dfm)
dfr = dfm['SMILES']
#print(dfr)

drug_list = list_prep(dfr)

for i in range(0, len(dfr)):
    j = dfr[i]
    #print(j, type(j))
    if type(j) == str:
        j = j.split('.')
        for k in j:
            try:
                #print(k)
                m = Chem.MolFromSmiles(k)
                numatoms = rdch.Mol.GetNumAtoms(m)
                if numatoms >5:
                    #print(k, numatoms)
                    a = (k,m)
                    if a not in drug_list:
                        drug_list.append(a)
            except:
                #print(j,k, ' - no valid SMILES/Molecule')
                pass
        

print(len(drug_list))
print()
print(drug_list)

In [None]:
drug_temp = []


for i in drug_list:
    smiles = i[0]
    m = i[1]
    numatoms = rdch.Mol.GetNumHeavyAtoms(m)
    mw = rdd.ExactMolWt(m)
    fsp3 = rdmd.CalcFractionCSP3(m)
    HBA = rdmd.CalcNumLipinskiHBA(m)
    HBD = rdmd.CalcNumLipinskiHBD(m)
    stereo = rdmd.CalcNumAtomStereoCenters(m)
    ring = rdmd.CalcNumRings(m)
    rotbond = rdmd.CalcNumRotatableBonds(m)
    qed = Chem.QED.qed(m)
    logd = Chem.Crippen.MolLogP(m)
    tpsa = rdmd.CalcTPSA(m)
    #print(smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd)
    a = [smiles, numatoms,mw, fsp3, HBA, HBD, stereo, ring, rotbond, qed, logd,tpsa]
    drug_temp.append(a)
    
#df_drugs = pd.DataFrame(drug_temp, columns = ['smiles', 'numatoms','mw', 'fsp3', 'HBA', 'HBD', 'stereo', 'ring', 'rotbond', 'qed', 'logd','tpsa'])
d = ['smiles']
for i in molprop:
    d.append(i)
    
print(d)
    
df_drugs = pd.DataFrame(drug_temp, columns = d)
print(df_drugs)