In [27]:
import pandas as pd
import time
from rdkit import DataStructs, Chem

In [28]:
def get_fingerprints(smiles):
    """Parse a SMILES string with RDKit and return the resulting molecule.

    NOTE(review): despite the name, no fingerprint is computed here; the
    value returned is whatever ``Chem.MolFromSmiles`` produces (an RDKit
    ``Mol`` object on success).

    Parameters
    ----------
    smiles : str
        SMILES string to parse.

    Returns
    -------
    rdkit.Chem.rdchem.Mol or None
        The parsed molecule, or ``None`` when parsing failed, either because
        the RDKit call raised or because it returned ``None``.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
    except Exception as e:
        # Inputs the RDKit call rejects outright end up here.
        print(e,'\t',smiles,'\n')
        return None
    if mol is None:
        # MolFromSmiles reports an unparsable SMILES by returning None rather
        # than raising, so this case must be reported explicitly; otherwise
        # the failure would be counted as a success.
        print('Invalid SMILES','\t',smiles,'\n')
        return None
    # One progress dot per successfully parsed molecule.
    print(".",end="")
    return mol

In [86]:
def process(df,label):
    """Add a 'Fingerprints' column derived from 'Canonical_SMILES'.

    Each SMILES string is passed through ``get_fingerprints`` and the elapsed
    wall time (in seconds) is printed. The input frame is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'ID', 'Canonical_SMILES' and ``label``.
    label : str
        Name of the target column to keep in the result.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns 'ID', 'Canonical_SMILES',
        'Fingerprints' and ``label``, in that order, sharing ``df``'s index.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``df``.
    """
    begin = time.perf_counter()
    # Map over the frame's own column so the result keeps df's index.
    # Rebuilding the column as a fresh RangeIndex Series and assigning it back
    # would be aligned on index and yield NaN for any non-default index.
    fingerprints = df['Canonical_SMILES'].map(get_fingerprints)
    elapsed = time.perf_counter() - begin
    print(elapsed)

    # assign() returns a new frame, so the caller's DataFrame is not mutated.
    return df.assign(Fingerprints=fingerprints)[['ID','Canonical_SMILES','Fingerprints',label]]

----

# DILI

In [87]:
# Load the DILI dataset; the preview below shows the ID, Canonical_SMILES and DILI columns.
df = pd.read_csv('./DILI/final/DILI_Final.csv')
df

Unnamed: 0,ID,Canonical_SMILES,DILI
0,ref1_00001,CNCC[C@@H](Oc1ccccc1C)c1ccccc1,1
1,ref1_00002,CN1CCC[C@@H]1CCO[C@](C)(c1ccccc1)c1ccc(Cl)cc1,0
2,ref1_00003,CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21,1
3,ref1_00004,CN1CCN(C2=Nc3cc(Cl)ccc3Nc3ccccc32)CC1,1
4,ref1_00005,NC(=O)C(c1ccccc1)(c1ccccc1)[C@@H]1CCN(CCc2ccc3...,0
...,...,...,...
2256,ref6_01298,CN1C[C@H](C(=O)N[C@]2(C)O[C@@]3(O)[C@@H]4CCCN4...,0
2257,ref6_01300,CNC[C@H](O)c1ccc(O)c(O)c1,0
2258,ref6_01301,CCCCCCCC/C=C/CCCCCCCC(=O)O,0
2259,ref6_01304,NC(N)=NCCN1CCCCCCC1,0


In [88]:
# Parse every SMILES and keep ID, Canonical_SMILES, Fingerprints and the DILI label.
# Rebinds `df`, so re-running this cell requires re-running the load cell first.
df = process(df,'DILI')

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [89]:
# Inspect the processed frame; the Fingerprints column displays RDKit Mol object reprs.
df

Unnamed: 0,ID,Canonical_SMILES,Fingerprints,DILI
0,ref1_00001,CNCC[C@@H](Oc1ccccc1C)c1ccccc1,<rdkit.Chem.rdchem.Mol object at 0x0000018348E...,1
1,ref1_00002,CN1CCC[C@@H]1CCO[C@](C)(c1ccccc1)c1ccc(Cl)cc1,<rdkit.Chem.rdchem.Mol object at 0x0000018348E...,0
2,ref1_00003,CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21,<rdkit.Chem.rdchem.Mol object at 0x0000018348E...,1
3,ref1_00004,CN1CCN(C2=Nc3cc(Cl)ccc3Nc3ccccc32)CC1,<rdkit.Chem.rdchem.Mol object at 0x0000018348E...,1
4,ref1_00005,NC(=O)C(c1ccccc1)(c1ccccc1)[C@@H]1CCN(CCc2ccc3...,<rdkit.Chem.rdchem.Mol object at 0x0000018348E...,0
...,...,...,...,...
2256,ref6_01298,CN1C[C@H](C(=O)N[C@]2(C)O[C@@]3(O)[C@@H]4CCCN4...,<rdkit.Chem.rdchem.Mol object at 0x0000018348F...,0
2257,ref6_01300,CNC[C@H](O)c1ccc(O)c(O)c1,<rdkit.Chem.rdchem.Mol object at 0x0000018348F...,0
2258,ref6_01301,CCCCCCCC/C=C/CCCCCCCC(=O)O,<rdkit.Chem.rdchem.Mol object at 0x0000018348F...,0
2259,ref6_01304,NC(N)=NCCN1CCCCCCC1,<rdkit.Chem.rdchem.Mol object at 0x0000018348F...,0


In [90]:
# Write the processed DILI frame to disk.
# NOTE(review): the Fingerprints column holds RDKit Mol objects, so the CSV presumably
# stores only their repr text ("<rdkit.Chem.rdchem.Mol object at ...>") — confirm that
# downstream readers do not need the molecules or real fingerprints from this file.
df.to_csv('./DILI/final/DILI_Fingerprints.csv')

----

# Acute Oral Toxicity

In [91]:
# Load the Acute Oral Toxicity dataset; the preview below also shows an 'Unnamed: 0'
# column in addition to ID, Canonical_SMILES and the 'Acute Oral Toxicity' label.
df = pd.read_csv('./Acute Oral Toxicity/final/AOT_Final.csv')
df

Unnamed: 0.1,Unnamed: 0,ID,Canonical_SMILES,Acute Oral Toxicity
0,2085,ref1_00005,CNc1ccc([N+](=O)[O-])cc1,0
1,1775,ref1_00006,COc1ccc([N+](=O)[O-])cc1,0
2,5076,ref1_00007,CC(C)c1ccc(C(C)C)cc1,0
3,832,ref1_00008,O=C(Cl)c1ccc(C(=O)Cl)cc1,0
4,2780,ref1_00011,CCOc1ccc([N+](=O)[O-])cc1,0
...,...,...,...,...
5807,5058,ref1_11878,CC(C)CC(=O)CC(C)C,0
5808,2143,ref1_11879,CNC(=O)Oc1cccc2c1OC(C)(C)C2,1
5809,5595,ref1_11885,c1ccccc1,0
5810,1542,ref1_11886,COP(=O)(OC)OC=C(Cl)Cl,1


In [93]:
# Parse every SMILES and keep ID, Canonical_SMILES, Fingerprints and the label column
# (the extra 'Unnamed: 0' column is dropped by process()'s column selection).
# Rebinds `df`, so re-running this cell requires re-running the load cell first.
df = process(df,'Acute Oral Toxicity')

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [95]:
# Inspect the processed frame; the Fingerprints column displays RDKit Mol object reprs.
df

Unnamed: 0,ID,Canonical_SMILES,Fingerprints,Acute Oral Toxicity
0,ref1_00005,CNc1ccc([N+](=O)[O-])cc1,<rdkit.Chem.rdchem.Mol object at 0x00000183503...,0
1,ref1_00006,COc1ccc([N+](=O)[O-])cc1,<rdkit.Chem.rdchem.Mol object at 0x00000183500...,0
2,ref1_00007,CC(C)c1ccc(C(C)C)cc1,<rdkit.Chem.rdchem.Mol object at 0x00000183500...,0
3,ref1_00008,O=C(Cl)c1ccc(C(=O)Cl)cc1,<rdkit.Chem.rdchem.Mol object at 0x00000183500...,0
4,ref1_00011,CCOc1ccc([N+](=O)[O-])cc1,<rdkit.Chem.rdchem.Mol object at 0x00000183500...,0
...,...,...,...,...
5807,ref1_11878,CC(C)CC(=O)CC(C)C,<rdkit.Chem.rdchem.Mol object at 0x00000183500...,0
5808,ref1_11879,CNC(=O)Oc1cccc2c1OC(C)(C)C2,<rdkit.Chem.rdchem.Mol object at 0x00000183500...,1
5809,ref1_11885,c1ccccc1,<rdkit.Chem.rdchem.Mol object at 0x00000183500...,0
5810,ref1_11886,COP(=O)(OC)OC=C(Cl)Cl,<rdkit.Chem.rdchem.Mol object at 0x00000183500...,1


In [96]:
# Write the processed Acute Oral Toxicity frame to disk.
# NOTE(review): the Fingerprints column holds RDKit Mol objects, so the CSV presumably
# stores only their repr text ("<rdkit.Chem.rdchem.Mol object at ...>") — confirm that
# downstream readers do not need the molecules or real fingerprints from this file.
df.to_csv('./Acute Oral Toxicity/final/AOT_Fingerprints.csv')

----

# AMES Mutagenicity

In [97]:
# Load the AMES dataset; the preview below shows the ID, Canonical_SMILES and Mutagenicity columns.
df = pd.read_csv('./AMES Mutagenesis/final/AMES_Final.csv')
df

Unnamed: 0,ID,Canonical_SMILES,Mutagenicity
0,ref1_00001,O=c1c2ccccc2c(=O)c2c1ccc1c2[nH]c2c3c(=O)c4cccc...,0
1,ref1_00008,CC(=O)OC1(C(C)=O)CCC2C3C=C(Cl)C4=CC(=O)OCC4(C)...,0
2,ref1_00009,Nc1nc(N)nc(N)n1,0
3,ref1_00010,Cc1ccc(N=Nc2c(O)ccc3ccccc23)c([N+](=O)[O-])c1,1
4,ref1_00011,CC(C)CC(=O)Nc1snc2ccccc12,0
...,...,...,...
8629,ref4_08306,O=C(O)CCC(=O)OCC(N=C(O)C(Cl)Cl)C(O)c1ccc([N+](...,0
8630,ref4_08309,O=[N+]([O-])c1ccc(CNc2nc[nH]c3ncnc2-3)cc1,0
8631,ref4_08328,O=[N+]([O-])c1ccc(-[n+]2nc(-c3ccccc3)nn2-c2ccc...,0
8632,ref4_08330,O=C(C=Cc1cccc([N+](=O)[O-])c1)c1ccccc1,0


In [98]:
# Parse every SMILES and keep ID, Canonical_SMILES, Fingerprints and the Mutagenicity label.
# Rebinds `df`, so re-running this cell requires re-running the load cell first.
df = process(df,'Mutagenicity')


........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [99]:
# Inspect the processed frame; the Fingerprints column displays RDKit Mol object reprs.
df

Unnamed: 0,ID,Canonical_SMILES,Fingerprints,Mutagenicity
0,ref1_00001,O=c1c2ccccc2c(=O)c2c1ccc1c2[nH]c2c3c(=O)c4cccc...,<rdkit.Chem.rdchem.Mol object at 0x00000183630...,0
1,ref1_00008,CC(=O)OC1(C(C)=O)CCC2C3C=C(Cl)C4=CC(=O)OCC4(C)...,<rdkit.Chem.rdchem.Mol object at 0x00000183630...,0
2,ref1_00009,Nc1nc(N)nc(N)n1,<rdkit.Chem.rdchem.Mol object at 0x00000183630...,0
3,ref1_00010,Cc1ccc(N=Nc2c(O)ccc3ccccc23)c([N+](=O)[O-])c1,<rdkit.Chem.rdchem.Mol object at 0x00000183630...,1
4,ref1_00011,CC(C)CC(=O)Nc1snc2ccccc12,<rdkit.Chem.rdchem.Mol object at 0x0000018348D...,0
...,...,...,...,...
8629,ref4_08306,O=C(O)CCC(=O)OCC(N=C(O)C(Cl)Cl)C(O)c1ccc([N+](...,<rdkit.Chem.rdchem.Mol object at 0x00000183502...,0
8630,ref4_08309,O=[N+]([O-])c1ccc(CNc2nc[nH]c3ncnc2-3)cc1,<rdkit.Chem.rdchem.Mol object at 0x00000183502...,0
8631,ref4_08328,O=[N+]([O-])c1ccc(-[n+]2nc(-c3ccccc3)nn2-c2ccc...,<rdkit.Chem.rdchem.Mol object at 0x00000183502...,0
8632,ref4_08330,O=C(C=Cc1cccc([N+](=O)[O-])c1)c1ccccc1,<rdkit.Chem.rdchem.Mol object at 0x00000183502...,0


In [100]:
# Write the processed AMES frame to disk.
# NOTE(review): the Fingerprints column holds RDKit Mol objects, so the CSV presumably
# stores only their repr text ("<rdkit.Chem.rdchem.Mol object at ...>") — confirm that
# downstream readers do not need the molecules or real fingerprints from this file.
df.to_csv('./AMES Mutagenesis/final/AMES_Fingerprints.csv')

----

# hERG

In [101]:
# Load the hERG dataset; the preview below shows the ID, Canonical_SMILES and hERG columns.
df = pd.read_csv('./hERG/final/hERG_Final.csv')
df

Unnamed: 0,ID,Canonical_SMILES,hERG
0,ref1_00001,Brc1ccc(-n2ccc3ccncc32)cc1,1
1,ref1_00002,Brc1cnc(NCC2CC2)nc1,1
2,ref1_00003,C(#Cc1cc(-c2[nH]nc3c2Cc2cc(Cn4ccnc4)ccc2-3)cs1...,1
3,ref1_00004,C(#Cc1cc(-c2[nH]nc3c2Cc2cc(Cn4cncn4)ccc2-3)cs1...,1
4,ref1_00005,C1=C/COCc2cc(ccc2OCCN2CCCC2)Nc2nccc(n2)-c2ccc(...,1
...,...,...,...
13896,ref4_10415,CC1(C)CN(c2ccc3c(c2)[C@]2(COC(N)=N2)c2cc(-c4cn...,0
13897,ref4_10416,NC1=N[C@@]2(CO1)c1cc(-c3cncnc3)ccc1Oc1ccc(N3CC...,0
13898,ref4_10417,CC(C)(O)COc1ccc2c(c1)[C@]1(COC(N)=N1)c1cc(-c3c...,0
13899,ref4_10418,CC1(COc2ccc3c(c2)[C@]2(COC(N)=N2)c2cc(-c4cncnc...,0


In [102]:
# Parse every SMILES and keep ID, Canonical_SMILES, Fingerprints and the hERG label.
# Rebinds `df`, so re-running this cell requires re-running the load cell first.
df = process(df,'hERG')

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [103]:
# Inspect the processed frame; the Fingerprints column displays RDKit Mol object reprs.
df

Unnamed: 0,ID,Canonical_SMILES,Fingerprints,hERG
0,ref1_00001,Brc1ccc(-n2ccc3ccncc32)cc1,<rdkit.Chem.rdchem.Mol object at 0x00000183652...,1
1,ref1_00002,Brc1cnc(NCC2CC2)nc1,<rdkit.Chem.rdchem.Mol object at 0x00000183652...,1
2,ref1_00003,C(#Cc1cc(-c2[nH]nc3c2Cc2cc(Cn4ccnc4)ccc2-3)cs1...,<rdkit.Chem.rdchem.Mol object at 0x00000183652...,1
3,ref1_00004,C(#Cc1cc(-c2[nH]nc3c2Cc2cc(Cn4cncn4)ccc2-3)cs1...,<rdkit.Chem.rdchem.Mol object at 0x00000183652...,1
4,ref1_00005,C1=C/COCc2cc(ccc2OCCN2CCCC2)Nc2nccc(n2)-c2ccc(...,<rdkit.Chem.rdchem.Mol object at 0x00000183652...,1
...,...,...,...,...
13896,ref4_10415,CC1(C)CN(c2ccc3c(c2)[C@]2(COC(N)=N2)c2cc(-c4cn...,<rdkit.Chem.rdchem.Mol object at 0x00000183750...,0
13897,ref4_10416,NC1=N[C@@]2(CO1)c1cc(-c3cncnc3)ccc1Oc1ccc(N3CC...,<rdkit.Chem.rdchem.Mol object at 0x00000183750...,0
13898,ref4_10417,CC(C)(O)COc1ccc2c(c1)[C@]1(COC(N)=N1)c1cc(-c3c...,<rdkit.Chem.rdchem.Mol object at 0x00000183750...,0
13899,ref4_10418,CC1(COc2ccc3c(c2)[C@]2(COC(N)=N2)c2cc(-c4cncnc...,<rdkit.Chem.rdchem.Mol object at 0x00000183750...,0


In [104]:
# Write the processed hERG frame to disk.
# NOTE(review): the Fingerprints column holds RDKit Mol objects, so the CSV presumably
# stores only their repr text ("<rdkit.Chem.rdchem.Mol object at ...>") — confirm that
# downstream readers do not need the molecules or real fingerprints from this file.
df.to_csv('./hERG/final/hERG_Fingerprints.csv')