In [151]:
import pandas as pd
import numpy as np 
from rdkit import Chem
from rdkit.Chem import AllChem
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgb
from catboost import CatBoostRegressor,CatBoostClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import math
from rdkit.Chem import Descriptors
from rdkit import Chem
from mordred import Calculator,descriptors
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV,RepeatedStratifiedKFold,train_test_split,StratifiedKFold,KFold,cross_val_score
from sklearn.feature_selection import RFE,VarianceThreshold,RFECV,SelectFromModel
from sklearn import metrics

In [152]:
#Integrating cell morphology with gene expression and chemical structure to aid mitochondrial toxicity detection
#qHTS assay for small molecule disruptors of the mitochondrial membrane potential
#2,000 HepG2 cells in 5 uL/well
#qHTS assay for small molecule disruptors of the mitochondrial membrane potential - cell viability
#2,000 HepG2 cells in 5 uL/well were dispensed into black,CellTiter-Glo reagent was added at 4 uL/well and incubated 30 min at room temperature
#AID 720637
#AID 720635 and AID 720634
#測ATP量以及染色比例取交集
df1 = pd.read_csv("data/cp_pre")
d3 = pd.DataFrame()
d3["Inchi"] = df1["Standard_name"]
d3["Toxicity"] = df1["PUBCHEM_ACTIVITY_OUTCOME"]
d3.reset_index(inplace=True)
d3.drop("index",axis = 1,inplace = True)
for i in range(len(d3)):
    if d3["Inchi"][i] == None:
        print(i)
d3["Toxicity"].value_counts()

Toxicity
0    404
1     92
Name: count, dtype: int64

In [153]:
#Integrating cell morphology with gene expression and chemical structure to aid mitochondrial toxicity detection
#qHTS assay for small molecule disruptors of the mitochondrial membrane potential
#2,000 HepG2 cells in 5 uL/well
#qHTS assay for small molecule disruptors of the mitochondrial membrane potential - cell viability
#2,000 HepG2 cells in 5 uL/well were dispensed into black,CellTiter-Glo reagent was added at 4 uL/well and incubated 30 min at room temperature
#AID 720637
#AID 720635 and AID 720634
#測ATP量以及染色比例取交集
df2 = pd.read_csv("data/go_pre")
d5 = pd.DataFrame()
d5["Inchi"] = df2["Standard_name"]
d5["Toxicity"] = df2["PUBCHEM_ACTIVITY_OUTCOME"]
d5.reset_index(inplace=True)
d5.drop("index",axis = 1,inplace = True)
for i in range(len(d5)):
    if d5["Inchi"][i] == None:
        print(i)
d5["Toxicity"].value_counts()

Toxicity
0    607
1    161
Name: count, dtype: int64

In [154]:
#Using Machine Learning Methods and Structural Alerts for Prediction of Mitochondrial Toxicity
#Training_dataset_31072019.sdf
#AID 720637
#should be 5761
#Zhang, ChemBl, Tox21,PubChem
data = pd.read_csv("data/toxicity2.csv",index_col=0)
data = data.replace({"Activity" :{"Inactive" : 0,"Active" : 1,"Inactive, Active" : -1}})
d1 = pd.DataFrame()
d1["Inchi"] = data["InChICode_standardised"]
d1["Toxicity"] = data["Activity"]
d1.reset_index(inplace=True)
d1.drop("index",axis = 1,inplace = True)
for i in range(len(d1)):
    if d1["Inchi"][i] == None:
        print(i)
d1["Toxicity"].value_counts()

Toxicity
 0    4931
 1     824
-1       1
Name: count, dtype: int64

In [155]:
#Integrating cell morphology with gene expression and chemical structure to aid mitochondrial toxicity detection
#data is from Using Machine Learning Methods and Structural Alerts for Prediction of Mitochondrial Toxicity
#Zhang
data1 = pd.read_csv("data/New_test_244.csv",index_col=0)
d2 = pd.DataFrame()
data1.reset_index(inplace=True)
d2["Inchi"] = data1["InChICode_standardised"]
d2["Toxicity"] = data1["Activity"].astype(int)
d2.reset_index(inplace=True)
d2.drop("index",axis = 1,inplace = True)
for i in range(len(d2)):
    if d2["Inchi"][i] == None:
        print(i)
d2["Toxicity"].value_counts()

Toxicity
0    197
1     47
Name: count, dtype: int64

In [156]:
#In silico prediction of mitochondrial toxicity of chemicals using machine learning methods
#PubChem bioassay database (AID 720635, AID 1347389, AID 720637), 
#AID 1347389:qHTS assay for small molecule disruptors of mitochondrial membrane potential screened against the NCATS CANVASS Library
#2,000 HepG2 cells in 5 uL/well 
#DrugBank (Wishart et al., 2018), and literature (Zhang et al., 2009)
d4 = pd.read_csv("data/silico_pred.csv",index_col=0)
d4["Toxicity"] = d4["Toxicity"].astype(int)
d4.reset_index(inplace=True)
d4.drop("index",axis = 1,inplace = True)
for i in range(len(d4)):
    if d4["Inchi"][i] == None:
        print(i)
d4["Toxicity"].value_counts()

Toxicity
0    1869
1    1537
Name: count, dtype: int64

In [157]:
#Using Machine Learning Methods and Structural Alerts for Prediction of Mitochondrial Toxicity
#Tox21 SR-MMP” (stress response – mitochondrial membrane potential
#From https://tripod.nih.gov/tox21/challenge/data.jsp
suppl = Chem.SDMolSupplier('data/test_dataset_Tox21.sdf')
mol_list = []
inchi_list = []
tox_list = []
d6 = pd.DataFrame()
# 遍歷SDF文件中的化合物
for mol in suppl:
    if mol is None:
        continue

    # 提取InChI
    inchi_list.append(Chem.MolToInchi(mol))
    tox_list.append(mol.GetProp("SR-MMP"))  # 假設毒性值存儲在名為"toxicity"的屬性中
    mol_list.append(mol)
    # 將InChI和毒性值添加到DataFrame
d6["Mol"] = mol_list
d6["Inchi"] = inchi_list
d6["Toxicity"] = np.array(tox_list).astype(float).astype(int)
d6.reset_index(inplace=True)
d6.drop("index",axis = 1,inplace = True)
d6["Toxicity"].value_counts()






































































































































































































































































































Toxicity
0    614
1     92
Name: count, dtype: int64

In [158]:
#combine d1 and d4 into d1(same assay)
#delete the overlap part
num = []
counter1 = []
counter2 = []
for i in range(len(d1)):
    for y in range(len(d4)):
        if(d1["Inchi"][i] == d4["Inchi"][y]):
            num.append(y)
            print(i,":",d1["Inchi"][i]," // ",y,":",d4["Inchi"][y])
            print(i,":",d1["Toxicity"][i]," // ",y,":",d4["Toxicity"][y])
            if d1["Toxicity"][i] != d4["Toxicity"][y]:
                counter1.append(i)
                counter2.append(y)

5 : InChI=1S/C11H17BrN/c1-4-13(2,3)9-10-7-5-6-8-11(10)12/h5-8H,4,9H2,1-3H3/q+1  //  1286 : InChI=1S/C11H17BrN/c1-4-13(2,3)9-10-7-5-6-8-11(10)12/h5-8H,4,9H2,1-3H3/q+1
5 : 0  //  1286 : 0
7 : InChI=1S/C21H29FO5/c1-18-7-5-13(24)9-12(18)3-4-15-14-6-8-20(27,17(26)11-23)19(14,2)10-16(25)21(15,18)22/h9,14-16,23,25,27H,3-8,10-11H2,1-2H3/t14-,15-,16-,18-,19-,20-,21-/m0/s1  //  1753 : InChI=1S/C21H29FO5/c1-18-7-5-13(24)9-12(18)3-4-15-14-6-8-20(27,17(26)11-23)19(14,2)10-16(25)21(15,18)22/h9,14-16,23,25,27H,3-8,10-11H2,1-2H3/t14-,15-,16-,18-,19-,20-,21-/m0/s1
7 : 0  //  1753 : 0
9 : InChI=1S/C23H27N/c1-23(2,3)21-14-12-18(13-15-21)16-24(4)17-20-10-7-9-19-8-5-6-11-22(19)20/h5-15H,16-17H2,1-4H3  //  913 : InChI=1S/C23H27N/c1-23(2,3)21-14-12-18(13-15-21)16-24(4)17-20-10-7-9-19-8-5-6-11-22(19)20/h5-15H,16-17H2,1-4H3
9 : 0  //  913 : 0
14 : InChI=1S/C10H22N4/c11-10(12)13-6-9-14-7-4-2-1-3-5-8-14/h1-9H2,(H4,11,12,13)  //  1538 : InChI=1S/C10H22N4/c11-10(12)13-6-9-14-7-4-2-1-3-5-8-14/h1-9H2,(H4,11,12,13)
1

121 : InChI=1S/C19H28NO3/c1-20(2)13-12-17(14-20)23-18(21)19(22,16-10-6-7-11-16)15-8-4-3-5-9-15/h3-5,8-9,16-17,22H,6-7,10-14H2,1-2H3/q+1  //  1551 : InChI=1S/C19H28NO3/c1-20(2)13-12-17(14-20)23-18(21)19(22,16-10-6-7-11-16)15-8-4-3-5-9-15/h3-5,8-9,16-17,22H,6-7,10-14H2,1-2H3/q+1
121 : 0  //  1551 : 0
125 : InChI=1S/C21H13F3N2O4/c22-21(23,24)12-5-3-6-13(11-12)26-17-16(9-4-10-25-17)19(28)30-20-15-8-2-1-7-14(15)18(27)29-20/h1-11,20H,(H,25,26)  //  2703 : InChI=1S/C21H13F3N2O4/c22-21(23,24)12-5-3-6-13(11-12)26-17-16(9-4-10-25-17)19(28)30-20-15-8-2-1-7-14(15)18(27)29-20/h1-11,20H,(H,25,26)
125 : 1  //  2703 : 1
128 : InChI=1S/C12H8Cl2O2S/c13-7-1-3-9(15)11(5-7)17-12-6-8(14)2-4-10(12)16/h1-6,15-16H  //  1903 : InChI=1S/C12H8Cl2O2S/c13-7-1-3-9(15)11(5-7)17-12-6-8(14)2-4-10(12)16/h1-6,15-16H
128 : 1  //  1903 : 1
130 : InChI=1S/C8H11N7S/c9-7(10)14-12-5-1-3-6(4-2-5)13-15-8(11)16/h1-4,12H,(H2,11,16)(H4,9,10,14)  //  2423 : InChI=1S/C8H11N7S/c9-7(10)14-12-5-1-3-6(4-2-5)13-15-8(11)16/h1-4,12H,(H2,11,

206 : InChI=1S/C17H16Cl2O3/c1-11(2)22-16(20)17(21,12-3-7-14(18)8-4-12)13-5-9-15(19)10-6-13/h3-11,21H,1-2H3  //  2612 : InChI=1S/C17H16Cl2O3/c1-11(2)22-16(20)17(21,12-3-7-14(18)8-4-12)13-5-9-15(19)10-6-13/h3-11,21H,1-2H3
206 : 1  //  2612 : 1
208 : InChI=1S/C7H10N2/c1-5-3-2-4-6(8)7(5)9/h2-4H,8-9H2,1H3  //  3376 : InChI=1S/C7H10N2/c1-5-3-2-4-6(8)7(5)9/h2-4H,8-9H2,1H3
208 : 1  //  3376 : 1
209 : InChI=1S/C14H30N2O4/c1-15(2,3)9-11-19-13(17)7-8-14(18)20-12-10-16(4,5)6/h7-12H2,1-6H3/q+2  //  28 : InChI=1S/C14H30N2O4/c1-15(2,3)9-11-19-13(17)7-8-14(18)20-12-10-16(4,5)6/h7-12H2,1-6H3/q+2
209 : 0  //  28 : 0
214 : InChI=1S/C4H9NO3/c1-2(6)3(5)4(7)8/h2-3,6H,5H2,1H3,(H,7,8)/t2-,3+/m1/s1  //  15 : InChI=1S/C4H9NO3/c1-2(6)3(5)4(7)8/h2-3,6H,5H2,1H3,(H,7,8)/t2-,3+/m1/s1
214 : 0  //  15 : 0
221 : InChI=1S/C8H17NO2/c1-6(2)3-7(5-9)4-8(10)11/h6-7H,3-5,9H2,1-2H3,(H,10,11)/t7-/m0/s1  //  1775 : InChI=1S/C8H17NO2/c1-6(2)3-7(5-9)4-8(10)11/h6-7H,3-5,9H2,1-2H3,(H,10,11)/t7-/m0/s1
221 : 0  //  1775 : 0
222 : InCh

307 : InChI=1S/C16H10O/c17-14-9-7-12-5-4-10-2-1-3-11-6-8-13(14)16(12)15(10)11/h1-9,17H  //  2158 : InChI=1S/C16H10O/c17-14-9-7-12-5-4-10-2-1-3-11-6-8-13(14)16(12)15(10)11/h1-9,17H
307 : 1  //  2158 : 1
309 : InChI=1S/C10H6Cl8/c11-3-1-2-4(5(3)12)9(16)7(14)6(13)8(2,15)10(9,17)18/h2-5H,1H2  //  2508 : InChI=1S/C10H6Cl8/c11-3-1-2-4(5(3)12)9(16)7(14)6(13)8(2,15)10(9,17)18/h2-5H,1H2
309 : 0  //  2508 : 1
311 : InChI=1S/C6H4Cl2N2O2/c7-4-1-3(10(11)12)2-5(8)6(4)9/h1-2H,9H2  //  1946 : InChI=1S/C6H4Cl2N2O2/c7-4-1-3(10(11)12)2-5(8)6(4)9/h1-2H,9H2
311 : 1  //  1946 : 1
317 : InChI=1S/C21H24O2/c1-3-20-11-9-17-16-8-6-15(22)13-14(16)5-7-18(17)19(20)10-12-21(20,23)4-2/h2,9,11,13,18-19,23H,3,5-8,10,12H2,1H3/t18-,19+,20+,21+/m1/s1  //  1683 : InChI=1S/C21H24O2/c1-3-20-11-9-17-16-8-6-15(22)13-14(16)5-7-18(17)19(20)10-12-21(20,23)4-2/h2,9,11,13,18-19,23H,3,5-8,10,12H2,1H3/t18-,19+,20+,21+/m1/s1
317 : 0  //  1683 : 0
320 : InChI=1S/C12H16O7/c13-5-8-9(15)10(16)11(17)12(19-8)18-7-3-1-6(14)2-4-7/h1-4,8-17H,5H

414 : InChI=1S/C6H5NO3/c8-6-3-1-5(2-4-6)7(9)10/h1-4,8H  //  3047 : InChI=1S/C6H5NO3/c8-6-3-1-5(2-4-6)7(9)10/h1-4,8H
414 : 1  //  3047 : 1
420 : InChI=1S/C12H21N/c1-10-3-9-4-11(2,6-10)8-12(13,5-9)7-10/h9H,3-8,13H2,1-2H3  //  1510 : InChI=1S/C12H21N/c1-10-3-9-4-11(2,6-10)8-12(13,5-9)7-10/h9H,3-8,13H2,1-2H3
420 : 0  //  1510 : 0
421 : InChI=1S/C11H16N2O/c1-7-5-4-6-8(2)10(7)13-11(14)9(3)12/h4-6,9H,12H2,1-3H3,(H,13,14)  //  228 : InChI=1S/C11H16N2O/c1-7-5-4-6-8(2)10(7)13-11(14)9(3)12/h4-6,9H,12H2,1-3H3,(H,13,14)
421 : 0  //  228 : 0
435 : InChI=1S/C16H28O/c1-10-14-8-12(16(10,2)3)9-15(14)11-5-4-6-13(17)7-11/h10-15,17H,4-9H2,1-3H3  //  2355 : InChI=1S/C16H28O/c1-10-14-8-12(16(10,2)3)9-15(14)11-5-4-6-13(17)7-11/h10-15,17H,4-9H2,1-3H3
435 : 1  //  2355 : 1
439 : InChI=1S/C6H7ClN2/c7-4-1-2-5(8)6(9)3-4/h1-3H,8-9H2  //  2981 : InChI=1S/C6H7ClN2/c7-4-1-2-5(8)6(9)3-4/h1-3H,8-9H2
439 : 1  //  2981 : 1
441 : InChI=1S/C14H16ClO5PS/c1-4-17-21(22,18-5-2)20-10-6-7-11-9(3)13(15)14(16)19-12(11)8-10/h6-8H,4-

556 : InChI=1S/C12H18O/c1-3-4-5-6-11-8-7-10(2)9-12(11)13/h7-9,13H,3-6H2,1-2H3  //  3119 : InChI=1S/C12H18O/c1-3-4-5-6-11-8-7-10(2)9-12(11)13/h7-9,13H,3-6H2,1-2H3
556 : 1  //  3119 : 1
560 : InChI=1S/C13H19NO2/c1-2-3-4-7-10-16-12-9-6-5-8-11(12)13(14)15/h5-6,8-9H,2-4,7,10H2,1H3,(H2,14,15)  //  2358 : InChI=1S/C13H19NO2/c1-2-3-4-7-10-16-12-9-6-5-8-11(12)13(14)15/h5-6,8-9H,2-4,7,10H2,1H3,(H2,14,15)
560 : 1  //  2358 : 1
562 : InChI=1S/C13H12N4O3/c18-13(15-9-10-2-1-7-14-8-10)16-11-3-5-12(6-4-11)17(19)20/h1-8H,9H2,(H2,15,16,18)  //  1909 : InChI=1S/C13H12N4O3/c18-13(15-9-10-2-1-7-14-8-10)16-11-3-5-12(6-4-11)17(19)20/h1-8H,9H2,(H2,15,16,18)
562 : 1  //  1909 : 1
563 : InChI=1S/C13H11ClN2O2/c1-8-10(14)5-2-6-11(8)16-12-9(13(17)18)4-3-7-15-12/h2-7H,1H3,(H,15,16)(H,17,18)  //  3006 : InChI=1S/C13H11ClN2O2/c1-8-10(14)5-2-6-11(8)16-12-9(13(17)18)4-3-7-15-12/h2-7H,1H3,(H,15,16)(H,17,18)
563 : 1  //  3006 : 1
579 : InChI=1S/C7H15Cl2N2O2P/c8-2-5-11(6-3-9)14(12)10-4-1-7-13-14/h1-7H2,(H,10,12)  //  1024

707 : InChI=1S/C13H19NO4S/c1-3-9-14(10-4-2)19(17,18)12-7-5-11(6-8-12)13(15)16/h5-8H,3-4,9-10H2,1-2H3,(H,15,16)  //  490 : InChI=1S/C13H19NO4S/c1-3-9-14(10-4-2)19(17,18)12-7-5-11(6-8-12)13(15)16/h5-8H,3-4,9-10H2,1-2H3,(H,15,16)
707 : 0  //  490 : 0
708 : InChI=1S/C23H36N2O2/c1-21(2,3)25-20(27)17-8-7-15-14-6-9-18-23(5,13-11-19(26)24-18)16(14)10-12-22(15,17)4/h11,13-18H,6-10,12H2,1-5H3,(H,24,26)(H,25,27)/t14-,15-,16-,17+,18+,22-,23+/m0/s1  //  75 : InChI=1S/C23H36N2O2/c1-21(2,3)25-20(27)17-8-7-15-14-6-9-18-23(5,13-11-19(26)24-18)16(14)10-12-22(15,17)4/h11,13-18H,6-10,12H2,1-5H3,(H,24,26)(H,25,27)/t14-,15-,16-,17+,18+,22-,23+/m0/s1
708 : 0  //  75 : 0
709 : InChI=1S/C12H16F3N/c1-3-16-9(2)7-10-5-4-6-11(8-10)12(13,14)15/h4-6,8-9,16H,3,7H2,1-2H3/t9-/m0/s1  //  690 : InChI=1S/C12H16F3N/c1-3-16-9(2)7-10-5-4-6-11(8-10)12(13,14)15/h4-6,8-9,16H,3,7H2,1-2H3/t9-/m0/s1
709 : 0  //  690 : 0
710 : InChI=1S/C7H6Cl2O/c8-6-2-1-5(4-10)7(9)3-6/h1-3,10H,4H2  //  1265 : InChI=1S/C7H6Cl2O/c8-6-2-1-5(4-10)7(9)3

836 : InChI=1S/C18H26O/c1-11-8-16-15(9-14(11)13(3)19)17(4,5)10-12(2)18(16,6)7/h8-9,12H,10H2,1-7H3  //  2996 : InChI=1S/C18H26O/c1-11-8-16-15(9-14(11)13(3)19)17(4,5)10-12(2)18(16,6)7/h8-9,12H,10H2,1-7H3
836 : 1  //  2996 : 1
837 : InChI=1S/C13H17NO/c1-4-8-13(15)14(5-2)12-10-7-6-9-11(12)3/h4,6-10H,5H2,1-3H3/b8-4+  //  1487 : InChI=1S/C13H17NO/c1-4-8-13(15)14(5-2)12-10-7-6-9-11(12)3/h4,6-10H,5H2,1-3H3/b8-4+
837 : 0  //  1487 : 0
840 : InChI=1S/C18H22O2/c1-18-9-8-14-13-5-3-12(19)10-11(13)2-4-15(14)16(18)6-7-17(18)20/h3,5,10,14-16,19H,2,4,6-9H2,1H3/t14-,15-,16+,18+/m1/s1  //  1522 : InChI=1S/C18H22O2/c1-18-9-8-14-13-5-3-12(19)10-11(13)2-4-15(14)16(18)6-7-17(18)20/h3,5,10,14-16,19H,2,4,6-9H2,1H3/t14-,15-,16+,18+/m1/s1
840 : 1  //  1522 : 0
842 : InChI=1S/C20H41N5O7/c1-20(28)8-29-19(14(27)17(20)25-3)32-16-12(23)6-11(22)15(13(16)26)31-18-10(21)5-4-9(30-18)7-24-2/h9-19,24-28H,4-8,21-23H2,1-3H3/t9-,10+,11-,12+,13-,14+,15+,16-,17+,18+,19+,20-/m0/s1  //  628 : InChI=1S/C20H41N5O7/c1-20(28)8-29-19(

933 : InChI=1S/C14H10O/c15-14-9-10-5-1-2-6-11(10)12-7-3-4-8-13(12)14/h1-9,15H  //  2497 : InChI=1S/C14H10O/c15-14-9-10-5-1-2-6-11(10)12-7-3-4-8-13(12)14/h1-9,15H
933 : 1  //  2497 : 1
939 : InChI=1S/C19H22F2N4O3/c1-8-5-24(6-9(2)23-8)17-13(20)15(22)12-16(14(17)21)25(10-3-4-10)7-11(18(12)26)19(27)28/h7-10,23H,3-6,22H2,1-2H3,(H,27,28)/t8-,9+  //  1575 : InChI=1S/C19H22F2N4O3/c1-8-5-24(6-9(2)23-8)17-13(20)15(22)12-16(14(17)21)25(10-3-4-10)7-11(18(12)26)19(27)28/h7-10,23H,3-6,22H2,1-2H3,(H,27,28)/t8-,9+
939 : 0  //  1575 : 0
940 : InChI=1S/C21H24FN3O4/c1-29-20-17-13(19(26)14(21(27)28)9-25(17)12-4-5-12)7-15(22)18(20)24-8-11-3-2-6-23-16(11)10-24/h7,9,11-12,16,23H,2-6,8,10H2,1H3,(H,27,28)/t11-,16+/m0/s1  //  393 : InChI=1S/C21H24FN3O4/c1-29-20-17-13(19(26)14(21(27)28)9-25(17)12-4-5-12)7-15(22)18(20)24-8-11-3-2-6-23-16(11)10-24/h7,9,11-12,16,23H,2-6,8,10H2,1H3,(H,27,28)/t11-,16+/m0/s1
940 : 0  //  393 : 0
941 : InChI=1S/C27H36N2O4/c1-4-33-25-17-20(12-13-22(25)27(31)32)18-26(30)28-23(16-19(2)3)2

1062 : InChI=1S/C24H26O/c1-23(2,18-11-7-5-8-12-18)20-15-16-22(25)21(17-20)24(3,4)19-13-9-6-10-14-19/h5-17,25H,1-4H3  //  3025 : InChI=1S/C24H26O/c1-23(2,18-11-7-5-8-12-18)20-15-16-22(25)21(17-20)24(3,4)19-13-9-6-10-14-19/h5-17,25H,1-4H3
1062 : 1  //  3025 : 1
1069 : InChI=1S/C10H13NO4S2/c1-7-5-8-3-2-4-16(12,13)10(8)6-9(7)17(11,14)15/h5-6H,2-4H2,1H3,(H2,11,14,15)  //  1806 : InChI=1S/C10H13NO4S2/c1-7-5-8-3-2-4-16(12,13)10(8)6-9(7)17(11,14)15/h5-6H,2-4H2,1H3,(H2,11,14,15)
1069 : 0  //  1806 : 0
1072 : InChI=1S/C6H12N3PS/c11-10(7-1-2-7,8-3-4-8)9-5-6-9/h1-6H2  //  708 : InChI=1S/C6H12N3PS/c11-10(7-1-2-7,8-3-4-8)9-5-6-9/h1-6H2
1072 : 0  //  708 : 0
1073 : InChI=1S/C28H40O7/c1-5-7-24(33)35-28(22(31)16-34-23(32)6-2)13-11-20-19-9-8-17-14-18(29)10-12-26(17,3)25(19)21(30)15-27(20,28)4/h14,19-21,25,30H,5-13,15-16H2,1-4H3/t19-,20-,21-,25+,26-,27-,28-/m0/s1  //  405 : InChI=1S/C28H40O7/c1-5-7-24(33)35-28(22(31)16-34-23(32)6-2)13-11-20-19-9-8-17-14-18(29)10-12-26(17,3)25(19)21(30)15-27(20,28)4/h14,1

1199 : InChI=1S/C11H11N3O2S/c12-9-4-6-10(7-5-9)17(15,16)14-11-3-1-2-8-13-11/h1-8H,12H2,(H,13,14)  //  1314 : InChI=1S/C11H11N3O2S/c12-9-4-6-10(7-5-9)17(15,16)14-11-3-1-2-8-13-11/h1-8H,12H2,(H,13,14)
1199 : 0  //  1314 : 0
1205 : InChI=1S/C6H8N2/c7-5-3-1-2-4-6(5)8/h1-4H,7-8H2  //  2566 : InChI=1S/C6H8N2/c7-5-3-1-2-4-6(5)8/h1-4H,7-8H2
1205 : 1  //  2566 : 1
1208 : InChI=1S/C17H14ClF2IN2O2/c18-12-7-10(21)3-6-14(12)22-16-11(4-5-13(19)15(16)20)17(24)23-25-8-9-1-2-9/h3-7,9,22H,1-2,8H2,(H,23,24)  //  2838 : InChI=1S/C17H14ClF2IN2O2/c18-12-7-10(21)3-6-14(12)22-16-11(4-5-13(19)15(16)20)17(24)23-25-8-9-1-2-9/h3-7,9,22H,1-2,8H2,(H,23,24)
1208 : 1  //  2838 : 1
1209 : InChI=1S/C21H27FO6/c1-18-6-5-12(24)7-11(18)3-4-13-14-8-15(25)21(28,17(27)10-23)19(14,2)9-16(26)20(13,18)22/h5-7,13-16,23,25-26,28H,3-4,8-10H2,1-2H3/t13-,14-,15+,16-,18-,19-,20-,21-/m0/s1  //  721 : InChI=1S/C21H27FO6/c1-18-6-5-12(24)7-11(18)3-4-13-14-8-15(25)21(28,17(27)10-23)19(14,2)9-16(26)20(13,18)22/h5-7,13-16,23,25-26,28H,3-4,8-

1312 : InChI=1S/C9H16ClN3O2/c10-6-7-13(12-15)9(14)11-8-4-2-1-3-5-8/h8H,1-7H2,(H,11,14)  //  1619 : InChI=1S/C9H16ClN3O2/c10-6-7-13(12-15)9(14)11-8-4-2-1-3-5-8/h8H,1-7H2,(H,11,14)
1312 : 0  //  1619 : 0
1317 : InChI=1S/C6H4Cl2O2/c7-3-1-4(8)6(10)2-5(3)9/h1-2,9-10H  //  2746 : InChI=1S/C6H4Cl2O2/c7-3-1-4(8)6(10)2-5(3)9/h1-2,9-10H
1317 : 1  //  2746 : 1
1318 : InChI=1S/C21H21N5O6/c1-15(27)31-11-9-25(10-12-32-16(2)28)19-5-3-18(4-6-19)23-24-21-8-7-20(26(29)30)13-17(21)14-22/h3-8,13H,9-12H2,1-2H3  //  2936 : InChI=1S/C21H21N5O6/c1-15(27)31-11-9-25(10-12-32-16(2)28)19-5-3-18(4-6-19)23-24-21-8-7-20(26(29)30)13-17(21)14-22/h3-8,13H,9-12H2,1-2H3
1318 : 1  //  2936 : 1
1319 : InChI=1S/C14H30O4S/c1-5-7-8-13(6-2)9-10-14(11-12(3)4)18-19(15,16)17/h12-14H,5-11H2,1-4H3,(H,15,16,17)  //  1673 : InChI=1S/C14H30O4S/c1-5-7-8-13(6-2)9-10-14(11-12(3)4)18-19(15,16)17/h12-14H,5-11H2,1-4H3,(H,15,16,17)
1319 : 0  //  1673 : 0
1322 : InChI=1S/C12H6O3/c13-11-8-5-1-3-7-4-2-6-9(10(7)8)12(14)15-11/h1-6H  //  2354 : In

1436 : InChI=1S/C11H19N5S/c1-11(2,3)16-9-13-8(12-7-5-6-7)14-10(15-9)17-4/h7H,5-6H2,1-4H3,(H2,12,13,14,15,16)  //  2463 : InChI=1S/C11H19N5S/c1-11(2,3)16-9-13-8(12-7-5-6-7)14-10(15-9)17-4/h7H,5-6H2,1-4H3,(H2,12,13,14,15,16)
1436 : 1  //  2463 : 1
1441 : InChI=1S/C15H14F3N3O4S2/c16-15(17,18)10-7-11-13(8-12(10)26(19,22)23)27(24,25)21-14(20-11)6-9-4-2-1-3-5-9/h1-5,7-8,14,20-21H,6H2,(H2,19,22,23)  //  686 : InChI=1S/C15H14F3N3O4S2/c16-15(17,18)10-7-11-13(8-12(10)26(19,22)23)27(24,25)21-14(20-11)6-9-4-2-1-3-5-9/h1-5,7-8,14,20-21H,6H2,(H2,19,22,23)
1441 : 0  //  686 : 0
1442 : InChI=1S/C14H18N2O7/c1-5-9(4)11-6-10(15(18)19)7-12(16(20)21)13(11)23-14(17)22-8(2)3/h6-9H,5H2,1-4H3  //  3390 : InChI=1S/C14H18N2O7/c1-5-9(4)11-6-10(15(18)19)7-12(16(20)21)13(11)23-14(17)22-8(2)3/h6-9H,5H2,1-4H3
1442 : 1  //  3390 : 1
1443 : InChI=1S/C36H42N2/c1-37(2,35-31-21-11-7-17-27(31)28-18-8-12-22-32(28)35)25-15-5-6-16-26-38(3,4)36-33-23-13-9-19-29(33)30-20-10-14-24-34(30)36/h7-14,17-24,35-36H,5-6,15-16,25-26H2,1-

1566 : InChI=1S/C21H26Cl2O/c1-20(2,3)13-21(4,5)16-7-9-19(24)15(11-16)10-14-6-8-17(22)12-18(14)23/h6-9,11-12,24H,10,13H2,1-5H3  //  3278 : InChI=1S/C21H26Cl2O/c1-20(2,3)13-21(4,5)16-7-9-19(24)15(11-16)10-14-6-8-17(22)12-18(14)23/h6-9,11-12,24H,10,13H2,1-5H3
1566 : 1  //  3278 : 1
1568 : InChI=1S/C7H7Cl3NO3PS/c1-12-15(16,13-2)14-7-5(9)3-4(8)6(10)11-7/h3H,1-2H3  //  2036 : InChI=1S/C7H7Cl3NO3PS/c1-12-15(16,13-2)14-7-5(9)3-4(8)6(10)11-7/h3H,1-2H3
1568 : 1  //  2036 : 1
1573 : InChI=1S/C22H30N2O5S2/c1-3-29-21(28)17(10-9-16-7-5-4-6-8-16)23-15(2)19(25)24-14-22(30-11-12-31-22)13-18(24)20(26)27/h4-8,15,17-18,23H,3,9-14H2,1-2H3,(H,26,27)/t15-,17-,18-/m0/s1  //  188 : InChI=1S/C22H30N2O5S2/c1-3-29-21(28)17(10-9-16-7-5-4-6-8-16)23-15(2)19(25)24-14-22(30-11-12-31-22)13-18(24)20(26)27/h4-8,15,17-18,23H,3,9-14H2,1-2H3,(H,26,27)/t15-,17-,18-/m0/s1
1573 : 0  //  188 : 0
1577 : InChI=1S/C9H10N2O3/c10-7-3-1-6(2-4-7)9(14)11-5-8(12)13/h1-4H,5,10H2,(H,11,14)(H,12,13)  //  527 : InChI=1S/C9H10N2O3/c10-7-3-1-

1678 : InChI=1S/C8H17NO5/c10-2-1-9-3-6(12)8(14)7(13)5(9)4-11/h5-8,10-14H,1-4H2/t5-,6+,7-,8-/m1/s1  //  744 : InChI=1S/C8H17NO5/c10-2-1-9-3-6(12)8(14)7(13)5(9)4-11/h5-8,10-14H,1-4H2/t5-,6+,7-,8-/m1/s1
1678 : 0  //  744 : 0
1683 : InChI=1S/C13H12Cl2N2/c14-10-6-8(1-3-12(10)16)5-9-2-4-13(17)11(15)7-9/h1-4,6-7H,5,16-17H2  //  3365 : InChI=1S/C13H12Cl2N2/c14-10-6-8(1-3-12(10)16)5-9-2-4-13(17)11(15)7-9/h1-4,6-7H,5,16-17H2
1683 : 0  //  3365 : 1
1691 : InChI=1S/C10H15N5/c11-9(12)15-10(13)14-7-6-8-4-2-1-3-5-8/h1-5H,6-7H2,(H6,11,12,13,14,15)  //  769 : InChI=1S/C10H15N5/c11-9(12)15-10(13)14-7-6-8-4-2-1-3-5-8/h1-5H,6-7H2,(H6,11,12,13,14,15)
1691 : 0  //  769 : 0
1691 : InChI=1S/C10H15N5/c11-9(12)15-10(13)14-7-6-8-4-2-1-3-5-8/h1-5H,6-7H2,(H6,11,12,13,14,15)  //  2870 : InChI=1S/C10H15N5/c11-9(12)15-10(13)14-7-6-8-4-2-1-3-5-8/h1-5H,6-7H2,(H6,11,12,13,14,15)
1691 : 0  //  2870 : 1
1694 : InChI=1S/C14H22O/c1-13(2,3)10-7-8-12(15)11(9-10)14(4,5)6/h7-9,15H,1-6H3  //  2516 : InChI=1S/C14H22O/c1-13(2,3)10

1791 : InChI=1S/C15H22N2O/c1-11-7-6-8-12(2)14(11)16-15(18)13-9-4-5-10-17(13)3/h6-8,13H,4-5,9-10H2,1-3H3,(H,16,18)  //  1293 : InChI=1S/C15H22N2O/c1-11-7-6-8-12(2)14(11)16-15(18)13-9-4-5-10-17(13)3/h6-8,13H,4-5,9-10H2,1-3H3,(H,16,18)
1791 : 0  //  1293 : 0
1792 : InChI=1S/C7H6N2O4/c1-5-2-3-6(8(10)11)7(4-5)9(12)13/h2-4H,1H3  //  3239 : InChI=1S/C7H6N2O4/c1-5-2-3-6(8(10)11)7(4-5)9(12)13/h2-4H,1H3
1792 : 0  //  3239 : 1
1794 : InChI=1S/C9H10N2S/c1-5-3-7-8(4-6(5)2)12-9(10)11-7/h3-4H,1-2H3,(H2,10,11)  //  3381 : InChI=1S/C9H10N2S/c1-5-3-7-8(4-6(5)2)12-9(10)11-7/h3-4H,1-2H3,(H2,10,11)
1794 : 0  //  3381 : 1
1799 : InChI=1S/C17H21NO3/c1-12(17(21)14-4-8-16(20)9-5-14)18-11-10-13-2-6-15(19)7-3-13/h2-9,12,17-21H,10-11H2,1H3/t12-,17-/m0/s1  //  1259 : InChI=1S/C17H21NO3/c1-12(17(21)14-4-8-16(20)9-5-14)18-11-10-13-2-6-15(19)7-3-13/h2-9,12,17-21H,10-11H2,1H3/t12-,17-/m0/s1
1799 : 0  //  1259 : 0
1800 : InChI=1S/C26H29N3O2/c1-27(2)20-11-7-18(8-12-20)26(19-9-13-21(14-10-19)28(3)4)24-16-15-22(29(5)6)17-

1924 : InChI=1S/C20H30O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18-19-20(21)22/h3-4,6-7,9-10,12-13,15-16H,2,5,8,11,14,17-19H2,1H3,(H,21,22)/b4-3-,7-6-,10-9-,13-12-,16-15-  //  1408 : InChI=1S/C20H30O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18-19-20(21)22/h3-4,6-7,9-10,12-13,15-16H,2,5,8,11,14,17-19H2,1H3,(H,21,22)/b4-3-,7-6-,10-9-,13-12-,16-15-
1924 : 0  //  1408 : 0
1928 : InChI=1S/C11H7NS/c13-8-12-11-7-3-5-9-4-1-2-6-10(9)11/h1-7H  //  2164 : InChI=1S/C11H7NS/c13-8-12-11-7-3-5-9-4-1-2-6-10(9)11/h1-7H
1928 : 0  //  2164 : 1
1930 : InChI=1S/C15H12O3/c1-8-5-10-7-9-3-2-4-11(16)13(9)15(18)14(10)12(17)6-8/h2-7,16-18H,1H3  //  3396 : InChI=1S/C15H12O3/c1-8-5-10-7-9-3-2-4-11(16)13(9)15(18)14(10)12(17)6-8/h2-7,16-18H,1H3
1930 : 1  //  3396 : 1
1936 : InChI=1S/C7H6ClN3O4S2/c8-4-1-5-7(2-6(4)16(9,12)13)17(14,15)11-3-10-5/h1-3H,(H,10,11)(H2,9,12,13)  //  936 : InChI=1S/C7H6ClN3O4S2/c8-4-1-5-7(2-6(4)16(9,12)13)17(14,15)11-3-10-5/h1-3H,(H,10,11)(H2,9,12,13)
1936 : 0  //  936 : 0
1938 : InChI

2014 : InChI=1S/C16H12O/c1-11(17)14-9-8-13-7-6-12-4-2-3-5-15(12)16(13)10-14/h2-10H,1H3  //  2018 : InChI=1S/C16H12O/c1-11(17)14-9-8-13-7-6-12-4-2-3-5-15(12)16(13)10-14/h2-10H,1H3
2014 : 1  //  2018 : 1
2015 : InChI=1S/C28H44O3/c1-3-4-5-6-7-8-9-10-27(30)31-26-16-15-25-24-13-11-20-19-21(29)12-14-22(20)23(24)17-18-28(25,26)2/h19,22-26H,3-18H2,1-2H3/t22-,23+,24+,25-,26-,28-/m0/s1  //  128 : InChI=1S/C28H44O3/c1-3-4-5-6-7-8-9-10-27(30)31-26-16-15-25-24-13-11-20-19-21(29)12-14-22(20)23(24)17-18-28(25,26)2/h19,22-26H,3-18H2,1-2H3/t22-,23+,24+,25-,26-,28-/m0/s1
2015 : 0  //  128 : 0
2022 : InChI=1S/C10H11N3O3S/c1-7-6-10(12-16-7)13-17(14,15)9-4-2-8(11)3-5-9/h2-6H,11H2,1H3,(H,12,13)  //  475 : InChI=1S/C10H11N3O3S/c1-7-6-10(12-16-7)13-17(14,15)9-4-2-8(11)3-5-9/h2-6H,11H2,1H3,(H,12,13)
2022 : 0  //  475 : 0
2023 : InChI=1S/C12H18N2O3S/c1-3-4-9-13-12(15)14-18(16,17)11-7-5-10(2)6-8-11/h5-8H,3-4,9H2,1-2H3,(H2,13,14,15)  //  1706 : InChI=1S/C12H18N2O3S/c1-3-4-9-13-12(15)14-18(16,17)11-7-5-10(2)6-8-11

2124 : InChI=1S/C3H2N2S2/c4-1-6-3-7-2-5/h3H2  //  3064 : InChI=1S/C3H2N2S2/c4-1-6-3-7-2-5/h3H2
2124 : 0  //  3064 : 1
2125 : InChI=1S/C11H17NO3/c1-7(2)12-6-11(15)8-3-4-9(13)10(14)5-8/h3-5,7,11-15H,6H2,1-2H3  //  3015 : InChI=1S/C11H17NO3/c1-7(2)12-6-11(15)8-3-4-9(13)10(14)5-8/h3-5,7,11-15H,6H2,1-2H3
2125 : 0  //  3015 : 1
2127 : InChI=1S/C10H19O6PS2/c1-5-15-9(11)7-8(10(12)16-6-2)19-17(18,13-3)14-4/h8H,5-7H2,1-4H3  //  0 : InChI=1S/C10H19O6PS2/c1-5-15-9(11)7-8(10(12)16-6-2)19-17(18,13-3)14-4/h8H,5-7H2,1-4H3
2127 : 0  //  0 : 0
2128 : InChI=1S/C14H20N2O/c1-11-7-5-6-10-13(11)16-14(17)15-12-8-3-2-4-9-12/h2-4,8-9,11,13H,5-7,10H2,1H3,(H2,15,16,17)  //  2478 : InChI=1S/C14H20N2O/c1-11-7-5-6-10-13(11)16-14(17)15-12-8-3-2-4-9-12/h2-4,8-9,11,13H,5-7,10H2,1H3,(H2,15,16,17)
2128 : 1  //  2478 : 1
2131 : InChI=1S/C23H16O3/c24-21-17-13-7-8-14-18(17)22(25)20(21)23(26)19(15-9-3-1-4-10-15)16-11-5-2-6-12-16/h1-14,19-20H  //  2215 : InChI=1S/C23H16O3/c24-21-17-13-7-8-14-18(17)22(25)20(21)23(26)19(15-9-3-

2251 : InChI=1S/C9H12O/c1-2-3-8-4-6-9(10)7-5-8/h4-7,10H,2-3H2,1H3  //  3288 : InChI=1S/C9H12O/c1-2-3-8-4-6-9(10)7-5-8/h4-7,10H,2-3H2,1H3
2251 : 1  //  3288 : 1
2264 : InChI=1S/C7H10N2OS/c1-2-3-5-4-6(10)9-7(11)8-5/h4H,2-3H2,1H3,(H2,8,9,10,11)  //  1035 : InChI=1S/C7H10N2OS/c1-2-3-5-4-6(10)9-7(11)8-5/h4H,2-3H2,1H3,(H2,8,9,10,11)
2264 : 0  //  1035 : 0
2265 : InChI=1S/C12H20N2S4/c15-11(13-7-3-1-4-8-13)17-18-12(16)14-9-5-2-6-10-14/h1-10H2  //  1563 : InChI=1S/C12H20N2S4/c15-11(13-7-3-1-4-8-13)17-18-12(16)14-9-5-2-6-10-14/h1-10H2
2265 : 0  //  1563 : 0
2266 : InChI=1S/C13H20O/c1-2-3-4-5-6-7-12-8-10-13(14)11-9-12/h8-11,14H,2-7H2,1H3  //  2110 : InChI=1S/C13H20O/c1-2-3-4-5-6-7-12-8-10-13(14)11-9-12/h8-11,14H,2-7H2,1H3
2266 : 1  //  2110 : 1
2268 : InChI=1S/C12H15ClO3/c1-4-15-11(14)12(2,3)16-10-7-5-9(13)6-8-10/h5-8H,4H2,1-3H3  //  2767 : InChI=1S/C12H15ClO3/c1-4-15-11(14)12(2,3)16-10-7-5-9(13)6-8-10/h5-8H,4H2,1-3H3
2268 : 0  //  2767 : 1
2279 : InChI=1S/C7H11N3O3/c1-5(11)4-9-6(2)8-3-7(9)10(12)

2409 : InChI=1S/C17H22N4O/c1-14-13-16(15-5-3-2-4-6-15)19-20-17(14)18-7-8-21-9-11-22-12-10-21/h2-6,13H,7-12H2,1H3,(H,18,20)  //  1794 : InChI=1S/C17H22N4O/c1-14-13-16(15-5-3-2-4-6-15)19-20-17(14)18-7-8-21-9-11-22-12-10-21/h2-6,13H,7-12H2,1H3,(H,18,20)
2409 : 0  //  1794 : 0
2412 : InChI=1S/C14H27N2/c1-3-4-5-6-7-8-9-10-11-16-13-12-15(2)14-16/h12-14H,3-11H2,1-2H3/q+1  //  2184 : InChI=1S/C14H27N2/c1-3-4-5-6-7-8-9-10-11-16-13-12-15(2)14-16/h12-14H,3-11H2,1-2H3/q+1
2412 : 1  //  2184 : 1
2413 : InChI=1S/C9H8Cl3NO2S/c10-9(11,12)16-13-7(14)5-3-1-2-4-6(5)8(13)15/h1-2,5-6H,3-4H2  //  3118 : InChI=1S/C9H8Cl3NO2S/c10-9(11,12)16-13-7(14)5-3-1-2-4-6(5)8(13)15/h1-2,5-6H,3-4H2
2413 : 1  //  3118 : 1
2422 : InChI=1S/C6H12N2O4S2/c7-3(5(9)10)1-13-14-2-4(8)6(11)12/h3-4H,1-2,7-8H2,(H,9,10)(H,11,12)/t3-,4-/m0/s1  //  1174 : InChI=1S/C6H12N2O4S2/c7-3(5(9)10)1-13-14-2-4(8)6(11)12/h3-4H,1-2,7-8H2,(H,9,10)(H,11,12)/t3-,4-/m0/s1
2422 : 0  //  1174 : 0
2428 : InChI=1S/C13H11NO3/c15-10-7-5-9(6-8-10)14-13(17)11-3-

2578 : InChI=1S/C24H32O4S/c1-14(25)29-19-13-15-12-16(26)4-8-22(15,2)17-5-9-23(3)18(21(17)19)6-10-24(23)11-7-20(27)28-24/h12,17-19,21H,4-11,13H2,1-3H3/t17-,18-,19+,21+,22-,23-,24+/m0/s1  //  488 : InChI=1S/C24H32O4S/c1-14(25)29-19-13-15-12-16(26)4-8-22(15,2)17-5-9-23(3)18(21(17)19)6-10-24(23)11-7-20(27)28-24/h12,17-19,21H,4-11,13H2,1-3H3/t17-,18-,19+,21+,22-,23-,24+/m0/s1
2578 : 0  //  488 : 0
2579 : InChI=1S/C8H11NO3/c1-5-8(12)7(4-11)6(3-10)2-9-5/h2,10-12H,3-4H2,1H3  //  1370 : InChI=1S/C8H11NO3/c1-5-8(12)7(4-11)6(3-10)2-9-5/h2,10-12H,3-4H2,1H3
2579 : 0  //  1370 : 0
2580 : InChI=1S/C14H26O2/c1-11(2)9-13(5,15)7-8-14(6,16)10-12(3)4/h11-12,15-16H,9-10H2,1-6H3  //  3159 : InChI=1S/C14H26O2/c1-11(2)9-13(5,15)7-8-14(6,16)10-12(3)4/h11-12,15-16H,9-10H2,1-6H3
2580 : 1  //  3159 : 1
2581 : InChI=1S/C6H5N3O4/c7-5-2-1-4(8(10)11)3-6(5)9(12)13/h1-3H,7H2  //  3277 : InChI=1S/C6H5N3O4/c7-5-2-1-4(8(10)11)3-6(5)9(12)13/h1-3H,7H2
2581 : 1  //  3277 : 1
2593 : InChI=1S/C12H18O/c1-5-9-6-7-11(13)10(8-9)12

2691 : InChI=1S/C21H16ClF3N4O3/c1-26-19(30)18-11-15(8-9-27-18)32-14-5-2-12(3-6-14)28-20(31)29-13-4-7-17(22)16(10-13)21(23,24)25/h2-11H,1H3,(H,26,30)(H2,28,29,31)  //  2070 : InChI=1S/C21H16ClF3N4O3/c1-26-19(30)18-11-15(8-9-27-18)32-14-5-2-12(3-6-14)28-20(31)29-13-4-7-17(22)16(10-13)21(23,24)25/h2-11H,1H3,(H,26,30)(H2,28,29,31)
2691 : 1  //  2070 : 1
2693 : InChI=1S/C12H6Cl4O2S/c13-7-1-3-8(4-2-7)19(17,18)12-6-10(15)9(14)5-11(12)16/h1-6H  //  2372 : InChI=1S/C12H6Cl4O2S/c13-7-1-3-8(4-2-7)19(17,18)12-6-10(15)9(14)5-11(12)16/h1-6H
2693 : 1  //  2372 : 1
2697 : InChI=1S/C20H17FO3S/c1-12-17(9-13-3-6-15(7-4-13)25(2)24)16-8-5-14(21)10-19(16)18(12)11-20(22)23/h3-10H,11H2,1-2H3,(H,22,23)/b17-9-  //  2485 : InChI=1S/C20H17FO3S/c1-12-17(9-13-3-6-15(7-4-13)25(2)24)16-8-5-14(21)10-19(16)18(12)11-20(22)23/h3-10H,11H2,1-2H3,(H,22,23)/b17-9-
2697 : 0  //  2485 : 1
2703 : InChI=1S/C14H14N8O4S3/c1-6-17-18-14(29-6)28-4-7-3-27-12-9(11(24)22(12)10(7)13(25)26)16-8(23)2-21-5-15-19-20-21/h5,9,12H,2-4H2,1H3,(H,

2809 : InChI=1S/C21H27FO5/c1-19-5-3-11(24)7-14(19)15(22)8-12-13-4-6-21(27,17(26)10-23)20(13,2)9-16(25)18(12)19/h3,5,7,12-13,15-16,18,23,25,27H,4,6,8-10H2,1-2H3/t12-,13-,15-,16-,18+,19-,20-,21-/m0/s1  //  1180 : InChI=1S/C21H27FO5/c1-19-5-3-11(24)7-14(19)15(22)8-12-13-4-6-21(27,17(26)10-23)20(13,2)9-16(25)18(12)19/h3,5,7,12-13,15-16,18,23,25,27H,4,6,8-10H2,1-2H3/t12-,13-,15-,16-,18+,19-,20-,21-/m0/s1
2809 : 0  //  1180 : 0
2820 : InChI=1S/C24H28O2/c1-15-13-20-21(24(5,6)12-11-23(20,3)4)14-19(15)16(2)17-7-9-18(10-8-17)22(25)26/h7-10,13-14H,2,11-12H2,1,3-6H3,(H,25,26)  //  2327 : InChI=1S/C24H28O2/c1-15-13-20-21(24(5,6)12-11-23(20,3)4)14-19(15)16(2)17-7-9-18(10-8-17)22(25)26/h7-10,13-14H,2,11-12H2,1,3-6H3,(H,25,26)
2820 : 1  //  2327 : 1
2825 : InChI=1S/C18H13Cl2NO/c19-16-7-3-13(4-8-16)18(22,15-2-1-11-21-12-15)14-5-9-17(20)10-6-14/h1-12,22H  //  2145 : InChI=1S/C18H13Cl2NO/c19-16-7-3-13(4-8-16)18(22,15-2-1-11-21-12-15)14-5-9-17(20)10-6-14/h1-12,22H
2825 : 1  //  2145 : 1
2829 : InChI=1S/C3

2930 : InChI=1S/C18H37N5O9/c19-3-9-8(25)2-7(22)17(29-9)31-15-5(20)1-6(21)16(14(15)28)32-18-13(27)11(23)12(26)10(4-24)30-18/h5-18,24-28H,1-4,19-23H2/t5-,6+,7+,8-,9+,10+,11-,12+,13+,14-,15+,16-,17+,18+/m0/s1  //  944 : InChI=1S/C18H37N5O9/c19-3-9-8(25)2-7(22)17(29-9)31-15-5(20)1-6(21)16(14(15)28)32-18-13(27)11(23)12(26)10(4-24)30-18/h5-18,24-28H,1-4,19-23H2/t5-,6+,7+,8-,9+,10+,11-,12+,13+,14-,15+,16-,17+,18+/m0/s1
2930 : 0  //  944 : 0
2934 : InChI=1S/C13H9Cl3O2/c1-17-13-7-9(15)3-5-12(13)18-11-4-2-8(14)6-10(11)16/h2-7H,1H3  //  2857 : InChI=1S/C13H9Cl3O2/c1-17-13-7-9(15)3-5-12(13)18-11-4-2-8(14)6-10(11)16/h2-7H,1H3
2934 : 1  //  2857 : 1
2935 : InChI=1S/C15H24O/c1-10-8-11(14(2,3)4)13(16)12(9-10)15(5,6)7/h8-9,16H,1-7H3  //  3241 : InChI=1S/C15H24O/c1-10-8-11(14(2,3)4)13(16)12(9-10)15(5,6)7/h8-9,16H,1-7H3
2935 : 0  //  3241 : 1
2937 : InChI=1S/C23H25N/c1-19(20-11-5-2-6-12-20)24-18-17-23(21-13-7-3-8-14-21)22-15-9-4-10-16-22/h2-16,19,23-24H,17-18H2,1H3  //  3083 : InChI=1S/C23H25N/c1-19(20-1

3060 : InChI=1S/C47H74O19/c1-20-41(64-36-16-30(50)42(21(2)60-36)65-37-17-31(51)43(22(3)61-37)66-44-40(56)39(55)38(54)32(18-48)63-44)29(49)15-35(59-20)62-25-8-10-45(4)24(13-25)6-7-27-28(45)14-33(52)46(5)26(9-11-47(27,46)57)23-12-34(53)58-19-23/h12,20-22,24-33,35-44,48-52,54-57H,6-11,13-19H2,1-5H3/t20-,21-,22-,24-,25+,26-,27-,28+,29+,30+,31+,32-,33-,35+,36+,37+,38-,39+,40-,41-,42-,43-,44+,45+,46+,47+/m1/s1  //  919 : InChI=1S/C47H74O19/c1-20-41(64-36-16-30(50)42(21(2)60-36)65-37-17-31(51)43(22(3)61-37)66-44-40(56)39(55)38(54)32(18-48)63-44)29(49)15-35(59-20)62-25-8-10-45(4)24(13-25)6-7-27-28(45)14-33(52)46(5)26(9-11-47(27,46)57)23-12-34(53)58-19-23/h12,20-22,24-33,35-44,48-52,54-57H,6-11,13-19H2,1-5H3/t20-,21-,22-,24-,25+,26-,27-,28+,29+,30+,31+,32-,33-,35+,36+,37+,38-,39+,40-,41-,42-,43-,44+,45+,46+,47+/m1/s1
3060 : 1  //  919 : 0
3064 : InChI=1S/C10H12O2/c11-10(12)8-4-7-9-5-2-1-3-6-9/h1-3,5-6H,4,7-8H2,(H,11,12)  //  135 : InChI=1S/C10H12O2/c11-10(12)8-4-7-9-5-2-1-3-6-9/h1-3,5-6H,4,7-8H

3154 : InChI=1S/CHI3/c2-1(3)4/h1H  //  779 : InChI=1S/CHI3/c2-1(3)4/h1H
3154 : 0  //  779 : 0
3159 : InChI=1S/C12H18O/c1-8(2)10-6-5-7-11(9(3)4)12(10)13/h5-9,13H,1-4H3  //  2409 : InChI=1S/C12H18O/c1-8(2)10-6-5-7-11(9(3)4)12(10)13/h5-9,13H,1-4H3
3159 : 0  //  2409 : 1
3161 : InChI=1S/C24H21F2NO3/c25-17-5-1-15(2-6-17)22(29)14-13-21-23(16-3-11-20(28)12-4-16)27(24(21)30)19-9-7-18(26)8-10-19/h1-12,21-23,28-29H,13-14H2/t21-,22+,23-/m1/s1  //  1515 : InChI=1S/C24H21F2NO3/c25-17-5-1-15(2-6-17)22(29)14-13-21-23(16-3-11-20(28)12-4-16)27(24(21)30)19-9-7-18(26)8-10-19/h1-12,21-23,28-29H,13-14H2/t21-,22+,23-/m1/s1
3161 : 1  //  1515 : 0
3165 : InChI=1S/C18H18N6O5S2/c1-23-18(20-21-22-23)31-8-10-7-30-16-11(15(27)24(16)12(10)17(28)29)19-14(26)13(25)9-5-3-2-4-6-9/h2-6,11,13,16,25H,7-8H2,1H3,(H,19,26)(H,28,29)/t11-,13-,16-/m1/s1  //  212 : InChI=1S/C18H18N6O5S2/c1-23-18(20-21-22-23)31-8-10-7-30-16-11(15(27)24(16)12(10)17(28)29)19-14(26)13(25)9-5-3-2-4-6-9/h2-6,11,13,16,25H,7-8H2,1H3,(H,19,26)(H,28,29)/t

3265 : InChI=1S/C30H23BrO4/c31-23-16-14-20(15-17-23)19-10-12-22(13-11-19)26(32)18-25(21-6-2-1-3-7-21)28-29(33)24-8-4-5-9-27(24)35-30(28)34/h1-17,25-26,32-33H,18H2  //  2093 : InChI=1S/C30H23BrO4/c31-23-16-14-20(15-17-23)19-10-12-22(13-11-19)26(32)18-25(21-6-2-1-3-7-21)28-29(33)24-8-4-5-9-27(24)35-30(28)34/h1-17,25-26,32-33H,18H2
3265 : 1  //  2093 : 1
3266 : InChI=1S/C19H23NO/c1-20-14-12-18(13-15-20)21-19(16-8-4-2-5-9-16)17-10-6-3-7-11-17/h2-11,18-19H,12-15H2,1H3  //  316 : InChI=1S/C19H23NO/c1-20-14-12-18(13-15-20)21-19(16-8-4-2-5-9-16)17-10-6-3-7-11-17/h2-11,18-19H,12-15H2,1H3
3266 : 0  //  316 : 0
3267 : InChI=1S/C13H13N3/c14-13(15-11-7-3-1-4-8-11)16-12-9-5-2-6-10-12/h1-10H,(H3,14,15,16)  //  2530 : InChI=1S/C13H13N3/c14-13(15-11-7-3-1-4-8-11)16-12-9-5-2-6-10-12/h1-10H,(H3,14,15,16)
3267 : 0  //  2530 : 1
3268 : InChI=1S/C35H62N4O4/c1-24(40)42-32-21-26-9-10-27-28(35(26,4)23-31(32)37-15-19-39(7,8)20-16-37)11-12-34(3)29(27)22-30(33(34)43-25(2)41)36-13-17-38(5,6)18-14-36/h26-33H,9-23H2

3350 : InChI=1S/C19H22ClN5O/c20-16-5-3-6-17(15-16)23-13-11-22(12-14-23)8-4-10-25-19(26)24-9-2-1-7-18(24)21-25/h1-3,5-7,9,15H,4,8,10-14H2  //  1483 : InChI=1S/C19H22ClN5O/c20-16-5-3-6-17(15-16)23-13-11-22(12-14-23)8-4-10-25-19(26)24-9-2-1-7-18(24)21-25/h1-3,5-7,9,15H,4,8,10-14H2
3350 : 0  //  1483 : 0
3353 : InChI=1S/C6H10O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-5,7-10H,1H2/t2-,3-,4+,5-/m1/s1  //  60 : InChI=1S/C6H10O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-5,7-10H,1H2/t2-,3-,4+,5-/m1/s1
3353 : 0  //  60 : 0
3354 : InChI=1S/C24H19NO5/c1-15(26)29-19-11-7-17(8-12-19)24(18-9-13-20(14-10-18)30-16(2)27)21-5-3-4-6-22(21)25-23(24)28/h3-14H,1-2H3,(H,25,28)  //  3280 : InChI=1S/C24H19NO5/c1-15(26)29-19-11-7-17(8-12-19)24(18-9-13-20(14-10-18)30-16(2)27)21-5-3-4-6-22(21)25-23(24)28/h3-14H,1-2H3,(H,25,28)
3354 : 1  //  3280 : 1
3361 : InChI=1S/C22H28O5/c1-12-8-16-15-5-4-13-9-14(24)6-7-20(13,2)19(15)17(25)10-21(16,3)22(12,27)18(26)11-23/h6-7,9,12,15-16,19,23,27H,4-5,8,10-11H2,1-3H3/t12-,15-,16-,19+,20-,21-,22-

3458 : InChI=1S/C4H10O2/c1-4(6)2-3-5/h4-6H,2-3H2,1H3  //  648 : InChI=1S/C4H10O2/c1-4(6)2-3-5/h4-6H,2-3H2,1H3
3458 : 0  //  648 : 0
3463 : InChI=1S/C18H20N2O6/c1-5-26-18(22)15-11(3)19-10(2)14(17(21)25-4)16(15)12-7-6-8-13(9-12)20(23)24/h6-9,16,19H,5H2,1-4H3  //  1937 : InChI=1S/C18H20N2O6/c1-5-26-18(22)15-11(3)19-10(2)14(17(21)25-4)16(15)12-7-6-8-13(9-12)20(23)24/h6-9,16,19H,5H2,1-4H3
3463 : 1  //  1937 : 1
3467 : InChI=1S/C13H10O/c14-11-6-5-10-7-9-3-1-2-4-12(9)13(10)8-11/h1-6,8,14H,7H2  //  2428 : InChI=1S/C13H10O/c14-11-6-5-10-7-9-3-1-2-4-12(9)13(10)8-11/h1-6,8,14H,7H2
3467 : 1  //  2428 : 1
3468 : InChI=1S/C14H14O3/c1-8(2)7-11(15)12-13(16)9-5-3-4-6-10(9)14(12)17/h3-6,8,12H,7H2,1-2H3  //  2392 : InChI=1S/C14H14O3/c1-8(2)7-11(15)12-13(16)9-5-3-4-6-10(9)14(12)17/h3-6,8,12H,7H2,1-2H3
3468 : 1  //  2392 : 1
3471 : InChI=1S/C5H9NO3S/c1-3(7)6-4(2-10)5(8)9/h4,10H,2H2,1H3,(H,6,7)(H,8,9)/t4-/m0/s1  //  3351 : InChI=1S/C5H9NO3S/c1-3(7)6-4(2-10)5(8)9/h4,10H,2H2,1H3,(H,6,7)(H,8,9)/t4-/m0/s1
3471 

3581 : InChI=1S/C18H30O3S/c1-3-5-6-7-8-9-10-11-16(4-2)17-12-14-18(15-13-17)22(19,20)21/h12-16H,3-11H2,1-2H3,(H,19,20,21)  //  2331 : InChI=1S/C18H30O3S/c1-3-5-6-7-8-9-10-11-16(4-2)17-12-14-18(15-13-17)22(19,20)21/h12-16H,3-11H2,1-2H3,(H,19,20,21)
3581 : 0  //  2331 : 1
3584 : InChI=1S/C12H14N2O5/c15-12-10(8-4-2-1-3-5-8)6-9(13(16)17)7-11(12)14(18)19/h6-8,15H,1-5H2  //  1984 : InChI=1S/C12H14N2O5/c15-12-10(8-4-2-1-3-5-8)6-9(13(16)17)7-11(12)14(18)19/h6-8,15H,1-5H2
3584 : 1  //  1984 : 1
3587 : InChI=1S/C7H5NS/c9-6-8-7-4-2-1-3-5-7/h1-5H  //  2494 : InChI=1S/C7H5NS/c9-6-8-7-4-2-1-3-5-7/h1-5H
3587 : 1  //  2494 : 1
3599 : InChI=1S/C21H22N3OS2/c1-4-23-13-9-8-10-15(23)14-18-24(5-2)20(25)19(27-18)21-22(3)16-11-6-7-12-17(16)26-21/h6-14H,4-5H2,1-3H3/q+1/b21-19+  //  2115 : InChI=1S/C21H22N3OS2/c1-4-23-13-9-8-10-15(23)14-18-24(5-2)20(25)19(27-18)21-22(3)16-11-6-7-12-17(16)26-21/h6-14H,4-5H2,1-3H3/q+1/b21-19+
3599 : 1  //  2115 : 1
3603 : InChI=1S/C26H28N3/c1-19-17-21(20(2)29(19)24-9-7-6-8-10-24)1

3727 : InChI=1S/C18H22ClNO/c1-16(15-21-18-10-6-3-7-11-18)20(13-12-19)14-17-8-4-2-5-9-17/h2-11,16H,12-15H2,1H3  //  2054 : InChI=1S/C18H22ClNO/c1-16(15-21-18-10-6-3-7-11-18)20(13-12-19)14-17-8-4-2-5-9-17/h2-11,16H,12-15H2,1H3
3727 : 0  //  2054 : 1
3740 : InChI=1S/C6H4Cl2O/c7-4-1-2-5(8)6(9)3-4/h1-3,9H  //  2804 : InChI=1S/C6H4Cl2O/c7-4-1-2-5(8)6(9)3-4/h1-3,9H
3740 : 0  //  2804 : 1
3741 : InChI=1S/C23H27FN4O2/c1-15-18(23(29)28-10-3-2-4-21(28)25-15)9-13-27-11-7-16(8-12-27)22-19-6-5-17(24)14-20(19)30-26-22/h5-6,14,16H,2-4,7-13H2,1H3  //  2813 : InChI=1S/C23H27FN4O2/c1-15-18(23(29)28-10-3-2-4-21(28)25-15)9-13-27-11-7-16(8-12-27)22-19-6-5-17(24)14-20(19)30-26-22/h5-6,14,16H,2-4,7-13H2,1H3
3741 : 0  //  2813 : 1
3745 : InChI=1S/C6H7N3O2/c7-5-2-1-4(9(10)11)3-6(5)8/h1-3H,7-8H2  //  3202 : InChI=1S/C6H7N3O2/c7-5-2-1-4(9(10)11)3-6(5)8/h1-3H,7-8H2
3745 : 0  //  3202 : 1
3746 : InChI=1S/C21H21ClN2O/c1-4-14(2)24(3)21(25)19-13-15-9-5-6-10-16(15)20(23-19)17-11-7-8-12-18(17)22/h5-14H,4H2,1-3H3  //  25

3839 : InChI=1S/C21H23BrFNO2/c22-18-7-5-17(6-8-18)21(26)11-14-24(15-12-21)13-1-2-20(25)16-3-9-19(23)10-4-16/h3-10,26H,1-2,11-15H2  //  245 : InChI=1S/C21H23BrFNO2/c22-18-7-5-17(6-8-18)21(26)11-14-24(15-12-21)13-1-2-20(25)16-3-9-19(23)10-4-16/h3-10,26H,1-2,11-15H2
3839 : 0  //  245 : 0
3841 : InChI=1S/C10H13ClN2O3S/c1-2-7-12-10(14)13-17(15,16)9-5-3-8(11)4-6-9/h3-6H,2,7H2,1H3,(H2,12,13,14)  //  1054 : InChI=1S/C10H13ClN2O3S/c1-2-7-12-10(14)13-17(15,16)9-5-3-8(11)4-6-9/h3-6H,2,7H2,1H3,(H2,12,13,14)
3841 : 0  //  1054 : 0
3842 : InChI=1S/C15H10/c1-3-10-7-8-11-4-2-6-13-9-12(5-1)14(10)15(11)13/h1-8H,9H2  //  2988 : InChI=1S/C15H10/c1-3-10-7-8-11-4-2-6-13-9-12(5-1)14(10)15(11)13/h1-8H,9H2
3842 : 0  //  2988 : 1
3843 : InChI=1S/C21H31N3O5/c22-13-5-4-9-16(19(25)24-14-6-10-18(24)21(28)29)23-17(20(26)27)12-11-15-7-2-1-3-8-15/h1-3,7-8,16-18,23H,4-6,9-14,22H2,(H,26,27)(H,28,29)/t16-,17-,18-/m0/s1  //  1191 : InChI=1S/C21H31N3O5/c22-13-5-4-9-16(19(25)24-14-6-10-18(24)21(28)29)23-17(20(26)27)12-11-15

3928 : InChI=1S/C6H2Cl4O/c7-2-1-3(11)5(9)6(10)4(2)8/h1,11H  //  3234 : InChI=1S/C6H2Cl4O/c7-2-1-3(11)5(9)6(10)4(2)8/h1,11H
3928 : 1  //  3234 : 1
3932 : InChI=1S/C12H13N/c1-2-9-13-12-8-7-10-5-3-4-6-11(10)12/h1,3-6,12-13H,7-9H2/t12-/m1/s1  //  1588 : InChI=1S/C12H13N/c1-2-9-13-12-8-7-10-5-3-4-6-11(10)12/h1,3-6,12-13H,7-9H2/t12-/m1/s1
3932 : 0  //  1588 : 0
3936 : InChI=1S/C23H25N5O5/c1-30-18-11-14-15(12-19(18)31-2)25-23(26-21(14)24)28-9-7-27(8-10-28)22(29)20-13-32-16-5-3-4-6-17(16)33-20/h3-6,11-12,20H,7-10,13H2,1-2H3,(H2,24,25,26)  //  1740 : InChI=1S/C23H25N5O5/c1-30-18-11-14-15(12-19(18)31-2)25-23(26-21(14)24)28-9-7-27(8-10-28)22(29)20-13-32-16-5-3-4-6-17(16)33-20/h3-6,11-12,20H,7-10,13H2,1-2H3,(H2,24,25,26)
3936 : 0  //  1740 : 0
3937 : InChI=1S/C10H19N2/c1-3-4-5-6-7-12-9-8-11(2)10-12/h8-10H,3-7H2,1-2H3/q+1  //  2951 : InChI=1S/C10H19N2/c1-3-4-5-6-7-12-9-8-11(2)10-12/h8-10H,3-7H2,1-2H3/q+1
3937 : 0  //  2951 : 1
3939 : InChI=1S/C19H20Br4O4/c1-19(2,11-7-13(20)17(14(21)8-11)26-5-3-24)1

4064 : InChI=1S/C13H10O2/c14-12-8-4-7-11(9-12)13(15)10-5-2-1-3-6-10/h1-9,14H  //  2288 : InChI=1S/C13H10O2/c14-12-8-4-7-11(9-12)13(15)10-5-2-1-3-6-10/h1-9,14H
4064 : 1  //  2288 : 1
4067 : InChI=1S/C15H9ClO/c16-15-12-7-3-1-5-10(12)14(9-17)11-6-2-4-8-13(11)15/h1-9H  //  2364 : InChI=1S/C15H9ClO/c16-15-12-7-3-1-5-10(12)14(9-17)11-6-2-4-8-13(11)15/h1-9H
4067 : 1  //  2364 : 1
4071 : InChI=1S/C19H34N/c1-2-3-4-5-6-7-8-9-10-11-12-14-17-20-18-15-13-16-19-20/h13,15-16,18-19H,2-12,14,17H2,1H3/q+1  //  2484 : InChI=1S/C19H34N/c1-2-3-4-5-6-7-8-9-10-11-12-14-17-20-18-15-13-16-19-20/h13,15-16,18-19H,2-12,14,17H2,1H3/q+1
4071 : 1  //  2484 : 1
4075 : InChI=1S/C27H42NO2/c1-26(2,3)22-27(4,5)24-13-15-25(16-14-24)30-20-19-29-18-17-28(6,7)21-23-11-9-8-10-12-23/h8-16H,17-22H2,1-7H3/q+1  //  2335 : InChI=1S/C27H42NO2/c1-26(2,3)22-27(4,5)24-13-15-25(16-14-24)30-20-19-29-18-17-28(6,7)21-23-11-9-8-10-12-23/h8-16H,17-22H2,1-7H3/q+1
4075 : 0  //  2335 : 1
4077 : InChI=1S/C16H12O2/c1-2-10-7-8-13-14(9-10)16(18)12

4200 : InChI=1S/C8H4F3NO/c9-8(10,11)6-2-1-3-7(4-6)12-5-13/h1-4H  //  3063 : InChI=1S/C8H4F3NO/c9-8(10,11)6-2-1-3-7(4-6)12-5-13/h1-4H
4200 : 1  //  3063 : 1
4216 : InChI=1S/C11H12N2O2/c1-2-13-10(14)9(12-11(13)15)8-6-4-3-5-7-8/h3-7,9H,2H2,1H3,(H,12,15)  //  1458 : InChI=1S/C11H12N2O2/c1-2-13-10(14)9(12-11(13)15)8-6-4-3-5-7-8/h3-7,9H,2H2,1H3,(H,12,15)
4216 : 0  //  1458 : 0
4219 : InChI=1S/C22H25NO3/c1-21(2)11-12-22(3,4)18-13-15(7-10-17(18)21)19(24)23-16-8-5-14(6-9-16)20(25)26/h5-10,13H,11-12H2,1-4H3,(H,23,24)(H,25,26)  //  2707 : InChI=1S/C22H25NO3/c1-21(2)11-12-22(3,4)18-13-15(7-10-17(18)21)19(24)23-16-8-5-14(6-9-16)20(25)26/h5-10,13H,11-12H2,1-4H3,(H,23,24)(H,25,26)
4219 : 1  //  2707 : 1
4220 : InChI=1S/C14H22ClN3O2/c1-4-18(5-2)7-6-17-14(19)10-8-11(15)12(16)9-13(10)20-3/h8-9H,4-7,16H2,1-3H3,(H,17,19)  //  191 : InChI=1S/C14H22ClN3O2/c1-4-18(5-2)7-6-17-14(19)10-8-11(15)12(16)9-13(10)20-3/h8-9H,4-7,16H2,1-3H3,(H,17,19)
4220 : 0  //  191 : 0
4222 : InChI=1S/C9H6N2S3/c10-5-12-6-13-9-11-7-

4305 : InChI=1S/C9H13NO3/c1-10-5-9(13)6-2-3-7(11)8(12)4-6/h2-4,9-13H,5H2,1H3/t9-/m0/s1  //  1709 : InChI=1S/C9H13NO3/c1-10-5-9(13)6-2-3-7(11)8(12)4-6/h2-4,9-13H,5H2,1H3/t9-/m0/s1
4305 : 0  //  1709 : 0
4308 : InChI=1S/C23H15ClO3/c24-16-12-10-15(11-13-16)19(14-6-2-1-3-7-14)23(27)20-21(25)17-8-4-5-9-18(17)22(20)26/h1-13,19-20H  //  2003 : InChI=1S/C23H15ClO3/c24-16-12-10-15(11-13-16)19(14-6-2-1-3-7-14)23(27)20-21(25)17-8-4-5-9-18(17)22(20)26/h1-13,19-20H
4308 : 1  //  2003 : 1
4316 : InChI=1S/C7H6N2O4/c8-6-3-4(9(12)13)1-2-5(6)7(10)11/h1-3H,8H2,(H,10,11)  //  3121 : InChI=1S/C7H6N2O4/c8-6-3-4(9(12)13)1-2-5(6)7(10)11/h1-3H,8H2,(H,10,11)
4316 : 0  //  3121 : 1
4318 : InChI=1S/C13H10N2O4/c16-10-6-5-9(11(17)14-10)15-12(18)7-3-1-2-4-8(7)13(15)19/h1-4,9H,5-6H2,(H,14,16,17)  //  963 : InChI=1S/C13H10N2O4/c16-10-6-5-9(11(17)14-10)15-12(18)7-3-1-2-4-8(7)13(15)19/h1-4,9H,5-6H2,(H,14,16,17)
4318 : 0  //  963 : 0
4323 : InChI=1S/C6H4N2O5/c9-6-2-1-4(7(10)11)3-5(6)8(12)13/h1-3,9H  //  3062 : InChI=1S/C

4404 : InChI=1S/C17H26N2O4S/c1-4-19-10-6-7-13(19)12-18-17(20)15-11-14(24(21,22)5-2)8-9-16(15)23-3/h8-9,11,13H,4-7,10,12H2,1-3H3,(H,18,20)  //  1270 : InChI=1S/C17H26N2O4S/c1-4-19-10-6-7-13(19)12-18-17(20)15-11-14(24(21,22)5-2)8-9-16(15)23-3/h8-9,11,13H,4-7,10,12H2,1-3H3,(H,18,20)
4404 : 1  //  1270 : 0
4408 : InChI=1S/C26H36O3/c1-26-15-14-21-20-10-8-19(27)16-18(20)7-9-22(21)23(26)11-12-24(26)29-25(28)13-6-17-4-2-3-5-17/h8,10,16-17,21-24,27H,2-7,9,11-15H2,1H3/t21-,22-,23+,24+,26+/m1/s1  //  1534 : InChI=1S/C26H36O3/c1-26-15-14-21-20-10-8-19(27)16-18(20)7-9-22(21)23(26)11-12-24(26)29-25(28)13-6-17-4-2-3-5-17/h8,10,16-17,21-24,27H,2-7,9,11-15H2,1H3/t21-,22-,23+,24+,26+/m1/s1
4408 : 1  //  1534 : 0
4409 : InChI=1S/C14H9Cl5O/c15-11-5-1-9(2-6-11)13(20,14(17,18)19)10-3-7-12(16)8-4-10/h1-8,20H  //  3294 : InChI=1S/C14H9Cl5O/c15-11-5-1-9(2-6-11)13(20,14(17,18)19)10-3-7-12(16)8-4-10/h1-8,20H
4409 : 0  //  3294 : 1
4416 : InChI=1S/C14H9I3O4/c15-9-6-8(1-2-12(9)18)21-14-10(16)3-7(4-11(14)17)5-13(19

4515 : InChI=1S/C21H26O2/c1-5-6-7-8-15-12-18(22)20-16-11-14(2)9-10-17(16)21(3,4)23-19(20)13-15/h9-13,22H,5-8H2,1-4H3  //  3084 : InChI=1S/C21H26O2/c1-5-6-7-8-15-12-18(22)20-16-11-14(2)9-10-17(16)21(3,4)23-19(20)13-15/h9-13,22H,5-8H2,1-4H3
4515 : 1  //  3084 : 1
4519 : InChI=1S/C20H44N/c1-5-7-8-9-10-11-12-13-14-15-16-17-18-19-20-21(3,4)6-2/h5-20H2,1-4H3/q+1  //  2661 : InChI=1S/C20H44N/c1-5-7-8-9-10-11-12-13-14-15-16-17-18-19-20-21(3,4)6-2/h5-20H2,1-4H3/q+1
4519 : 1  //  2661 : 1
4520 : InChI=1S/C12H10O2/c13-11-5-1-9(2-6-11)10-3-7-12(14)8-4-10/h1-8,13-14H  //  2349 : InChI=1S/C12H10O2/c13-11-5-1-9(2-6-11)10-3-7-12(14)8-4-10/h1-8,13-14H
4520 : 1  //  2349 : 1
4525 : InChI=1S/C13H19ClN2O2/c1-3-16(4-2)7-8-18-13(17)11-6-5-10(15)9-12(11)14/h5-6,9H,3-4,7-8,15H2,1-2H3  //  1246 : InChI=1S/C13H19ClN2O2/c1-3-16(4-2)7-8-18-13(17)11-6-5-10(15)9-12(11)14/h5-6,9H,3-4,7-8,15H2,1-2H3
4525 : 0  //  1246 : 0
4528 : InChI=1S/C14H17Cl2NO2/c1-14(7-3-2-4-8-14)13(19)17-9-5-6-10(18)12(16)11(9)15/h5-6,18H,2-4,

4640 : InChI=1S/C20H26O4/c21-19(23-15-9-3-1-4-10-15)17-13-7-8-14-18(17)20(22)24-16-11-5-2-6-12-16/h7-8,13-16H,1-6,9-12H2  //  3148 : InChI=1S/C20H26O4/c21-19(23-15-9-3-1-4-10-15)17-13-7-8-14-18(17)20(22)24-16-11-5-2-6-12-16/h7-8,13-16H,1-6,9-12H2
4640 : 1  //  3148 : 1
4642 : InChI=1S/C18H24O2/c1-18-9-8-14-13-5-3-12(19)10-11(13)2-4-15(14)16(18)6-7-17(18)20/h3,5,10,14-17,19-20H,2,4,6-9H2,1H3/t14-,15-,16+,17+,18+/m1/s1  //  355 : InChI=1S/C18H24O2/c1-18-9-8-14-13-5-3-12(19)10-11(13)2-4-15(14)16(18)6-7-17(18)20/h3,5,10,14-17,19-20H,2,4,6-9H2,1H3/t14-,15-,16+,17+,18+/m1/s1
4642 : 0  //  355 : 0
4643 : InChI=1S/C7H10N2/c1-5-2-3-6(8)4-7(5)9/h2-4H,8-9H2,1H3  //  3228 : InChI=1S/C7H10N2/c1-5-2-3-6(8)4-7(5)9/h2-4H,8-9H2,1H3
4643 : 1  //  3228 : 1
4646 : InChI=1S/C23H32O4/c1-14(24)27-13-21(26)20-7-6-18-17-5-4-15-12-16(25)8-10-22(15,2)19(17)9-11-23(18,20)3/h12,17-20H,4-11,13H2,1-3H3/t17-,18-,19-,20+,22-,23-/m0/s1  //  942 : InChI=1S/C23H32O4/c1-14(24)27-13-21(26)20-7-6-18-17-5-4-15-12-16(25)8-10-

4731 : InChI=1S/C18H30O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18(19)20/h6-7,9-10,12-13H,2-5,8,11,14-17H2,1H3,(H,19,20)/b7-6-,10-9-,13-12-  //  121 : InChI=1S/C18H30O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18(19)20/h6-7,9-10,12-13H,2-5,8,11,14-17H2,1H3,(H,19,20)/b7-6-,10-9-,13-12-
4731 : 0  //  121 : 0
4742 : InChI=1S/C17H21N2/c1-4-19-14-6-5-7-17(19)13-10-15-8-11-16(12-9-15)18(2)3/h5-14H,4H2,1-3H3/q+1  //  3193 : InChI=1S/C17H21N2/c1-4-19-14-6-5-7-17(19)13-10-15-8-11-16(12-9-15)18(2)3/h5-14H,4H2,1-3H3/q+1
4742 : 1  //  3193 : 1
4746 : InChI=1S/C14H20N2O3/c17-13(15-12-8-4-3-5-9-12)10-6-1-2-7-11-14(18)16-19/h3-5,8-9,19H,1-2,6-7,10-11H2,(H,15,17)(H,16,18)  //  1109 : InChI=1S/C14H20N2O3/c17-13(15-12-8-4-3-5-9-12)10-6-1-2-7-11-14(18)16-19/h3-5,8-9,19H,1-2,6-7,10-11H2,(H,15,17)(H,16,18)
4746 : 0  //  1109 : 0
4756 : InChI=1S/C24H32O6/c1-21(2)29-19-10-16-15-6-5-13-9-14(26)7-8-22(13,3)20(15)17(27)11-23(16,4)24(19,30-21)18(28)12-25/h7-9,15-17,19-20,25,27H,5-6,10-12H2,1-4H3/t15-,16-,1

4855 : InChI=1S/C12H13ClN4/c1-2-9-10(11(14)17-12(15)16-9)7-3-5-8(13)6-4-7/h3-6H,2H2,1H3,(H4,14,15,16,17)  //  1562 : InChI=1S/C12H13ClN4/c1-2-9-10(11(14)17-12(15)16-9)7-3-5-8(13)6-4-7/h3-6H,2H2,1H3,(H4,14,15,16,17)
4855 : 0  //  1562 : 0
4863 : InChI=1S/C17H25NO2/c1-3-5-6-11(4-2)10-18-16(19)14-12-7-8-13(9-12)15(14)17(18)20/h7-8,11-15H,3-6,9-10H2,1-2H3  //  3001 : InChI=1S/C17H25NO2/c1-3-5-6-11(4-2)10-18-16(19)14-12-7-8-13(9-12)15(14)17(18)20/h7-8,11-15H,3-6,9-10H2,1-2H3
4863 : 1  //  3001 : 1
4865 : InChI=1S/C11H11NO2/c1-12-10(13)7-9(11(12)14)8-5-3-2-4-6-8/h2-6,9H,7H2,1H3  //  1630 : InChI=1S/C11H11NO2/c1-12-10(13)7-9(11(12)14)8-5-3-2-4-6-8/h2-6,9H,7H2,1H3
4865 : 0  //  1630 : 0
4876 : InChI=1S/C23H42N/c1-4-5-6-7-8-9-10-11-12-13-14-18-21-24(2,3)22-23-19-16-15-17-20-23/h15-17,19-20H,4-14,18,21-22H2,1-3H3/q+1  //  2287 : InChI=1S/C23H42N/c1-4-5-6-7-8-9-10-11-12-13-14-18-21-24(2,3)22-23-19-16-15-17-20-23/h15-17,19-20H,4-14,18,21-22H2,1-3H3/q+1
4876 : 1  //  2287 : 1
4880 : InChI=1S/C19H27

5004 : InChI=1S/C15H21NO2/c1-3-18-14(17)15(9-11-16(2)12-10-15)13-7-5-4-6-8-13/h4-8H,3,9-12H2,1-2H3  //  1537 : InChI=1S/C15H21NO2/c1-3-18-14(17)15(9-11-16(2)12-10-15)13-7-5-4-6-8-13/h4-8H,3,9-12H2,1-2H3
5004 : 1  //  1537 : 0
5009 : InChI=1S/C8H12N4O4/c9-7-10-3-12(8(15)11-7)6-1-4(14)5(2-13)16-6/h3-6,13-14H,1-2H2,(H2,9,11,15)/t4-,5+,6+/m0/s1  //  289 : InChI=1S/C8H12N4O4/c9-7-10-3-12(8(15)11-7)6-1-4(14)5(2-13)16-6/h3-6,13-14H,1-2H2,(H2,9,11,15)/t4-,5+,6+/m0/s1
5009 : 0  //  289 : 0
5017 : InChI=1S/C14H21N3O3/c1-9(2)15-7-12-4-3-10-5-11(8-18)14(17(19)20)6-13(10)16-12/h5-6,9,12,15-16,18H,3-4,7-8H2,1-2H3  //  366 : InChI=1S/C14H21N3O3/c1-9(2)15-7-12-4-3-10-5-11(8-18)14(17(19)20)6-13(10)16-12/h5-6,9,12,15-16,18H,3-4,7-8H2,1-2H3
5017 : 0  //  366 : 0
5020 : InChI=1S/C21H19N3O3S/c1-27-20-13-14(24-28(2,25)26)11-12-19(20)23-21-15-7-3-5-9-17(15)22-18-10-6-4-8-16(18)21/h3-13,24H,1-2H3,(H,22,23)  //  2448 : InChI=1S/C21H19N3O3S/c1-27-20-13-14(24-28(2,25)26)11-12-19(20)23-21-15-7-3-5-9-17(15)22-18-1

5139 : InChI=1S/C2H7NO3S/c3-1-2-7(4,5)6/h1-3H2,(H,4,5,6)  //  2293 : InChI=1S/C2H7NO3S/c3-1-2-7(4,5)6/h1-3H2,(H,4,5,6)
5139 : 0  //  2293 : 1
5140 : InChI=1S/C21H26O5/c1-19-7-5-13(23)9-12(19)3-4-14-15-6-8-21(26,17(25)11-22)20(15,2)10-16(24)18(14)19/h5,7,9,14-15,18,22,26H,3-4,6,8,10-11H2,1-2H3/t14-,15-,18+,19-,20-,21-/m0/s1  //  1889 : InChI=1S/C21H26O5/c1-19-7-5-13(23)9-12(19)3-4-14-15-6-8-21(26,17(25)11-22)20(15,2)10-16(24)18(14)19/h5,7,9,14-15,18,22,26H,3-4,6,8,10-11H2,1-2H3/t14-,15-,18+,19-,20-,21-/m0/s1
5140 : 0  //  1889 : 1
5141 : InChI=1S/C14H9NO2/c15-8-5-6-11-12(7-8)14(17)10-4-2-1-3-9(10)13(11)16/h1-7H,15H2  //  1926 : InChI=1S/C14H9NO2/c15-8-5-6-11-12(7-8)14(17)10-4-2-1-3-9(10)13(11)16/h1-7H,15H2
5141 : 1  //  1926 : 1
5149 : InChI=1S/C13H13N3O/c14-10-3-1-9(2-4-10)13(17)16-12-7-5-11(15)6-8-12/h1-8H,14-15H2,(H,16,17)  //  1970 : InChI=1S/C13H13N3O/c14-10-3-1-9(2-4-10)13(17)16-12-7-5-11(15)6-8-12/h1-8H,14-15H2,(H,16,17)
5149 : 1  //  1970 : 1
5150 : InChI=1S/C24H28N2O5/c1-2-31-2

5258 : InChI=1S/C4H11N5/c1-9(2)4(7)8-3(5)6/h1-2H3,(H5,5,6,7,8)  //  1877 : InChI=1S/C4H11N5/c1-9(2)4(7)8-3(5)6/h1-2H3,(H5,5,6,7,8)
5258 : 0  //  1877 : 1
5258 : InChI=1S/C4H11N5/c1-9(2)4(7)8-3(5)6/h1-2H3,(H5,5,6,7,8)  //  2938 : InChI=1S/C4H11N5/c1-9(2)4(7)8-3(5)6/h1-2H3,(H5,5,6,7,8)
5258 : 0  //  2938 : 1
5260 : InChI=1S/C18H15O4P/c19-23(20-16-10-4-1-5-11-16,21-17-12-6-2-7-13-17)22-18-14-8-3-9-15-18/h1-15H  //  3256 : InChI=1S/C18H15O4P/c19-23(20-16-10-4-1-5-11-16,21-17-12-6-2-7-13-17)22-18-14-8-3-9-15-18/h1-15H
5260 : 1  //  3256 : 1
5268 : InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)  //  2031 : InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)
5268 : 1  //  2031 : 1
5271 : InChI=1S/C12H8Br2O/c13-9-1-5-11(6-2-9)15-12-7-3-10(14)4-8-12/h1-8H  //  2622 : InChI=1S/C12H8Br2O/c13-9-1-5-11(6-2-9)15-12-7-3-10(14)4-8-12/h1-8H
5

5393 : InChI=1S/C15H18I3NO3/c1-3-5-12(20)19-14-11(17)7-10(16)9(13(14)18)6-8(4-2)15(21)22/h7-8H,3-6H2,1-2H3,(H,19,20)(H,21,22)  //  1076 : InChI=1S/C15H18I3NO3/c1-3-5-12(20)19-14-11(17)7-10(16)9(13(14)18)6-8(4-2)15(21)22/h7-8H,3-6H2,1-2H3,(H,19,20)(H,21,22)
5393 : 0  //  1076 : 0
5395 : InChI=1S/C20H21ClO4/c1-13(2)24-19(23)20(3,4)25-17-11-7-15(8-12-17)18(22)14-5-9-16(21)10-6-14/h5-13H,1-4H3  //  3036 : InChI=1S/C20H21ClO4/c1-13(2)24-19(23)20(3,4)25-17-11-7-15(8-12-17)18(22)14-5-9-16(21)10-6-14/h5-13H,1-4H3
5395 : 0  //  3036 : 1
5396 : InChI=1S/C17H20O2/c1-11-9-13(5-7-15(11)18)17(3,4)14-6-8-16(19)12(2)10-14/h5-10,18-19H,1-4H3  //  2142 : InChI=1S/C17H20O2/c1-11-9-13(5-7-15(11)18)17(3,4)14-6-8-16(19)12(2)10-14/h5-10,18-19H,1-4H3
5396 : 1  //  2142 : 1
5399 : InChI=1S/C7H7NO2/c1-10-7(9)6-3-2-4-8-5-6/h2-5H,1H3  //  1022 : InChI=1S/C7H7NO2/c1-10-7(9)6-3-2-4-8-5-6/h2-5H,1H3
5399 : 0  //  1022 : 0
5403 : InChI=1S/C14H10/c1-3-7-13-11(5-1)9-10-12-6-2-4-8-14(12)13/h1-10H  //  3345 : InChI=1S/C14

5530 : InChI=1S/C16H17N3O4S/c1-8-7-24-15-11(14(21)19(15)12(8)16(22)23)18-13(20)10(17)9-5-3-2-4-6-9/h2-6,10-11,15H,7,17H2,1H3,(H,18,20)(H,22,23)/t10-,11-,15-/m1/s1  //  1172 : InChI=1S/C16H17N3O4S/c1-8-7-24-15-11(14(21)19(15)12(8)16(22)23)18-13(20)10(17)9-5-3-2-4-6-9/h2-6,10-11,15H,7,17H2,1H3,(H,18,20)(H,22,23)/t10-,11-,15-/m1/s1
5530 : 0  //  1172 : 0
5531 : InChI=1S/C6H7NO3S/c7-5-2-1-3-6(4-5)11(8,9)10/h1-4H,7H2,(H,8,9,10)  //  2629 : InChI=1S/C6H7NO3S/c7-5-2-1-3-6(4-5)11(8,9)10/h1-4H,7H2,(H,8,9,10)
5531 : 0  //  2629 : 1
5533 : InChI=1S/C31H52O3/c1-21(2)13-10-14-22(3)15-11-16-23(4)17-12-19-31(9)20-18-28-26(7)29(33-27(8)32)24(5)25(6)30(28)34-31/h21-23H,10-20H2,1-9H3/t22-,23-,31-/m1/s1  //  1745 : InChI=1S/C31H52O3/c1-21(2)13-10-14-22(3)15-11-16-23(4)17-12-19-31(9)20-18-28-26(7)29(33-27(8)32)24(5)25(6)30(28)34-31/h21-23H,10-20H2,1-9H3/t22-,23-,31-/m1/s1
5533 : 0  //  1745 : 0
5537 : InChI=1S/C6H5ClN2O3/c7-3-1-4(8)6(10)2-5(3)9(11)12/h1-2,10H,8H2  //  3359 : InChI=1S/C6H5ClN2O3/c7-3-1-4(8

5623 : InChI=1S/C8H5N3O3S2/c12-7(5-2-1-3-15-5)10-8-9-4-6(16-8)11(13)14/h1-4H,(H,9,10,12)  //  1915 : InChI=1S/C8H5N3O3S2/c12-7(5-2-1-3-15-5)10-8-9-4-6(16-8)11(13)14/h1-4H,(H,9,10,12)
5623 : 1  //  1915 : 1
5624 : InChI=1S/C11H6Br3NOS/c12-6-1-3-7(4-2-6)15-11(16)9-5-8(13)10(14)17-9/h1-5H,(H,15,16)  //  2504 : InChI=1S/C11H6Br3NOS/c12-6-1-3-7(4-2-6)15-11(16)9-5-8(13)10(14)17-9/h1-5H,(H,15,16)
5624 : 1  //  2504 : 1
5637 : InChI=1S/C7H9NO/c1-5-2-3-7(9)6(8)4-5/h2-4,9H,8H2,1H3  //  2357 : InChI=1S/C7H9NO/c1-5-2-3-7(9)6(8)4-5/h2-4,9H,8H2,1H3
5637 : 1  //  2357 : 1
5642 : InChI=1S/C2H6O3S2/c3-7(4,5)2-1-6/h6H,1-2H2,(H,3,4,5)  //  531 : InChI=1S/C2H6O3S2/c3-7(4,5)2-1-6/h6H,1-2H2,(H,3,4,5)
5642 : 0  //  531 : 0
5644 : InChI=1S/C12H15N/c1-9-8-12(2,3)13-11-7-5-4-6-10(9)11/h4-8,13H,1-3H3  //  3337 : InChI=1S/C12H15N/c1-9-8-12(2,3)13-11-7-5-4-6-10(9)11/h4-8,13H,1-3H3
5644 : 0  //  3337 : 1
5646 : InChI=1S/C12H16O3/c1-2-3-4-9-15-12(14)10-5-7-11(13)8-6-10/h5-8,13H,2-4,9H2,1H3  //  2266 : InChI=1S/C12H1

In [159]:
len(num),len(counter1),len(counter2)

(1592, 279, 279)

In [160]:
d4_ = d4.drop(num)
d4_.reset_index(inplace=True)
d4_.drop("index",axis = 1,inplace = True)
d4_.drop("Smile",axis = 1,inplace = True)
d4_["Toxicity"].value_counts()

Toxicity
0    1106
1     708
Name: count, dtype: int64

In [161]:
#Have overlap index, so weird....
#Some conpound's mol will be None when we 
counter1 = set(counter1)
d1_ = d1.drop(counter1)
d1_.reset_index(inplace=True)
d1_.drop("index",axis = 1,inplace = True)
d1_["Mol"] = [Chem.MolFromInchi(x) for x in d1_["Inchi"]]
d1_["Toxicity"].value_counts()

[12:04:02] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[12:04:02] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted




Toxicity
0    4688
1     792
Name: count, dtype: int64

In [162]:
d_total = pd.concat([d1_,d4_],axis = 0)
d_total.reset_index(inplace=True)
d_total.drop("index",axis = 1,inplace = True)
d_total["Toxicity"].value_counts()

Toxicity
0    5794
1    1500
Name: count, dtype: int64

In [163]:
#combine d_total and cp and ge into dtotal(same assay)
#delete the overlap part
num = []
counter1 = []
counter2 = []
for i in range(len(d_total)):
    for y in range(len(d3)):
        if(d_total["Inchi"][i] == d3["Inchi"][y]):
            num.append(y)
            print(i,":",d_total["Inchi"][i]," // ",y,":",d3["Inchi"][y])
            print(i,":",d_total["Toxicity"][i]," // ",y,":",d3["Toxicity"][y])
            if d_total["Toxicity"][i] != d3["Toxicity"][y]:
                counter1.append(i)
                counter2.append(y)

0 : InChI=1S/C10H19N5S/c1-6(2)11-8-13-9(12-7(3)4)15-10(14-8)16-5/h6-7H,1-5H3,(H2,11,12,13,14,15)  //  32 : InChI=1S/C10H19N5S/c1-6(2)11-8-13-9(12-7(3)4)15-10(14-8)16-5/h6-7H,1-5H3,(H2,11,12,13,14,15)
0 : 0  //  32 : 0
4 : InChI=1S/C16H13ClN2O/c1-19-14-8-7-12(17)9-13(14)16(18-10-15(19)20)11-5-3-2-4-6-11/h2-9H,10H2,1H3  //  205 : InChI=1S/C16H13ClN2O/c1-19-14-8-7-12(17)9-13(14)16(18-10-15(19)20)11-5-3-2-4-6-11/h2-9H,10H2,1H3
4 : 0  //  205 : 0
34 : InChI=1S/C8H10N2S/c1-2-7-5-6(8(9)11)3-4-10-7/h3-5H,2H2,1H3,(H2,9,11)  //  450 : InChI=1S/C8H10N2S/c1-2-7-5-6(8(9)11)3-4-10-7/h3-5H,2H2,1H3,(H2,9,11)
34 : 0  //  450 : 0
77 : InChI=1S/C23H34O5/c1-4-14(2)23(26)28-20-7-5-6-16-9-8-15(3)19(22(16)20)11-10-18-12-17(24)13-21(25)27-18/h6,8-9,14-15,17-20,22,24H,4-5,7,10-13H2,1-3H3/t14-,15-,17+,18+,19-,20-,22-/m0/s1  //  374 : InChI=1S/C23H34O5/c1-4-14(2)23(26)28-20-7-5-6-16-9-8-15(3)19(22(16)20)11-10-18-12-17(24)13-21(25)27-18/h6,8-9,14-15,17-20,22,24H,4-5,7,10-13H2,1-3H3/t14-,15-,17+,18+,19-,20-,22-/m0

408 : InChI=1S/C12H17N3O/c1-8(2)15-7-12(16)9-3-4-11(14)10(5-9)6-13/h3-5,8,12,15-16H,7,14H2,1-2H3  //  84 : InChI=1S/C12H17N3O/c1-8(2)15-7-12(16)9-3-4-11(14)10(5-9)6-13/h3-5,8,12,15-16H,7,14H2,1-2H3
408 : 0  //  84 : 0
431 : InChI=1S/C18H14Cl4N2O/c19-13-2-1-12(16(21)7-13)10-25-18(9-24-6-5-23-11-24)15-4-3-14(20)8-17(15)22/h1-8,11,18H,9-10H2  //  254 : InChI=1S/C18H14Cl4N2O/c19-13-2-1-12(16(21)7-13)10-25-18(9-24-6-5-23-11-24)15-4-3-14(20)8-17(15)22/h1-8,11,18H,9-10H2
431 : 0  //  254 : 0
436 : InChI=1S/C13H18N4O3/c1-9(18)6-4-5-7-17-12(19)10-11(14-8-15(10)2)16(3)13(17)20/h8H,4-7H2,1-3H3  //  113 : InChI=1S/C13H18N4O3/c1-9(18)6-4-5-7-17-12(19)10-11(14-8-15(10)2)16(3)13(17)20/h8H,4-7H2,1-3H3
436 : 0  //  113 : 0
439 : InChI=1S/C4H6N4O3S2/c1-2(9)6-3-7-8-4(12-3)13(5,10)11/h1H3,(H2,5,10,11)(H,6,7,9)  //  420 : InChI=1S/C4H6N4O3S2/c1-2(9)6-3-7-8-4(12-3)13(5,10)11/h1H3,(H2,5,10,11)(H,6,7,9)
439 : 0  //  420 : 0
454 : InChI=1S/C19H22N2/c1-16-7-9-17(10-8-16)18(19-6-2-3-12-20-19)11-15-21-13-4-5-14-2

893 : InChI=1S/C8H11NO/c9-6-5-7-1-3-8(10)4-2-7/h1-4,10H,5-6,9H2  //  452 : InChI=1S/C8H11NO/c9-6-5-7-1-3-8(10)4-2-7/h1-4,10H,5-6,9H2
893 : 0  //  452 : 0
903 : InChI=1S/C27H36N2O4/c1-4-33-25-17-20(12-13-22(25)27(31)32)18-26(30)28-23(16-19(2)3)21-10-6-7-11-24(21)29-14-8-5-9-15-29/h6-7,10-13,17,19,23H,4-5,8-9,14-16,18H2,1-3H3,(H,28,30)(H,31,32)/t23-/m0/s1  //  401 : InChI=1S/C27H36N2O4/c1-4-33-25-17-20(12-13-22(25)27(31)32)18-26(30)28-23(16-19(2)3)21-10-6-7-11-24(21)29-14-8-5-9-15-29/h6-7,10-13,17,19,23H,4-5,8-9,14-16,18H2,1-3H3,(H,28,30)(H,31,32)/t23-/m0/s1
903 : 0  //  401 : 0
907 : InChI=1S/C9H15NO3S/c1-6(5-14)8(11)10-4-2-3-7(10)9(12)13/h6-7,14H,2-5H2,1H3,(H,12,13)/t6-,7+/m1/s1  //  484 : InChI=1S/C9H15NO3S/c1-6(5-14)8(11)10-4-2-3-7(10)9(12)13/h6-7,14H,2-5H2,1H3,(H,12,13)/t6-,7+/m1/s1
907 : 0  //  484 : 0
915 : InChI=1S/C34H63N5O9/c1-17(2)12-23(37-33(47)31(21(9)10)39-34(48)30(20(7)8)38-27(42)14-19(5)6)25(40)15-28(43)35-22(11)32(46)36-24(13-18(3)4)26(41)16-29(44)45/h17-26,30-31,40-41H,

1276 : InChI=1S/C10H15N5/c1-4-14(5-2)9-6-8(3)13-10-11-7-12-15(9)10/h6-7H,4-5H2,1-3H3  //  25 : InChI=1S/C10H15N5/c1-4-14(5-2)9-6-8(3)13-10-11-7-12-15(9)10/h6-7H,4-5H2,1-3H3
1276 : 0  //  25 : 0
1292 : InChI=1S/C16H23BrN2O3/c1-4-19-9-5-6-11(19)10-18-16(20)14-13(21-2)8-7-12(17)15(14)22-3/h7-8,11H,4-6,9-10H2,1-3H3,(H,18,20)/t11-/m0/s1  //  226 : InChI=1S/C16H23BrN2O3/c1-4-19-9-5-6-11(19)10-18-16(20)14-13(21-2)8-7-12(17)15(14)22-3/h7-8,11H,4-6,9-10H2,1-3H3,(H,18,20)/t11-/m0/s1
1292 : 0  //  226 : 0
1310 : InChI=1S/C14H13N3O5S/c1-10(18)15-11-4-8-14(9-5-11)23(21,22)16-12-2-6-13(7-3-12)17(19)20/h2-9,16H,1H3,(H,15,18)  //  133 : InChI=1S/C14H13N3O5S/c1-10(18)15-11-4-8-14(9-5-11)23(21,22)16-12-2-6-13(7-3-12)17(19)20/h2-9,16H,1H3,(H,15,18)
1310 : 1  //  133 : 1
1339 : InChI=1S/C22H32O3/c1-12-9-15-17-6-5-16(13(2)23)22(17,4)11-19(25)20(15)21(3)8-7-14(24)10-18(12)21/h10,12,15-17,19-20,25H,5-9,11H2,1-4H3/t12-,15-,16+,17-,19-,20+,21-,22+/m0/s1  //  363 : InChI=1S/C22H32O3/c1-12-9-15-17-6-5-16(13(2)23

1700 : InChI=1S/C21H26O2/c1-4-21(22)12-10-19-18-7-5-14-13-15(23-3)6-8-16(14)17(18)9-11-20(19,21)2/h1,6,8,13,17-19,22H,5,7,9-12H2,2-3H3/t17-,18-,19+,20+,21+/m1/s1  //  326 : InChI=1S/C21H26O2/c1-4-21(22)12-10-19-18-7-5-14-13-15(23-3)6-8-16(14)17(18)9-11-20(19,21)2/h1,6,8,13,17-19,22H,5,7,9-12H2,2-3H3/t17-,18-,19+,20+,21+/m1/s1
1700 : 0  //  326 : 0
1703 : InChI=1S/C12H15NO3/c1-8-3-9(2)5-10(4-8)15-7-11-6-13-12(14)16-11/h3-5,11H,6-7H2,1-2H3,(H,13,14)  //  82 : InChI=1S/C12H15NO3/c1-8-3-9(2)5-10(4-8)15-7-11-6-13-12(14)16-11/h3-5,11H,6-7H2,1-2H3,(H,13,14)
1703 : 0  //  82 : 0
1728 : InChI=1S/C19H32N2O5/c1-4-8-14(19(25)26-5-2)20-12(3)17(22)21-15-10-7-6-9-13(15)11-16(21)18(23)24/h12-16,20H,4-11H2,1-3H3,(H,23,24)/t12-,13-,14-,15-,16-/m0/s1  //  298 : InChI=1S/C19H32N2O5/c1-4-8-14(19(25)26-5-2)20-12(3)17(22)21-15-10-7-6-9-13(15)11-16(21)18(23)24/h12-16,20H,4-11H2,1-3H3,(H,23,24)/t12-,13-,14-,15-,16-/m0/s1
1728 : 0  //  298 : 0
1730 : InChI=1S/C7H10ClN3O3/c1-5-9-3-7(11(13)14)10(5)4-6(12)2-8/h3,6

2069 : InChI=1S/C26H30O8/c1-22(2)15-9-16(27)24(4)14(25(15)12-31-18(28)10-17(25)33-22)5-7-23(3)19(13-6-8-30-11-13)32-21(29)20-26(23,24)34-20/h6,8,11,14-15,17,19-20H,5,7,9-10,12H2,1-4H3/t14-,15-,17-,19-,20+,23-,24-,25+,26+/m0/s1  //  391 : InChI=1S/C26H30O8/c1-22(2)15-9-16(27)24(4)14(25(15)12-31-18(28)10-17(25)33-22)5-7-23(3)19(13-6-8-30-11-13)32-21(29)20-26(23,24)34-20/h6,8,11,14-15,17,19-20H,5,7,9-10,12H2,1-4H3/t14-,15-,17-,19-,20+,23-,24-,25+,26+/m0/s1
2069 : 0  //  391 : 0
2167 : InChI=1S/C7H10N2OS/c1-2-3-5-4-6(10)9-7(11)8-5/h4H,2-3H2,1H3,(H2,8,9,10,11)  //  434 : InChI=1S/C7H10N2OS/c1-2-3-5-4-6(10)9-7(11)8-5/h4H,2-3H2,1H3,(H2,8,9,10,11)
2167 : 0  //  434 : 0
2180 : InChI=1S/C21H27N3O2/c1-4-15(12-25)22-21(26)14-8-17-16-6-5-7-18-20(16)13(10-23(18)2)9-19(17)24(3)11-14/h5-8,10,14-15,19,25H,4,9,11-12H2,1-3H3,(H,22,26)/t14-,15+,19-/m1/s1  //  329 : InChI=1S/C21H27N3O2/c1-4-15(12-25)22-21(26)14-8-17-16-6-5-7-18-20(16)13(10-23(18)2)9-19(17)24(3)11-14/h5-8,10,14-15,19,25H,4,9,11-12H2,1-3H3,(

2643 : InChI=1S/C10H14N2/c1-2-7-12-10(5-1)9-4-3-6-11-8-9/h3-4,6,8,10,12H,1-2,5,7H2/t10-/m0/s1  //  20 : InChI=1S/C10H14N2/c1-2-7-12-10(5-1)9-4-3-6-11-8-9/h3-4,6,8,10,12H,1-2,5,7H2/t10-/m0/s1
2643 : 0  //  20 : 0
2678 : InChI=1S/C8H14ClN5/c1-4-10-7-12-6(9)13-8(14-7)11-5(2)3/h5H,4H2,1-3H3,(H2,10,11,12,13,14)  //  456 : InChI=1S/C8H14ClN5/c1-4-10-7-12-6(9)13-8(14-7)11-5(2)3/h5H,4H2,1-3H3,(H2,10,11,12,13,14)
2678 : 0  //  456 : 0
2713 : InChI=1S/C21H30O3/c1-13(22)24-15-8-10-20(2)14(12-15)4-5-16-17-6-7-19(23)21(17,3)11-9-18(16)20/h4,15-18H,5-12H2,1-3H3/t15-,16-,17-,18-,20-,21-/m0/s1  //  338 : InChI=1S/C21H30O3/c1-13(22)24-15-8-10-20(2)14(12-15)4-5-16-17-6-7-19(23)21(17,3)11-9-18(16)20/h4,15-18H,5-12H2,1-3H3/t15-,16-,17-,18-,20-,21-/m0/s1
2713 : 0  //  338 : 1
2723 : InChI=1S/C13H21NO3/c1-13(2,3)14-7-12(17)9-4-5-11(16)10(6-9)8-15/h4-6,12,14-17H,7-8H2,1-3H3  //  118 : InChI=1S/C13H21NO3/c1-13(2,3)14-7-12(17)9-4-5-11(16)10(6-9)8-15/h4-6,12,14-17H,7-8H2,1-3H3
2723 : 0  //  118 : 0
2724 : InChI

3152 : InChI=1S/C13H12N2O3S/c14-11-6-8-12(9-7-11)19(17,18)15-13(16)10-4-2-1-3-5-10/h1-9H,14H2,(H,15,16)  //  101 : InChI=1S/C13H12N2O3S/c14-11-6-8-12(9-7-11)19(17,18)15-13(16)10-4-2-1-3-5-10/h1-9H,14H2,(H,15,16)
3152 : 0  //  101 : 0
3157 : InChI=1S/C17H13N3O5S2/c21-15(13-3-1-2-4-14(13)16(22)23)19-11-5-7-12(8-6-11)27(24,25)20-17-18-9-10-26-17/h1-10H,(H,18,20)(H,19,21)(H,22,23)  //  234 : InChI=1S/C17H13N3O5S2/c21-15(13-3-1-2-4-14(13)16(22)23)19-11-5-7-12(8-6-11)27(24,25)20-17-18-9-10-26-17/h1-10H,(H,18,20)(H,19,21)(H,22,23)
3157 : 0  //  234 : 0
3161 : InChI=1S/C6H8ClNS/c1-5-6(2-3-7)9-4-8-5/h4H,2-3H2,1H3  //  428 : InChI=1S/C6H8ClNS/c1-5-6(2-3-7)9-4-8-5/h4H,2-3H2,1H3
3161 : 0  //  428 : 0
3167 : InChI=1S/C22H32O3/c1-4-20(24)25-19-8-7-17-16-6-5-14-13-15(23)9-11-21(14,2)18(16)10-12-22(17,19)3/h13,16-19H,4-12H2,1-3H3/t16-,17-,18-,19-,21-,22-/m0/s1  //  364 : InChI=1S/C22H32O3/c1-4-20(24)25-19-8-7-17-16-6-5-14-13-15(23)9-11-21(14,2)18(16)10-12-22(17,19)3/h13,16-19H,4-12H2,1-3H3/t16-,17-,18

3573 : InChI=1S/C15H22N4O3/c1-4-8-18-10-16-13-12(18)14(21)19(15(22)17(13)3)9-6-5-7-11(2)20/h10H,4-9H2,1-3H3  //  196 : InChI=1S/C15H22N4O3/c1-4-8-18-10-16-13-12(18)14(21)19(15(22)17(13)3)9-6-5-7-11(2)20/h10H,4-9H2,1-3H3
3573 : 0  //  196 : 0
3576 : InChI=1S/C14H16N4O3/c1-2-17-8-10(13(20)21)11(19)9-7-15-14(16-12(9)17)18-5-3-4-6-18/h7-8H,2-6H2,1H3,(H,20,21)  //  144 : InChI=1S/C14H16N4O3/c1-2-17-8-10(13(20)21)11(19)9-7-15-14(16-12(9)17)18-5-3-4-6-18/h7-8H,2-6H2,1H3,(H,20,21)
3576 : 0  //  144 : 0
3586 : InChI=1S/C29H33ClN2O2/c1-31(2)27(33)29(24-9-5-3-6-10-24,25-11-7-4-8-12-25)19-22-32-20-17-28(34,18-21-32)23-13-15-26(30)16-14-23/h3-16,34H,17-22H2,1-2H3  //  410 : InChI=1S/C29H33ClN2O2/c1-31(2)27(33)29(24-9-5-3-6-10-24,25-11-7-4-8-12-25)19-22-32-20-17-28(34,18-21-32)23-13-15-26(30)16-14-23/h3-16,34H,17-22H2,1-2H3
3586 : 0  //  410 : 0
3593 : InChI=1S/C15H10O7/c16-7-4-10(19)12-11(5-7)22-15(14(21)13(12)20)6-1-2-8(17)9(18)3-6/h1-5,16-19,21H  //  174 : InChI=1S/C15H10O7/c16-7-4-10(19)12-11(5-

4077 : InChI=1S/C8H8N2O3S/c9-14(11,12)5-7-6-3-1-2-4-8(6)13-10-7/h1-4H,5H2,(H2,9,11,12)  //  467 : InChI=1S/C8H8N2O3S/c9-14(11,12)5-7-6-3-1-2-4-8(6)13-10-7/h1-4H,5H2,(H2,9,11,12)
4077 : 0  //  467 : 0
4080 : InChI=1S/C24H31N5O2/c1-3-27(4-2)16-18-10-7-8-15-28(18)17-22(30)29-21-13-6-5-11-19(21)24(31)26-20-12-9-14-25-23(20)29/h5-6,9,11-14,18H,3-4,7-8,10,15-17H2,1-2H3,(H,26,31)  //  382 : InChI=1S/C24H31N5O2/c1-3-27(4-2)16-18-10-7-8-15-28(18)17-22(30)29-21-13-6-5-11-19(21)24(31)26-20-12-9-14-25-23(20)29/h5-6,9,11-14,18H,3-4,7-8,10,15-17H2,1-2H3,(H,26,31)
4080 : 0  //  382 : 0
4105 : InChI=1S/C13H10N2O4/c16-10-6-5-9(11(17)14-10)15-12(18)7-3-1-2-4-8(7)13(15)19/h1-4,9H,5-6H2,(H,14,16,17)  //  97 : InChI=1S/C13H10N2O4/c16-10-6-5-9(11(17)14-10)15-12(18)7-3-1-2-4-8(7)13(15)19/h1-4,9H,5-6H2,(H,14,16,17)
4105 : 0  //  97 : 0
4110 : InChI=1S/C6H4N2O5/c9-6-2-1-4(7(10)11)3-5(6)8(12)13/h1-3,9H  //  425 : InChI=1S/C6H4N2O5/c9-6-2-1-4(7(10)11)3-5(6)8(12)13/h1-3,9H
4110 : 1  //  425 : 1
4126 : InChI=1S/C9

4555 : InChI=1S/C11H12ClNO3S/c1-13-10(14)6-7-17(15,16)11(13)8-2-4-9(12)5-3-8/h2-5,11H,6-7H2,1H3  //  44 : InChI=1S/C11H12ClNO3S/c1-13-10(14)6-7-17(15,16)11(13)8-2-4-9(12)5-3-8/h2-5,11H,6-7H2,1H3
4555 : 0  //  44 : 0
4558 : InChI=1S/C11H18N2O3/c1-4-6-7(3)11(5-2)8(14)12-10(16)13-9(11)15/h7H,4-6H2,1-3H3,(H2,12,13,14,15,16)  //  62 : InChI=1S/C11H18N2O3/c1-4-6-7(3)11(5-2)8(14)12-10(16)13-9(11)15/h7H,4-6H2,1-3H3,(H2,12,13,14,15,16)
4558 : 0  //  62 : 0
4571 : InChI=1S/C15H23N3OS/c1-5-18(6-2)9-10-19-12-7-8-13-14(11-12)20-15(16-13)17(3)4/h7-8,11H,5-6,9-10H2,1-4H3  //  199 : InChI=1S/C15H23N3OS/c1-5-18(6-2)9-10-19-12-7-8-13-14(11-12)20-15(16-13)17(3)4/h7-8,11H,5-6,9-10H2,1-4H3
4571 : 0  //  199 : 0
4573 : InChI=1S/C7H9N/c8-6-7-4-2-1-3-5-7/h1-5H,6,8H2  //  448 : InChI=1S/C7H9N/c8-6-7-4-2-1-3-5-7/h1-5H,6,8H2
4573 : 0  //  448 : 0
4588 : InChI=1S/C17H12Br2O3/c1-2-13-15(10-5-3-4-6-14(10)22-13)16(20)9-7-11(18)17(21)12(19)8-9/h3-8,21H,2H2,1H3  //  232 : InChI=1S/C17H12Br2O3/c1-2-13-15(10-5-3-4-6-14(

4938 : InChI=1S/C19H22FN3O/c20-17-8-6-16(7-9-17)18(24)4-3-11-22-12-14-23(15-13-22)19-5-1-2-10-21-19/h1-2,5-10H,3-4,11-15H2  //  289 : InChI=1S/C19H22FN3O/c20-17-8-6-16(7-9-17)18(24)4-3-11-22-12-14-23(15-13-22)19-5-1-2-10-21-19/h1-2,5-10H,3-4,11-15H2
4938 : 0  //  289 : 0
4947 : InChI=1S/C19H22FN3O4/c1-10-8-22(6-5-21-10)16-14(20)7-12-15(18(16)27-2)23(11-3-4-11)9-13(17(12)24)19(25)26/h7,9-11,21H,3-6,8H2,1-2H3,(H,25,26)  //  290 : InChI=1S/C19H22FN3O4/c1-10-8-22(6-5-21-10)16-14(20)7-12-15(18(16)27-2)23(11-3-4-11)9-13(17(12)24)19(25)26/h7,9-11,21H,3-6,8H2,1-2H3,(H,25,26)
4947 : 0  //  290 : 0
4950 : InChI=1S/C8H15N7O2S3/c9-6(15-20(12,16)17)1-2-18-3-5-4-19-8(13-5)14-7(10)11/h4H,1-3H2,(H2,9,15)(H2,12,16,17)(H4,10,11,13,14)  //  457 : InChI=1S/C8H15N7O2S3/c9-6(15-20(12,16)17)1-2-18-3-5-4-19-8(13-5)14-7(10)11/h4H,1-3H2,(H2,9,15)(H2,12,16,17)(H4,10,11,13,14)
4950 : 0  //  457 : 0
4953 : InChI=1S/C15H11I4NO4/c16-8-4-7(5-9(17)13(8)21)24-14-10(18)1-6(2-11(14)19)3-12(20)15(22)23/h1-2,4-5,12,21H,3,2

5525 : InChI=1S/C17H21NO3/c1-3-11-6-5-7-12-13-8-9-21-17(4-2,10-14(19)20)16(13)18-15(11)12/h5-7,18H,3-4,8-10H2,1-2H3,(H,19,20)  //  247 : InChI=1S/C17H21NO3/c1-3-11-6-5-7-12-13-8-9-21-17(4-2,10-14(19)20)16(13)18-15(11)12/h5-7,18H,3-4,8-10H2,1-2H3,(H,19,20)
5525 : 0  //  247 : 0
5788 : InChI=1S/C22H24ClN5O2/c23-15-6-7-20-18(14-15)25-22(30)28(20)16-8-12-26(13-9-16)10-3-11-27-19-5-2-1-4-17(19)24-21(27)29/h1-2,4-7,14,16H,3,8-13H2,(H,24,29)(H,25,30)  //  350 : InChI=1S/C22H24ClN5O2/c23-15-6-7-20-18(14-15)25-22(30)28(20)16-8-12-26(13-9-16)10-3-11-27-19-5-2-1-4-17(19)24-21(27)29/h1-2,4-7,14,16H,3,8-13H2,(H,24,29)(H,25,30)
5788 : 0  //  350 : 0
5803 : InChI=1S/C32H40BrN5O5/c1-16(2)12-24-29(40)37-11-7-10-25(37)32(42)38(24)30(41)31(43-32,17(3)4)35-28(39)18-13-20-19-8-6-9-22-26(19)21(27(33)34-22)14-23(20)36(5)15-18/h6,8-9,13,16-18,23-25,34,42H,7,10-12,14-15H2,1-5H3,(H,35,39)/t18-,23-,24+,25+,31-,32+/m1/s1  //  415 : InChI=1S/C32H40BrN5O5/c1-16(2)12-24-29(40)37-11-7-10-25(37)32(42)38(24)30(41)31(43

In [164]:
len(num),len(counter1),len(counter2)

(424, 11, 11)

In [165]:
d3_ = d3.drop(num)
d3_.reset_index(inplace=True)
d3_.drop("index",axis = 1,inplace = True)
d3_["Toxicity"].value_counts()

Toxicity
0    53
1    19
Name: count, dtype: int64

In [166]:
#Have overlap index, so weird....
#Some conpound's mol will be None when we 
counter1 = set(counter1)
d_total_ = d_total.drop(counter1)
d_total_.reset_index(inplace=True)
d_total_.drop("index",axis = 1,inplace = True)
d_total_["Mol"] = [Chem.MolFromInchi(x) for x in d_total_["Inchi"]]
d_total_["Toxicity"].value_counts()

[12:04:30] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[12:04:30] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted


[12:04:31] Explicit valence for atom # 0 Cl, 3, is greater than permitted
[12:04:31] ERROR: Explicit valence for atom # 0 Cl, 3, is greater than permitted

[12:04:32] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[12:04:32] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted




Toxicity
0    5788
1    1495
Name: count, dtype: int64

In [167]:
d_total_ = pd.concat([d_total_,d3_],axis = 0)
d_total_.reset_index(inplace=True)
d_total_.drop("index",axis = 1,inplace = True)
d_total_["Toxicity"].value_counts()

Toxicity
0    5841
1    1514
Name: count, dtype: int64

In [168]:
#combine d_total and cp and ge into dtotal(same assay)
#delete the overlap part
num = []
counter1 = []
counter2 = []
for i in range(len(d_total_)):
    for y in range(len(d5)):
        if(d_total_["Inchi"][i] == d5["Inchi"][y]):
            num.append(y)
            print(i,":",d_total_["Inchi"][i]," // ",y,":",d5["Inchi"][y])
            print(i,":",d_total_["Toxicity"][i]," // ",y,":",d5["Toxicity"][y])
            if d_total_["Toxicity"][i] != d5["Toxicity"][y]:
                counter1.append(i)
                counter2.append(y)

4 : InChI=1S/C16H13ClN2O/c1-19-14-8-7-12(17)9-13(14)16(18-10-15(19)20)11-5-3-2-4-6-11/h2-9H,10H2,1H3  //  282 : InChI=1S/C16H13ClN2O/c1-19-14-8-7-12(17)9-13(14)16(18-10-15(19)20)11-5-3-2-4-6-11/h2-9H,10H2,1H3
4 : 0  //  282 : 0
15 : InChI=1S/C13H6Cl6O2/c14-6-2-8(16)12(20)4(10(6)18)1-5-11(19)7(15)3-9(17)13(5)21/h2-3,20-21H,1H2  //  175 : InChI=1S/C13H6Cl6O2/c14-6-2-8(16)12(20)4(10(6)18)1-5-11(19)7(15)3-9(17)13(5)21/h2-3,20-21H,1H2
15 : 1  //  175 : 1
15 : InChI=1S/C13H6Cl6O2/c14-6-2-8(16)12(20)4(10(6)18)1-5-11(19)7(15)3-9(17)13(5)21/h2-3,20-21H,1H2  //  176 : InChI=1S/C13H6Cl6O2/c14-6-2-8(16)12(20)4(10(6)18)1-5-11(19)7(15)3-9(17)13(5)21/h2-3,20-21H,1H2
15 : 1  //  176 : 1
32 : InChI=1S/C19H26O2/c1-18-9-7-13(20)11-12(18)3-4-14-15-5-6-17(21)19(15,2)10-8-16(14)18/h11,14-16H,3-10H2,1-2H3/t14-,15-,16-,18-,19-/m0/s1  //  420 : InChI=1S/C19H26O2/c1-18-9-7-13(20)11-12(18)3-4-14-15-5-6-17(21)19(15,2)10-8-16(14)18/h11,14-16H,3-10H2,1-2H3/t14-,15-,16-,18-,19-/m0/s1
32 : 0  //  420 : 0
34 : InChI=1

302 : InChI=1S/C46H65N13O12S2/c47-17-5-4-9-29(40(65)52-22-38(51)63)54-45(70)35-10-6-18-59(35)46(71)34-24-73-72-23-28(48)39(64)55-31(20-26-11-13-27(60)14-12-26)43(68)56-32(19-25-7-2-1-3-8-25)42(67)53-30(15-16-36(49)61)41(66)57-33(21-37(50)62)44(69)58-34/h1-3,7-8,11-14,28-35,60H,4-6,9-10,15-24,47-48H2,(H2,49,61)(H2,50,62)(H2,51,63)(H,52,65)(H,53,67)(H,54,70)(H,55,64)(H,56,68)(H,57,66)(H,58,69)/t28-,29-,30-,31-,32-,33-,34-,35-/m0/s1  //  625 : InChI=1S/C46H65N13O12S2/c47-17-5-4-9-29(40(65)52-22-38(51)63)54-45(70)35-10-6-18-59(35)46(71)34-24-73-72-23-28(48)39(64)55-31(20-26-11-13-27(60)14-12-26)43(68)56-32(19-25-7-2-1-3-8-25)42(67)53-30(15-16-36(49)61)41(66)57-33(21-37(50)62)44(69)58-34/h1-3,7-8,11-14,28-35,60H,4-6,9-10,15-24,47-48H2,(H2,49,61)(H2,50,62)(H2,51,63)(H,52,65)(H,53,67)(H,54,70)(H,55,64)(H,56,68)(H,57,66)(H,58,69)/t28-,29-,30-,31-,32-,33-,34-,35-/m0/s1
302 : 0  //  625 : 0
308 : InChI=1S/C12H16O7/c13-5-8-9(15)10(16)11(17)12(19-8)18-7-3-1-6(14)2-4-7/h1-4,8-17H,5H2/t8-,9-,10+,11-

579 : InChI=1S/C6H14O6S2/c1-13(7,8)11-5-3-4-6-12-14(2,9)10/h3-6H2,1-2H3  //  652 : InChI=1S/C6H14O6S2/c1-13(7,8)11-5-3-4-6-12-14(2,9)10/h3-6H2,1-2H3
579 : 0  //  652 : 0
581 : InChI=1S/C10H13NO2/c1-3-13-10-6-4-9(5-7-10)11-8(2)12/h4-7H,3H2,1-2H3,(H,11,12)  //  26 : InChI=1S/C10H13NO2/c1-3-13-10-6-4-9(5-7-10)11-8(2)12/h4-7H,3H2,1-2H3,(H,11,12)
581 : 0  //  26 : 0
582 : InChI=1S/C12H19N3O/c1-9(2)15-12(16)11-6-4-10(5-7-11)8-14-13-3/h4-7,9,13-14H,8H2,1-3H3,(H,15,16)  //  122 : InChI=1S/C12H19N3O/c1-9(2)15-12(16)11-6-4-10(5-7-11)8-14-13-3/h4-7,9,13-14H,8H2,1-3H3,(H,15,16)
582 : 0  //  122 : 0
588 : InChI=1S/C10H16N2O/c1-2-7(1)9(8-3-4-8)12-10-11-5-6-13-10/h7-9H,1-6H2,(H,11,12)  //  40 : InChI=1S/C10H16N2O/c1-2-7(1)9(8-3-4-8)12-10-11-5-6-13-10/h7-9H,1-6H2,(H,11,12)
588 : 0  //  40 : 0
591 : InChI=1S/C15H26O/c1-13(2)7-5-8-14(3)9-6-10-15(4)11-12-16/h7,9,11,16H,5-6,8,10,12H2,1-4H3/b14-9+,15-11+  //  279 : InChI=1S/C15H26O/c1-13(2)7-5-8-14(3)9-6-10-15(4)11-12-16/h7,9,11,16H,5-6,8,10,12H2,1-4H3/b14

837 : InChI=1S/C27H32N4O7S/c1-18(2)20-14-19(36-3)15-22-25(20)27(33)31(39(22,34)35)17-38-23-16-24(32)30-11-7-8-21(26(30)28-23)37-13-12-29-9-5-4-6-10-29/h7-8,11,14-16,18H,4-6,9-10,12-13,17H2,1-3H3  //  577 : InChI=1S/C27H32N4O7S/c1-18(2)20-14-19(36-3)15-22-25(20)27(33)31(39(22,34)35)17-38-23-16-24(32)30-11-7-8-21(26(30)28-23)37-13-12-29-9-5-4-6-10-29/h7-8,11,14-16,18H,4-6,9-10,12-13,17H2,1-3H3
837 : 0  //  577 : 0
847 : InChI=1S/C18H30O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18(19)20/h3-4,6-7,9-10H,2,5,8,11-17H2,1H3,(H,19,20)/b4-3-,7-6-,10-9-  //  387 : InChI=1S/C18H30O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18(19)20/h3-4,6-7,9-10H,2,5,8,11-17H2,1H3,(H,19,20)/b4-3-,7-6-,10-9-
847 : 0  //  387 : 0
848 : InChI=1S/C21H45N3/c1-6-10-12-19(8-3)14-23-16-21(5,22)17-24(18-23)15-20(9-4)13-11-7-2/h19-20H,6-18,22H2,1-5H3  //  491 : InChI=1S/C21H45N3/c1-6-10-12-19(8-3)14-23-16-21(5,22)17-24(18-23)15-20(9-4)13-11-7-2/h19-20H,6-18,22H2,1-5H3
848 : 0  //  491 : 0
851 : InChI=1S/C12H15NO3/c1-12

1024 : InChI=1S/C12H11N7/c13-9-7(6-4-2-1-3-5-6)16-8-10(14)18-12(15)19-11(8)17-9/h1-5H,(H6,13,14,15,17,18,19)  //  91 : InChI=1S/C12H11N7/c13-9-7(6-4-2-1-3-5-6)16-8-10(14)18-12(15)19-11(8)17-9/h1-5H,(H6,13,14,15,17,18,19)
1024 : 0  //  91 : 0
1026 : InChI=1S/C6H12N3PS/c11-10(7-1-2-7,8-3-4-8)9-5-6-9/h1-6H2  //  649 : InChI=1S/C6H12N3PS/c11-10(7-1-2-7,8-3-4-8)9-5-6-9/h1-6H2
1026 : 0  //  649 : 0
1051 : InChI=1S/C22H32O3/c1-13-11-16-17(20(3)8-5-15(24)12-19(13)20)6-9-21(4)18(16)7-10-22(21,25)14(2)23/h12-13,16-18,25H,5-11H2,1-4H3/t13-,16+,17-,18-,20+,21-,22-/m0/s1  //  521 : InChI=1S/C22H32O3/c1-13-11-16-17(20(3)8-5-15(24)12-19(13)20)6-9-21(4)18(16)7-10-22(21,25)14(2)23/h12-13,16-18,25H,5-11H2,1-4H3/t13-,16+,17-,18-,20+,21-,22-/m0/s1
1051 : 0  //  521 : 0
1057 : InChI=1S/C19H24N2O2/c22-18-13-20(19(23)15-7-2-1-3-8-15)12-17-16-9-5-4-6-14(16)10-11-21(17)18/h4-6,9,15,17H,1-3,7-8,10-13H2  //  417 : InChI=1S/C19H24N2O2/c22-18-13-20(19(23)15-7-2-1-3-8-15)12-17-16-9-5-4-6-14(16)10-11-21(17)18/h4-6,9

1366 : InChI=1S/C15H10O/c16-15-13(11-7-3-1-4-8-11)14(15)12-9-5-2-6-10-12/h1-10H  //  232 : InChI=1S/C15H10O/c16-15-13(11-7-3-1-4-8-11)14(15)12-9-5-2-6-10-12/h1-10H
1366 : 1  //  232 : 1
1369 : InChI=1S/C12H21N3O5S3/c1-3-14-10-8-15(5-4-6-20-2)23(18,19)12-9(10)7-11(21-12)22(13,16)17/h7,10,14H,3-6,8H2,1-2H3,(H2,13,16,17)/t10-/m0/s1  //  125 : InChI=1S/C12H21N3O5S3/c1-3-14-10-8-15(5-4-6-20-2)23(18,19)12-9(10)7-11(21-12)22(13,16)17/h7,10,14H,3-6,8H2,1-2H3,(H2,13,16,17)/t10-/m0/s1
1369 : 0  //  125 : 0
1382 : InChI=1S/C15H14F3N3O4S2/c16-15(17,18)10-7-11-13(8-12(10)26(19,22)23)27(24,25)21-14(20-11)6-9-4-2-1-3-5-9/h1-5,7-8,14,20-21H,6H2,(H2,19,22,23)  //  256 : InChI=1S/C15H14F3N3O4S2/c16-15(17,18)10-7-11-13(8-12(10)26(19,22)23)27(24,25)21-14(20-11)6-9-4-2-1-3-5-9/h1-5,7-8,14,20-21H,6H2,(H2,19,22,23)
1382 : 0  //  256 : 0
1388 : InChI=1S/CHCl3/c2-1(3)4/h1H  //  765 : InChI=1S/CHCl3/c2-1(3)4/h1H
1388 : 0  //  765 : 0
1405 : InChI=1S/C19H20N2O3/c1-2-3-9-17-18(23)20(14-7-5-4-6-8-14)21(19(17)24)15

1694 : InChI=1S/C22H28O3/c1-4-22(25-14(2)23)12-10-20-19-7-5-15-13-16(24)6-8-17(15)18(19)9-11-21(20,22)3/h1,13,17-20H,5-12H2,2-3H3/t17-,18+,19+,20-,21-,22-/m0/s1  //  513 : InChI=1S/C22H28O3/c1-4-22(25-14(2)23)12-10-20-19-7-5-15-13-16(24)6-8-17(15)18(19)9-11-21(20,22)3/h1,13,17-20H,5-12H2,2-3H3/t17-,18+,19+,20-,21-,22-/m0/s1
1694 : 0  //  513 : 0
1697 : InChI=1S/C21H26O2/c1-4-21(22)12-10-19-18-7-5-14-13-15(23-3)6-8-16(14)17(18)9-11-20(19,21)2/h1,6,8,13,17-19,22H,5,7,9-12H2,2-3H3/t17-,18-,19+,20+,21+/m1/s1  //  470 : InChI=1S/C21H26O2/c1-4-21(22)12-10-19-18-7-5-14-13-15(23-3)6-8-16(14)17(18)9-11-20(19,21)2/h1,6,8,13,17-19,22H,5,7,9-12H2,2-3H3/t17-,18-,19+,20+,21+/m1/s1
1697 : 0  //  470 : 0
1700 : InChI=1S/C12H15NO3/c1-8-3-9(2)5-10(4-8)15-7-11-6-13-12(14)16-11/h3-5,11H,6-7H2,1-2H3,(H,13,14)  //  110 : InChI=1S/C12H15NO3/c1-8-3-9(2)5-10(4-8)15-7-11-6-13-12(14)16-11/h3-5,11H,6-7H2,1-2H3,(H,13,14)
1700 : 0  //  110 : 0
1701 : InChI=1S/C11H21N/c1-10(2)8-5-6-9(7-8)11(10,3)12-4/h8-9,12H,5-7H2,

1995 : InChI=1S/C17H26N4O3S2/c1-12(16(5-6-22)26-25-10-15-4-3-7-24-15)21(11-23)9-14-8-19-13(2)20-17(14)18/h8,11,15,22H,3-7,9-10H2,1-2H3,(H2,18,19,20)/b16-12+  //  352 : InChI=1S/C17H26N4O3S2/c1-12(16(5-6-22)26-25-10-15-4-3-7-24-15)21(11-23)9-14-8-19-13(2)20-17(14)18/h8,11,15,22H,3-7,9-10H2,1-2H3,(H2,18,19,20)/b16-12+
1995 : 0  //  352 : 0
2009 : InChI=1S/C27H25F2N3OS/c1-18-24(26(33)32-16-17-34-27(32)30-18)12-15-31-13-10-21(11-14-31)25(19-2-6-22(28)7-3-19)20-4-8-23(29)9-5-20/h2-9,16-17H,10-15H2,1H3  //  572 : InChI=1S/C27H25F2N3OS/c1-18-24(26(33)32-16-17-34-27(32)30-18)12-15-31-13-10-21(11-14-31)25(19-2-6-22(28)7-3-19)20-4-8-23(29)9-5-20/h2-9,16-17H,10-15H2,1H3
2009 : 0  //  572 : 0
2016 : InChI=1S/C22H25N3O3/c26-21-22(25(16-23-21)17-6-2-1-3-7-17)10-12-24(13-11-22)14-18-15-27-19-8-4-5-9-20(19)28-18/h1-9,18H,10-16H2,(H,23,26)  //  503 : InChI=1S/C22H25N3O3/c26-21-22(25(16-23-21)17-6-2-1-3-7-17)10-12-24(13-11-22)14-18-15-27-19-8-4-5-9-20(19)28-18/h1-9,18H,10-16H2,(H,23,26)
2016 : 0  //  50

2377 : InChI=1S/C7H6O5/c8-4-1-3(7(11)12)2-5(9)6(4)10/h1-2,8-10H,(H,11,12)  //  688 : InChI=1S/C7H6O5/c8-4-1-3(7(11)12)2-5(9)6(4)10/h1-2,8-10H,(H,11,12)
2377 : 0  //  688 : 0
2390 : InChI=1S/C14H10F3NO2/c15-14(16,17)9-4-3-5-10(8-9)18-12-7-2-1-6-11(12)13(19)20/h1-8,18H,(H,19,20)  //  183 : InChI=1S/C14H10F3NO2/c15-14(16,17)9-4-3-5-10(8-9)18-12-7-2-1-6-11(12)13(19)20/h1-8,18H,(H,19,20)
2390 : 1  //  183 : 1
2421 : InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9-,10-,11+,14-/m1/s1  //  305 : InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9-,10-,11+,14-/m1/s1
2421 : 0  //  305 : 0
2427 : InChI=1S/C41H64O14/c1-19-36(47)28(42)15-34(50-19)54-38-21(3)52-35(17-30(38)44)55-37-20(2)51-33(16-29(37)43)53-24-8-10-39(4)23(13-24)6-7-26-27(39)14-31(45)40(5)25(9-11-41(26,40)48)22-12-32(46)49-18-22/h12,19-21,23-31

2649 : InChI=1S/C22H25NO3/c1-21(2)11-12-22(3,4)18-13-16(9-10-17(18)21)23-19(24)14-5-7-15(8-6-14)20(25)26/h5-10,13H,11-12H2,1-4H3,(H,23,24)(H,25,26)  //  505 : InChI=1S/C22H25NO3/c1-21(2)11-12-22(3,4)18-13-16(9-10-17(18)21)23-19(24)14-5-7-15(8-6-14)20(25)26/h5-10,13H,11-12H2,1-4H3,(H,23,24)(H,25,26)
2649 : 1  //  505 : 1
2652 : InChI=1S/C31H37N5O3/c1-22-9-11-23(12-10-22)36-29(21-28(34-36)31(2,3)4)33-30(37)32-26-13-14-27(25-8-6-5-7-24(25)26)39-20-17-35-15-18-38-19-16-35/h5-14,21H,15-20H2,1-4H3,(H2,32,33,37)  //  605 : InChI=1S/C31H37N5O3/c1-22-9-11-23(12-10-22)36-29(21-28(34-36)31(2,3)4)33-30(37)32-26-13-14-27(25-8-6-5-7-24(25)26)39-20-17-35-15-18-38-19-16-35/h5-14,21H,15-20H2,1-4H3,(H2,32,33,37)
2652 : 1  //  605 : 1
2660 : InChI=1S/C6H7NO/c8-5-6-2-1-3-7-4-6/h1-4,8H,5H2  //  666 : InChI=1S/C6H7NO/c8-5-6-2-1-3-7-4-6/h1-4,8H,5H2
2660 : 0  //  666 : 0
2662 : InChI=1S/C21H21ClN4OS/c22-17-13-18-15(12-20(27)23-18)11-14(17)5-6-25-7-9-26(10-8-25)21-16-3-1-2-4-19(16)28-24-21/h1-4,11,13H,5-10,12H

2948 : InChI=1S/C15H14FN3O3/c1-3-22-15(21)13-12-7-18(2)14(20)10-6-9(16)4-5-11(10)19(12)8-17-13/h4-6,8H,3,7H2,1-2H3  //  257 : InChI=1S/C15H14FN3O3/c1-3-22-15(21)13-12-7-18(2)14(20)10-6-9(16)4-5-11(10)19(12)8-17-13/h4-6,8H,3,7H2,1-2H3
2948 : 0  //  257 : 0
2950 : InChI=1S/C5H4N4O/c10-5-3-1-8-9-4(3)6-2-7-5/h1-2H,(H2,6,7,8,9,10)  //  640 : InChI=1S/C5H4N4O/c10-5-3-1-8-9-4(3)6-2-7-5/h1-2H,(H2,6,7,8,9,10)
2950 : 0  //  640 : 0
2951 : InChI=1S/C11H20O/c1-2-3-4-5-6-7-8-9-10-11-12/h2,11H,1,3-10H2  //  86 : InChI=1S/C11H20O/c1-2-3-4-5-6-7-8-9-10-11-12/h2,11H,1,3-10H2
2951 : 0  //  86 : 0
2953 : InChI=1S/C18H15NO3/c20-16(21)12-11-15-19-17(13-7-3-1-4-8-13)18(22-15)14-9-5-2-6-10-14/h1-10H,11-12H2,(H,20,21)  //  358 : InChI=1S/C18H15NO3/c20-16(21)12-11-15-19-17(13-7-3-1-4-8-13)18(22-15)14-9-5-2-6-10-14/h1-10H,11-12H2,(H,20,21)
2953 : 0  //  358 : 0
2956 : InChI=1S/C12H24N2O4/c1-5-6-12(4,7-17-10(13)15)8-18-11(16)14-9(2)3/h9H,5-8H2,1-4H3,(H2,13,15)(H,14,16)  //  128 : InChI=1S/C12H24N2O4/c1-5-6-12(4,

3349 : InChI=1S/C27H41NO2/c1-15-11-24-25(28-14-15)17(3)27(30-24)10-8-20-21-6-5-18-12-19(29)7-9-26(18,4)23(21)13-22(20)16(27)2/h5,15,17,19-21,23-25,28-29H,6-14H2,1-4H3/t15-,17+,19-,20-,21-,23-,24+,25-,26-,27-/m0/s1  //  581 : InChI=1S/C27H41NO2/c1-15-11-24-25(28-14-15)17(3)27(30-24)10-8-20-21-6-5-18-12-19(29)7-9-26(18,4)23(21)13-22(20)16(27)2/h5,15,17,19-21,23-25,28-29H,6-14H2,1-4H3/t15-,17+,19-,20-,21-,23-,24+,25-,26-,27-/m0/s1
3349 : 0  //  581 : 0
3354 : InChI=1S/C30H30O8/c1-11(2)19-15-7-13(5)21(27(35)23(15)17(9-31)25(33)29(19)37)22-14(6)8-16-20(12(3)4)30(38)26(34)18(10-32)24(16)28(22)36/h7-12,33-38H,1-6H3  //  601 : InChI=1S/C30H30O8/c1-11(2)19-15-7-13(5)21(27(35)23(15)17(9-31)25(33)29(19)37)22-14(6)8-16-20(12(3)4)30(38)26(34)18(10-32)24(16)28(22)36/h7-12,33-38H,1-6H3
3354 : 1  //  601 : 1
3355 : InChI=1S/C14H8O4/c15-9-5-1-3-7-11(9)14(18)12-8(13(7)17)4-2-6-10(12)16/h1-6,15-16H  //  225 : InChI=1S/C14H8O4/c15-9-5-1-3-7-11(9)14(18)12-8(13(7)17)4-2-6-10(12)16/h1-6,15-16H
3355 : 1  //  

3660 : InChI=1S/C19H21N5O2/c1-22-9-11-23(12-10-22)13-17(25)24-16-7-3-2-5-14(16)19(26)21-15-6-4-8-20-18(15)24/h2-8H,9-13H2,1H3,(H,21,26)  //  405 : InChI=1S/C19H21N5O2/c1-22-9-11-23(12-10-22)13-17(25)24-16-7-3-2-5-14(16)19(26)21-15-6-4-8-20-18(15)24/h2-8H,9-13H2,1H3,(H,21,26)
3660 : 0  //  405 : 0
3669 : InChI=1S/C10H7F3O4/c1-5(14)17-8-4-6(10(11,12)13)2-3-7(8)9(15)16/h2-4H,1H3,(H,15,16)  //  50 : InChI=1S/C10H7F3O4/c1-5(14)17-8-4-6(10(11,12)13)2-3-7(8)9(15)16/h2-4H,1H3,(H,15,16)
3669 : 0  //  50 : 0
3678 : InChI=1S/C13H16N2O2/c1-2-13(8-7-11(16)15-12(13)17)9-3-5-10(14)6-4-9/h3-6H,2,7-8,14H2,1H3,(H,15,16,17)  //  158 : InChI=1S/C13H16N2O2/c1-2-13(8-7-11(16)15-12(13)17)9-3-5-10(14)6-4-9/h3-6H,2,7-8,14H2,1H3,(H,15,16,17)
3678 : 0  //  158 : 0
3682 : InChI=1S/C17H19N3/c1-19-9-10-20-16(12-19)15-7-3-2-5-13(15)11-14-6-4-8-18-17(14)20/h2-8,16H,9-12H2,1H3  //  338 : InChI=1S/C17H19N3/c1-19-9-10-20-16(12-19)15-7-3-2-5-13(15)11-14-6-4-8-18-17(14)20/h2-8,16H,9-12H2,1H3
3682 : 0  //  338 : 0
3697 : I

4049 : InChI=1S/C19H12O8/c1-8(20)26-13-5-3-4-11-15(13)18(23)16-12(17(11)22)6-10(19(24)25)7-14(16)27-9(2)21/h3-7H,1-2H3,(H,24,25)  //  392 : InChI=1S/C19H12O8/c1-8(20)26-13-5-3-4-11-15(13)18(23)16-12(17(11)22)6-10(19(24)25)7-14(16)27-9(2)21/h3-7H,1-2H3,(H,24,25)
4049 : 1  //  392 : 1
4052 : InChI=1S/C24H21I6N5O8/c1-7(37)35(3)20-17(29)10(21(39)31-2)13(25)11(18(20)30)23(41)33-6-8(38)34-19-15(27)9(22(40)32-4-5-36)14(26)12(16(19)28)24(42)43/h36H,4-6H2,1-3H3,(H,31,39)(H,32,40)(H,33,41)(H,34,38)(H,42,43)  //  541 : InChI=1S/C24H21I6N5O8/c1-7(37)35(3)20-17(29)10(21(39)31-2)13(25)11(18(20)30)23(41)33-6-8(38)34-19-15(27)9(22(40)32-4-5-36)14(26)12(16(19)28)24(42)43/h36H,4-6H2,1-3H3,(H,31,39)(H,32,40)(H,33,41)(H,34,38)(H,42,43)
4052 : 0  //  541 : 0
4054 : InChI=1S/C15H10O5/c16-9-3-1-8(2-4-9)11-7-20-13-6-10(17)5-12(18)14(13)15(11)19/h1-7,16-18H  //  235 : InChI=1S/C15H10O5/c16-9-3-1-8(2-4-9)11-7-20-13-6-10(17)5-12(18)14(13)15(11)19/h1-7,16-18H
4054 : 1  //  235 : 1
4054 : InChI=1S/C15H10O5/c16-9-3

4365 : InChI=1S/C20H24N2O6/c1-11(2)10-28-20(24)17-13(4)21-12(3)16(19(23)27-5)18(17)14-8-6-7-9-15(14)22(25)26/h6-9,11,18,21H,10H2,1-5H3  //  436 : InChI=1S/C20H24N2O6/c1-11(2)10-28-20(24)17-13(4)21-12(3)16(19(23)27-5)18(17)14-8-6-7-9-15(14)22(25)26/h6-9,11,18,21H,10H2,1-5H3
4365 : 1  //  436 : 1
4366 : InChI=1S/C6H12N4/c1-7-2-9-4-8(1)5-10(3-7)6-9/h1-6H2  //  650 : InChI=1S/C6H12N4/c1-7-2-9-4-8(1)5-10(3-7)6-9/h1-6H2
4366 : 0  //  650 : 0
4379 : InChI=1S/C11H12N4O3S/c1-18-11-7-6-10(13-14-11)15-19(16,17)9-4-2-8(12)3-5-9/h2-7H,12H2,1H3,(H,13,15)  //  68 : InChI=1S/C11H12N4O3S/c1-18-11-7-6-10(13-14-11)15-19(16,17)9-4-2-8(12)3-5-9/h2-7H,12H2,1H3,(H,13,15)
4379 : 0  //  68 : 0
4387 : InChI=1S/C13H22N4O3S/c1-14-13(9-17(18)19)15-6-7-21-10-12-5-4-11(20-12)8-16(2)3/h4-5,9,14-15H,6-8,10H2,1-3H3/b13-9+  //  173 : InChI=1S/C13H22N4O3S/c1-14-13(9-17(18)19)15-6-7-21-10-12-5-4-11(20-12)8-16(2)3/h4-5,9,14-15H,6-8,10H2,1-3H3/b13-9+
4387 : 0  //  173 : 0
4420 : InChI=1S/C11H17ClO7P2/c1-15-20(13,16-2)11(19-

4644 : InChI=1S/C9H12ClN5O/c1-5-13-7(10)6(8(14-5)16-2)15-9-11-3-4-12-9/h3-4H2,1-2H3,(H2,11,12,15)  //  736 : InChI=1S/C9H12ClN5O/c1-5-13-7(10)6(8(14-5)16-2)15-9-11-3-4-12-9/h3-4H2,1-2H3,(H2,11,12,15)
4644 : 0  //  736 : 0
4678 : InChI=1S/C9H11NO4/c10-6(9(13)14)3-5-1-2-7(11)8(12)4-5/h1-2,4,6,11-12H,3,10H2,(H,13,14)/t6-/m0/s1  //  735 : InChI=1S/C9H11NO4/c10-6(9(13)14)3-5-1-2-7(11)8(12)4-5/h1-2,4,6,11-12H,3,10H2,(H,13,14)/t6-/m0/s1
4678 : 0  //  735 : 0
4687 : InChI=1S/C17H20ClN3O3/c1-20-14-7-11(18)6-12(16(14)24-9-15(20)22)17(23)19-13-8-21-4-2-10(13)3-5-21/h6-7,10,13H,2-5,8-9H2,1H3,(H,19,23)  //  341 : InChI=1S/C17H20ClN3O3/c1-20-14-7-11(18)6-12(16(14)24-9-15(20)22)17(23)19-13-8-21-4-2-10(13)3-5-21/h6-7,10,13H,2-5,8-9H2,1H3,(H,19,23)
4687 : 0  //  341 : 0
4708 : InChI=1S/C23H29F3O6/c24-23(25,26)15-6-5-7-17(12-15)32-14-16(27)10-11-19-18(20(28)13-21(19)29)8-3-1-2-4-9-22(30)31/h1,3,5-7,10-12,16,18-21,27-29H,2,4,8-9,13-14H2,(H,30,31)/b3-1-,11-10+/t16-,18-,19-,20+,21-/m1/s1  //  531 : InChI=1

4997 : InChI=1S/C13H16N2O/c1-2-4-12(11(3-1)10-5-6-10)16-9-13-14-7-8-15-13/h1-4,10H,5-9H2,(H,14,15)  //  157 : InChI=1S/C13H16N2O/c1-2-4-12(11(3-1)10-5-6-10)16-9-13-14-7-8-15-13/h1-4,10H,5-9H2,(H,14,15)
4997 : 0  //  157 : 0
5008 : InChI=1S/C10H16N2O3S/c13-8(14)4-2-1-3-7-9-6(5-16-7)11-10(15)12-9/h6-7,9H,1-5H2,(H,13,14)(H2,11,12,15)/t6-,7-,9-/m0/s1  //  41 : InChI=1S/C10H16N2O3S/c13-8(14)4-2-1-3-7-9-6(5-16-7)11-10(15)12-9/h6-7,9H,1-5H2,(H,13,14)(H2,11,12,15)/t6-,7-,9-/m0/s1
5008 : 0  //  41 : 0
5024 : InChI=1S/C12H9N3O5/c16-9-3-1-8(2-4-9)12(17)14-13-7-10-5-6-11(20-10)15(18)19/h1-7,16H,(H,14,17)  //  137 : InChI=1S/C12H9N3O5/c16-9-3-1-8(2-4-9)12(17)14-13-7-10-5-6-11(20-10)15(18)19/h1-7,16H,(H,14,17)
5024 : 1  //  137 : 1
5030 : InChI=1S/C10H16N8S2/c1-14-9(16-6-11)15-2-3-19-4-7-5-20-10(17-7)18-8(12)13/h5H,2-4H2,1H3,(H2,14,15,16)(H4,12,13,17,18)  //  43 : InChI=1S/C10H16N8S2/c1-14-9(16-6-11)15-2-3-19-4-7-5-20-10(17-7)18-8(12)13/h5H,2-4H2,1H3,(H2,14,15,16)(H4,12,13,17,18)
5030 : 0  //  43 : 

5305 : InChI=1S/C7H8N4O2/c1-10-5-4(8-3-9-5)6(12)11(2)7(10)13/h3H,1-2H3,(H,8,9)  //  693 : InChI=1S/C7H8N4O2/c1-10-5-4(8-3-9-5)6(12)11(2)7(10)13/h3H,1-2H3,(H,8,9)
5305 : 0  //  693 : 0
5330 : InChI=1S/C21H27N5O4S/c1-15-13-24-19(14-23-15)20(27)22-12-11-16-7-9-18(10-8-16)31(29,30)26-21(28)25-17-5-3-2-4-6-17/h7-10,13-14,17H,2-6,11-12H2,1H3,(H,22,27)(H2,25,26,28)  //  474 : InChI=1S/C21H27N5O4S/c1-15-13-24-19(14-23-15)20(27)22-12-11-16-7-9-18(10-8-16)31(29,30)26-21(28)25-17-5-3-2-4-6-17/h7-10,13-14,17H,2-6,11-12H2,1H3,(H,22,27)(H2,25,26,28)
5330 : 0  //  474 : 0
5333 : InChI=1S/C14H18N2O/c1-9(2)13-12(14(17)10(3)4)11-7-5-6-8-16(11)15-13/h5-10H,1-4H3  //  207 : InChI=1S/C14H18N2O/c1-9(2)13-12(14(17)10(3)4)11-7-5-6-8-16(11)15-13/h5-10H,1-4H3
5333 : 0  //  207 : 0
5350 : InChI=1S/C18H21NO/c20-18(15-7-3-1-4-8-15,16-9-5-2-6-10-16)17-11-13-19-14-12-17/h1-10,17,19-20H,11-14H2  //  369 : InChI=1S/C18H21NO/c20-18(15-7-3-1-4-8-15,16-9-5-2-6-10-16)17-11-13-19-14-12-17/h1-10,17,19-20H,11-14H2
5350 : 0  

5950 : InChI=1S/C12H9N3O/c1-8-11(9-2-4-14-5-3-9)6-10(7-13)12(16)15-8/h2-6H,1H3,(H,15,16)  //  136 : InChI=1S/C12H9N3O/c1-8-11(9-2-4-14-5-3-9)6-10(7-13)12(16)15-8/h2-6H,1H3,(H,15,16)
5950 : 0  //  136 : 0
5963 : InChI=1S/C10H15N5O3/c11-10-13-8-7(9(18)14-10)12-5-15(8)2-1-6(3-16)4-17/h5-6,16-17H,1-4H2,(H3,11,13,14,18)  //  39 : InChI=1S/C10H15N5O3/c11-10-13-8-7(9(18)14-10)12-5-15(8)2-1-6(3-16)4-17/h5-6,16-17H,1-4H2,(H3,11,13,14,18)
5963 : 0  //  39 : 0
5982 : InChI=1S/C26H37N5O2/c1-5-11-30-17-19(25(32)31(26(33)27-6-2)13-8-12-29(3)4)14-21-20-9-7-10-22-24(20)18(16-28-22)15-23(21)30/h5,7,9-10,16,19,21,23,28H,1,6,8,11-15,17H2,2-4H3,(H,27,33)/t19-,21-,23-/m1/s1  //  570 : InChI=1S/C26H37N5O2/c1-5-11-30-17-19(25(32)31(26(33)27-6-2)13-8-12-29(3)4)14-21-20-9-7-10-22-24(20)18(16-28-22)15-23(21)30/h5,7,9-10,16,19,21,23,28H,1,6,8,11-15,17H2,2-4H3,(H,27,33)/t19-,21-,23-/m1/s1
5982 : 0  //  570 : 0
6113 : InChI=1S/C14H20N2O2/c1-10(2)16-8-11(17)9-18-14-5-3-4-13-12(14)6-7-15-13/h3-7,10-11,15-17H,8-9H2,1

6994 : InChI=1S/C16H8Cl4N2O2/c17-9-5-13(19)15(21-7-9)23-11-1-2-12(4-3-11)24-16-14(20)6-10(18)8-22-16/h1-8H  //  321 : InChI=1S/C16H8Cl4N2O2/c17-9-5-13(19)15(21-7-9)23-11-1-2-12(4-3-11)24-16-14(20)6-10(18)8-22-16/h1-8H
6994 : 1  //  321 : 1
7050 : InChI=1S/C13H17N3O2/c1-3-4-5-9-6-7-10-11(8-9)15-12(14-10)16-13(17)18-2/h6-8H,3-5H2,1-2H3,(H2,14,15,16,17)  //  162 : InChI=1S/C13H17N3O2/c1-3-4-5-9-6-7-10-11(8-9)15-12(14-10)16-13(17)18-2/h6-8H,3-5H2,1-2H3,(H2,14,15,16,17)
7050 : 1  //  162 : 1
7054 : InChI=1S/C24H20N6O3/c1-2-33-24-25-20-9-5-8-19(23(31)32)21(20)30(24)14-15-10-12-16(13-11-15)17-6-3-4-7-18(17)22-26-28-29-27-22/h3-13H,2,14H2,1H3,(H,31,32)(H,26,27,28,29)  //  539 : InChI=1S/C24H20N6O3/c1-2-33-24-25-20-9-5-8-19(23(31)32)21(20)30(24)14-15-10-12-16(13-11-15)17-6-3-4-7-18(17)22-26-28-29-27-22/h3-13H,2,14H2,1H3,(H,31,32)(H,26,27,28,29)
7054 : 1  //  539 : 1
7065 : InChI=1S/C16H13N3O3/c1-22-16(21)19-15-17-12-8-7-11(9-13(12)18-15)14(20)10-5-3-2-4-6-10/h2-9H,1H3,(H2,17,18,19,21)  //  283 

7316 : InChI=1S/C17H11N5/c18-9-13-1-5-15(6-2-13)17(22-12-20-11-21-22)16-7-3-14(10-19)4-8-16/h1-8,11-12,17H  //  322 : InChI=1S/C17H11N5/c18-9-13-1-5-15(6-2-13)17(22-12-20-11-21-22)16-7-3-14(10-19)4-8-16/h1-8,11-12,17H
7316 : 0  //  322 : 0
7317 : InChI=1S/C17H14O4S/c1-22(19,20)14-9-7-12(8-10-14)15-11-21-17(18)16(15)13-5-3-2-4-6-13/h2-10H,11H2,1H3  //  329 : InChI=1S/C17H14O4S/c1-22(19,20)14-9-7-12(8-10-14)15-11-21-17(18)16(15)13-5-3-2-4-6-13/h2-10H,11H2,1H3
7317 : 0  //  329 : 0
7318 : InChI=1S/C18H20FN3O4/c1-10-9-26-17-14-11(16(23)12(18(24)25)8-22(10)14)7-13(19)15(17)21-5-3-20(2)4-6-21/h7-8,10H,3-6,9H2,1-2H3,(H,24,25)  //  364 : InChI=1S/C18H20FN3O4/c1-10-9-26-17-14-11(16(23)12(18(24)25)8-22(10)14)7-13(19)15(17)21-5-3-20(2)4-6-21/h7-8,10H,3-6,9H2,1-2H3,(H,24,25)
7318 : 0  //  364 : 0
7319 : InChI=1S/C18H22O2/c1-18-9-8-14-13-5-3-12(19)10-11(13)2-4-15(14)16(18)6-7-17(18)20/h3,5,10,14-16,19H,2,4,6-9H2,1H3/t14-,15-,16+,18+/m1/s1  //  372 : InChI=1S/C18H22O2/c1-18-9-8-14-13-5-3-12(19)10-11

In [169]:
len(num),len(counter1),len(counter2)

(692, 17, 17)

In [170]:
d5_ = d5.drop(num)
d5_.reset_index(inplace=True)
d5_.drop("index",axis = 1,inplace = True)
d5_["Toxicity"].value_counts()

Toxicity
0    58
1    19
Name: count, dtype: int64

In [171]:
#Have overlap index, so weird....
#Some conpound's mol will be None when we 
counter1 = set(counter1)
d_total__ = d_total_.drop(counter1)
d_total__.reset_index(inplace=True)
d_total__.drop("index",axis = 1,inplace = True)
d_total__["Mol"] = [Chem.MolFromInchi(x) for x in d_total__["Inchi"]]
d_total__["Toxicity"].value_counts()

[12:05:14] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[12:05:14] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted


[12:05:15] Explicit valence for atom # 0 Cl, 3, is greater than permitted
[12:05:15] ERROR: Explicit valence for atom # 0 Cl, 3, is greater than permitted

[12:05:15] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[12:05:15] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted




Toxicity
0    5835
1    1503
Name: count, dtype: int64

In [172]:
d_total__ = pd.concat([d_total__,d5_],axis = 0)
d_total__.reset_index(inplace=True)
d_total__.drop("index",axis = 1,inplace = True)
d_total__["Toxicity"].value_counts()

Toxicity
0    5893
1    1522
Name: count, dtype: int64

In [173]:
#combine d2 and total
#zhang
#delete the overlap part
num = []
counter1 = []
counter2 = []
for i in range(len(d_total__)):
    for y in range(len(d2)):
        if(d_total__["Inchi"][i] == d2["Inchi"][y]):
            num.append(y)
            print(i,":",d_total__["Inchi"][i]," // ",y,":",d2["Inchi"][y])
            print(i,":",d_total__["Toxicity"][i]," // ",y,":",d2["Toxicity"][y])
            if d_total__["Toxicity"][i] != d2["Toxicity"][y]:
                counter1.append(i)
                counter2.append(y)

128 : InChI=1S/C18H29N3O5/c1-18(2,3)19-11-15(22)12-8-13(25-16(23)20(4)5)10-14(9-12)26-17(24)21(6)7/h8-10,15,19,22H,11H2,1-7H3  //  0 : InChI=1S/C18H29N3O5/c1-18(2,3)19-11-15(22)12-8-13(25-16(23)20(4)5)10-14(9-12)26-17(24)21(6)7/h8-10,15,19,22H,11H2,1-7H3
128 : 0  //  0 : 0
169 : InChI=1S/C9H7Cl2N5/c10-4-1-2-6(11)5(3-4)7-14-8(12)16-9(13)15-7/h1-3H,(H4,12,13,14,15,16)  //  2 : InChI=1S/C9H7Cl2N5/c10-4-1-2-6(11)5(3-4)7-14-8(12)16-9(13)15-7/h1-3H,(H4,12,13,14,15,16)
169 : 0  //  2 : 0
238 : InChI=1S/C34H54O8/c1-9-25(31-21(6)18-34(11-3,42-31)26-16-17-33(40,10-2)23(8)41-26)30(37)22(7)28(35)19(4)12-14-24-15-13-20(5)29(36)27(24)32(38)39/h13,15,19,21-23,25-26,28,31,35-36,40H,9-12,14,16-18H2,1-8H3,(H,38,39)/t19-,21+,22+,23+,25+,26-,28+,31+,33-,34+/m1/s1  //  3 : InChI=1S/C34H54O8/c1-9-25(31-21(6)18-34(11-3,42-31)26-16-17-33(40,10-2)23(8)41-26)30(37)22(7)28(35)19(4)12-14-24-15-13-20(5)29(36)27(24)32(38)39/h13,15,19,21-23,25-26,28,31,35-36,40H,9-12,14,16-18H2,1-8H3,(H,38,39)/t19-,21+,22+,23+,25+,2

1646 : InChI=1S/C14H9ClN2O3S/c15-7-3-4-9-8(6-7)11(13(19)17(9)14(16)20)12(18)10-2-1-5-21-10/h1-6,11H,(H2,16,20)  //  34 : InChI=1S/C14H9ClN2O3S/c15-7-3-4-9-8(6-7)11(13(19)17(9)14(16)20)12(18)10-2-1-5-21-10/h1-6,11H,(H2,16,20)
1646 : 1  //  34 : 1
1701 : InChI=1S/C15H21N3O/c1-11(5-3-7-16)18-14-10-13(19-2)9-12-6-4-8-17-15(12)14/h4,6,8-11,18H,3,5,7,16H2,1-2H3  //  35 : InChI=1S/C15H21N3O/c1-11(5-3-7-16)18-14-10-13(19-2)9-12-6-4-8-17-15(12)14/h4,6,8-11,18H,3,5,7,16H2,1-2H3
1701 : 0  //  35 : 0
1705 : InChI=1S/C9H9Cl2N3O/c10-6-2-1-3-7(11)5(6)4-8(15)14-9(12)13/h1-3H,4H2,(H4,12,13,14,15)  //  36 : InChI=1S/C9H9Cl2N3O/c10-6-2-1-3-7(11)5(6)4-8(15)14-9(12)13/h1-3H,4H2,(H4,12,13,14,15)
1705 : 0  //  36 : 0
1709 : InChI=1S/C15H22N2O/c1-11-7-6-8-12(2)14(11)16-15(18)13-9-4-5-10-17(13)3/h6-8,13H,4-5,9-10H2,1-3H3,(H,16,18)  //  37 : InChI=1S/C15H22N2O/c1-11-7-6-8-12(2)14(11)16-15(18)13-9-4-5-10-17(13)3/h6-8,13H,4-5,9-10H2,1-3H3,(H,16,18)
1709 : 0  //  37 : 0
1715 : InChI=1S/C17H21NO3/c1-12(17(21)14-4-8

2493 : InChI=1S/C22H24ClN3O/c1-25-13-4-5-18(12-14-25)26-22(27)20-7-3-2-6-19(20)21(24-26)15-16-8-10-17(23)11-9-16/h2-3,6-11,18H,4-5,12-15H2,1H3  //  66 : InChI=1S/C22H24ClN3O/c1-25-13-4-5-18(12-14-25)26-22(27)20-7-3-2-6-19(20)21(24-26)15-16-8-10-17(23)11-9-16/h2-3,6-11,18H,4-5,12-15H2,1H3
2493 : 0  //  66 : 0
2515 : InChI=1S/C13H17N/c1-4-10-14(3)12(2)11-13-8-6-5-7-9-13/h1,5-9,12H,10-11H2,2-3H3/t12-/m1/s1  //  67 : InChI=1S/C13H17N/c1-4-10-14(3)12(2)11-13-8-6-5-7-9-13/h1,5-9,12H,10-11H2,2-3H3/t12-/m1/s1
2515 : 0  //  67 : 0
2588 : InChI=1S/C15H13N3O4S/c1-18-13(15(20)17-12-8-4-5-9-16-12)14(19)10-6-2-3-7-11(10)23(18,21)22/h2-9,13H,1H3,(H,16,17,20)  //  69 : InChI=1S/C15H13N3O4S/c1-18-13(15(20)17-12-8-4-5-9-16-12)14(19)10-6-2-3-7-11(10)23(18,21)22/h2-9,13H,1H3,(H,16,17,20)
2588 : 1  //  69 : 1
2614 : InChI=1S/C17H19N3O/c1-13-5-7-14(8-6-13)20(12-17-18-9-10-19-17)15-3-2-4-16(21)11-15/h2-8,11,21H,9-10,12H2,1H3,(H,18,19)  //  70 : InChI=1S/C17H19N3O/c1-13-5-7-14(8-6-13)20(12-17-18-9-10-19-17)15

3713 : InChI=1S/C17H18F3NO/c1-21-12-11-16(13-5-3-2-4-6-13)22-15-9-7-14(8-10-15)17(18,19)20/h2-10,16,21H,11-12H2,1H3  //  101 : InChI=1S/C17H18F3NO/c1-21-12-11-16(13-5-3-2-4-6-13)22-15-9-7-14(8-10-15)17(18,19)20/h2-10,16,21H,11-12H2,1H3
3713 : 1  //  101 : 1
3736 : InChI=1S/C23H25N5O5/c1-30-18-11-14-15(12-19(18)31-2)25-23(26-21(14)24)28-9-7-27(8-10-28)22(29)20-13-32-16-5-3-4-6-17(16)33-20/h3-6,11-12,20H,7-10,13H2,1-2H3,(H2,24,25,26)  //  102 : InChI=1S/C23H25N5O5/c1-30-18-11-14-15(12-19(18)31-2)25-23(26-21(14)24)28-9-7-27(8-10-28)22(29)20-13-32-16-5-3-4-6-17(16)33-20/h3-6,11-12,20H,7-10,13H2,1-2H3,(H2,24,25,26)
3736 : 0  //  102 : 0
3752 : InChI=1S/C32H39NO4/c1-31(2,30(35)36)25-17-15-24(16-18-25)29(34)14-9-21-33-22-19-28(20-23-33)32(37,26-10-5-3-6-11-26)27-12-7-4-8-13-27/h3-8,10-13,15-18,28-29,34,37H,9,14,19-23H2,1-2H3,(H,35,36)  //  103 : InChI=1S/C32H39NO4/c1-31(2,30(35)36)25-17-15-24(16-18-25)29(34)14-9-21-33-22-19-28(20-23-33)32(37,26-10-5-3-6-11-26)27-12-7-4-8-13-27/h3-8,10-13,15-1

4856 : InChI=1S/C26H28Cl2N4O4/c1-19(33)31-10-12-32(13-11-31)21-3-5-22(6-4-21)34-15-23-16-35-26(36-23,17-30-9-8-29-18-30)24-7-2-20(27)14-25(24)28/h2-9,14,18,23H,10-13,15-17H2,1H3/t23-,26-/m0/s1  //  133 : InChI=1S/C26H28Cl2N4O4/c1-19(33)31-10-12-32(13-11-31)21-3-5-22(6-4-21)34-15-23-16-35-26(36-23,17-30-9-8-29-18-30)24-7-2-20(27)14-25(24)28/h2-9,14,18,23H,10-13,15-17H2,1H3/t23-,26-/m0/s1
4856 : 1  //  133 : 1
4881 : InChI=1S/C24H28N2O5/c1-2-31-24(30)20(14-12-17-8-4-3-5-9-17)25-19-15-13-18-10-6-7-11-21(18)26(23(19)29)16-22(27)28/h3-11,19-20,25H,2,12-16H2,1H3,(H,27,28)/t19-,20-/m0/s1  //  134 : InChI=1S/C24H28N2O5/c1-2-31-24(30)20(14-12-17-8-4-3-5-9-17)25-19-15-13-18-10-6-7-11-21(18)26(23(19)29)16-22(27)28/h3-11,19-20,25H,2,12-16H2,1H3,(H,27,28)/t19-,20-/m0/s1
4881 : 0  //  134 : 0
4914 : InChI=1S/C6H8ClN7O/c7-2-4(9)13-3(8)1(12-2)5(15)14-6(10)11/h(H4,8,9,13)(H4,10,11,14,15)  //  135 : InChI=1S/C6H8ClN7O/c7-2-4(9)13-3(8)1(12-2)5(15)14-6(10)11/h(H4,8,9,13)(H4,10,11,14,15)
4914 : 0  //  135 

In [174]:
len(num),len(counter1),len(counter2)

(141, 3, 3)

In [175]:
d2_ = d2.drop(num)
d2_.reset_index(inplace=True)
d2_.drop("index",axis = 1,inplace = True)
d2_["Toxicity"].value_counts()

Toxicity
0    81
1    22
Name: count, dtype: int64

In [176]:
#Have overlap index, so weird....
#Some conpound's mol will be None when we 
counter1 = set(counter1)
d_total___ = d_total__.drop(counter1)
d_total___.reset_index(inplace=True)
d_total___.drop("index",axis = 1,inplace = True)
d_total___["Mol"] = [Chem.MolFromInchi(x) for x in d_total___["Inchi"]]
d_total___["Toxicity"].value_counts()

[12:05:29] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[12:05:29] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted


[12:05:30] Explicit valence for atom # 0 Cl, 3, is greater than permitted
[12:05:30] ERROR: Explicit valence for atom # 0 Cl, 3, is greater than permitted

[12:05:31] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[12:05:31] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted




Toxicity
0    5890
1    1522
Name: count, dtype: int64

In [177]:
d_final = pd.concat([d_total___,d2_],axis = 0)
d_final.reset_index(inplace=True)
d_final.drop("index",axis = 1,inplace = True)
d_final["Toxicity"].value_counts()

Toxicity
0    5971
1    1544
Name: count, dtype: int64

In [178]:
# d_final.to_csv("data/train_data.csv")

In [179]:
#check overlap with external testset
num = []
counter1 = []
counter2 = []
for i in range(len(d_final)):
    for y in range(len(d6)):
        if(d_final["Inchi"][i] == d6["Inchi"][y]):
            num.append(y)
            print(i,":",d_final["Inchi"][i]," // ",y,":",d6["Inchi"][y])
            print(i,":",d_final["Toxicity"][i]," // ",y,":",d6["Toxicity"][y])
            if d_final["Toxicity"][i] != d6["Toxicity"][y]:
                counter1.append(i)
                counter2.append(y)

5300 : InChI=1S/C7H8N4O2/c1-10-5-4(8-3-9-5)6(12)11(2)7(10)13/h3H,1-2H3,(H,8,9)  //  140 : InChI=1S/C7H8N4O2/c1-10-5-4(8-3-9-5)6(12)11(2)7(10)13/h3H,1-2H3,(H,8,9)
5300 : 0  //  140 : 0
6844 : InChI=1S/C4H5NOS/c1-5-4(6)2-3-7-5/h2-3H,1H3  //  36 : InChI=1S/C4H5NOS/c1-5-4(6)2-3-7-5/h2-3H,1H3
6844 : 1  //  36 : 0
6897 : InChI=1S/C11H14O2/c1-3-5-9-6-7-11(13-4-2)10(12)8-9/h3,5-8,12H,4H2,1-2H3  //  473 : InChI=1S/C11H14O2/c1-3-5-9-6-7-11(13-4-2)10(12)8-9/h3,5-8,12H,4H2,1-2H3
6897 : 1  //  473 : 1
6964 : InChI=1S/C22H17ClF3N3O7/c1-33-18(30)21-10-12-9-13(23)3-8-16(12)17(21)27-28(11-35-21)19(31)29(20(32)34-2)14-4-6-15(7-5-14)36-22(24,25)26/h3-9H,10-11H2,1-2H3  //  565 : InChI=1S/C22H17ClF3N3O7/c1-33-18(30)21-10-12-9-13(23)3-8-16(12)17(21)27-28(11-35-21)19(31)29(20(32)34-2)14-4-6-15(7-5-14)36-22(24,25)26/h3-9H,10-11H2,1-2H3
6964 : 1  //  565 : 1
7088 : InChI=1S/C20H16N2O4/c1-2-20(25)14-8-16-17-12(7-11-5-3-4-6-15(11)21-17)9-22(16)18(23)13(14)10-26-19(20)24/h3-8,25H,2,9-10H2,1H3  //  584 : InChI=1S/

7466 : InChI=1S/C19H27N3O4S/c1-21-13-17(16-5-3-4-6-18(16)21)19(23)26-14-15-7-10-22(11-8-15)12-9-20-27(2,24)25/h3-6,13,15,20H,7-12,14H2,1-2H3  //  341 : InChI=1S/C19H27N3O4S/c1-21-13-17(16-5-3-4-6-18(16)21)19(23)26-14-15-7-10-22(11-8-15)12-9-20-27(2,24)25/h3-6,13,15,20H,7-12,14H2,1-2H3
7466 : 0  //  341 : 0
7467 : InChI=1S/C12H18N4O3/c1-7(2)5-16-10-9(11(17)15(3)12(16)18)13-8(14-10)6-19-4/h7H,5-6H2,1-4H3,(H,13,14)  //  353 : InChI=1S/C12H18N4O3/c1-7(2)5-16-10-9(11(17)15(3)12(16)18)13-8(14-10)6-19-4/h7H,5-6H2,1-4H3,(H,13,14)
7467 : 0  //  353 : 0
7468 : InChI=1S/C14H11N/c1-12-6-5-9-14(15-12)11-10-13-7-3-2-4-8-13/h2-9H,1H3  //  360 : InChI=1S/C14H11N/c1-12-6-5-9-14(15-12)11-10-13-7-3-2-4-8-13/h2-9H,1H3
7468 : 0  //  360 : 0
7469 : InChI=1S/C21H22N2O3/c24-21(25)19-12-7-13-23(16-19)14-15-26-22-20(17-8-3-1-4-9-17)18-10-5-2-6-11-18/h1-6,8-12H,7,13-16H2,(H,24,25)  //  363 : InChI=1S/C21H22N2O3/c24-21(25)19-12-7-13-23(16-19)14-15-26-22-20(17-8-3-1-4-9-17)18-10-5-2-6-11-18/h1-6,8-12H,7,13-16H2,(H

In [180]:
len(num),len(counter1),len(counter2)

(92, 2, 2)

In [181]:
d6_ = d6.drop(num)
d6_.reset_index(inplace=True)
d6_.drop("index",axis = 1,inplace = True)
d6_["Toxicity"].value_counts()

Toxicity
0    543
1     71
Name: count, dtype: int64

In [182]:
#Have overlap index, so weird....
#Some conpound's mol will be None when we 
counter1 = set(counter1)
d_final = d_final.drop(counter1)
d_final.reset_index(inplace=True)
d_final.drop("index",axis = 1,inplace = True)
d_final["Mol"] = [Chem.MolFromInchi(x) for x in d_final["Inchi"]]
d_final["Toxicity"].value_counts()

[12:06:10] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[12:06:10] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted


[12:06:11] Explicit valence for atom # 0 Cl, 3, is greater than permitted
[12:06:11] ERROR: Explicit valence for atom # 0 Cl, 3, is greater than permitted

[12:06:12] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[12:06:12] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted




Toxicity
0    5971
1    1542
Name: count, dtype: int64

In [185]:
for i in range(len(d_final)):
    inchi = Chem.MolFromInchi(d_final["Inchi"][i])
    if inchi == None:
        print(i,":",d_final["Mol"][i],d_final["Toxicity"][i] )
        d_final.drop(index = i,inplace=True,axis=0)
#     else:
#         mol_list.append(All_Mordred_descriptors(inchi))
d_final.reset_index(inplace=True)
d_final.drop("index",axis = 1,inplace=True)

[12:31:38] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[12:31:38] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted



1322 : None 0



[12:31:39] Explicit valence for atom # 0 Cl, 3, is greater than permitted
[12:31:39] ERROR: Explicit valence for atom # 0 Cl, 3, is greater than permitted



5624 : None 0


[12:31:40] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[12:31:40] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted




6454 : None 0




In [186]:
d_final.to_csv("data/train_raw_data.csv")
d6_.to_csv("data/external_test_raw_data.csv")