In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from rdkit.ML.Descriptors import MoleculeDescriptors
from rdkit.Chem import Descriptors
from rdkit import Chem
from IPython.core.display import display, HTML

In [18]:
def force_show_all(df):
    with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', None):
        display(HTML(df.to_html()))

# BindingDB

In [19]:
bindingDB = pd.read_csv('..\Raw_data\Binding_DB\CEC293F933A98F82D59BD6FBD763B2F4ki.tsv', sep = '\t')
bindingDB = bindingDB[['Ligand SMILES', 'IC50 (nM)']]
bindingDB['database_label'] = 'BindingDB'
force_show_all(bindingDB)

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@H]2N)CC1,0.800000,BindingDB
1,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@@H]2N)CC1,0.800,BindingDB
2,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000000,BindingDB
3,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000,BindingDB
4,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000,BindingDB
5,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(O)nc4CCCCn4c3=O)c2Cl)c(N)n1,1.000000,BindingDB
6,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,1.000,BindingDB
7,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C(Cl)=CCC2)c(n1)C(N)=O,1.000,BindingDB
8,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000000,BindingDB
9,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)c2Cl)c2nccn12,1.20,BindingDB


In [20]:
bindingDB.shape

(2189, 3)

## Missing Values

In [21]:
bindingDB.isnull().sum()

Ligand SMILES      0
IC50 (nM)         31
database_label     0
dtype: int64

In [22]:
bindingDB = bindingDB.dropna()

In [23]:
bindingDB.shape

(2158, 3)

## BindingDB Data

In [24]:
bindingDB_data = bindingDB
bindingDB_data

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.800000,BindingDB
1,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.800,BindingDB
2,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1...,1.000000,BindingDB
3,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-...,1.000,BindingDB
4,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1...,1.000,BindingDB
...,...,...,...
2184,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(N)c4)...,37.0,BindingDB
2185,Cn1c(ncc(Sc2cccc(Cl)c2Cl)c1=O)N1CCC2(COC[C@H]2...,37.0,BindingDB
2186,Cn1c(nc2[nH]nc(-c3ccccc3)c2c1=O)N1CCC2(C[C@H](...,37.0,BindingDB
2187,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccc(N)nc4C...,37,BindingDB


## Number of duplicate molecules

In [25]:
bindingDB_data.iloc[:, 0].duplicated(keep=False).sum()

670

## Analysis of Duplicate molecules

In [26]:
duplicates = bindingDB_data.loc[bindingDB_data.iloc[:, 0].duplicated(keep=False), :]
duplicates.shape

(670, 3)

In [27]:
force_show_all(duplicates)

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@H]2N)CC1,0.800000,BindingDB
2,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000000,BindingDB
3,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000,BindingDB
4,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000,BindingDB
6,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,1.000,BindingDB
8,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000000,BindingDB
10,Cc1nc(N2CCC3(CC2)Oc2ccccc2[C@H]3N)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.40,BindingDB
11,Cc1nc(N2CCC3(CC2)Oc2ccccc2[C@H]3N)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.4,BindingDB
12,C[C@@H]1OCC2(CCN(CC2)c2nc(C)c(nc2CO)-c2cccc(Cl)c2Cl)[C@@H]1N,1.5,BindingDB
17,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1ccc(F)c(C)c1Cl,2.0,BindingDB


In [28]:
sorted_duplicates = duplicates.sort_values(by=['IC50 (nM)'], ascending=True)
force_show_all(sorted_duplicates)

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@H]2N)CC1,0.800000,BindingDB
3,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000,BindingDB
4,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000,BindingDB
6,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,1.000,BindingDB
2,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000000,BindingDB
8,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000000,BindingDB
11,Cc1nc(N2CCC3(CC2)Oc2ccccc2[C@H]3N)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.4,BindingDB
10,Cc1nc(N2CCC3(CC2)Oc2ccccc2[C@H]3N)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.40,BindingDB
12,C[C@@H]1OCC2(CCN(CC2)c2nc(C)c(nc2CO)-c2cccc(Cl)c2Cl)[C@@H]1N,1.5,BindingDB
1949,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3ccnc(C4CC4)c3Cl)c(=O)n2C)[C@@H]1N,10.0,BindingDB


In [29]:
def show_duplicate_rows(smile):
    df = sorted_duplicates[sorted_duplicates['Ligand SMILES']==smile]
    force_show_all(df)

In [30]:
show_duplicate_rows('Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@H]2N)CC1')

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@H]2N)CC1,0.8,BindingDB
422,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@H]2N)CC1,270.0,BindingDB
594,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@H]2N)CC1,5.0,BindingDB


In [31]:
show_duplicate_rows('Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl')

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
4,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.0,BindingDB
2,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.0,BindingDB
595,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,5.0,BindingDB


In [32]:
# dropping duplicate values
unique_in_duplicates = duplicates.drop_duplicates(subset ="Ligand SMILES" ,keep='last')
unique_in_duplicates

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
111,CC1(CN)CCN(CC1)c1ncc(Sc2cccc(Cl)c2Cl)[nH]c1=O,<50,BindingDB
182,NCC1(CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl)c1cc...,65.0,BindingDB
183,NC1C2CN(CC12)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,65.0,BindingDB
199,NC[C@H]1CCN(C1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,68.0,BindingDB
235,CCCC1(CN)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,75.0,BindingDB
...,...,...,...
2170,Cn1c(nc(N)c(Sc2cccnc2C(F)(F)F)c1=O)N1CCC2(COC[...,34.0,BindingDB
2171,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)...,34,BindingDB
2174,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC...,35.0,BindingDB
2180,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC...,36,BindingDB


In [33]:
unique_in_duplicates.shape

(250, 3)

In [34]:
unique_in_duplicates.iloc[:, 0].duplicated().sum()

0

In [35]:
for smile in unique_in_duplicates['Ligand SMILES'].values:
    show_duplicate_rows(smile)
    print("******************************")

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
106,CC1(CN)CCN(CC1)c1ncc(Sc2cccc(Cl)c2Cl)[nH]c1=O,<50,BindingDB
111,CC1(CN)CCN(CC1)c1ncc(Sc2cccc(Cl)c2Cl)[nH]c1=O,<50,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
182,NCC1(CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl)c1ccccc1,65.0,BindingDB
179,NCC1(CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl)c1ccccc1,65.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
183,NC1C2CN(CC12)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,65.0,BindingDB
180,NC1C2CN(CC12)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,65.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
199,NC[C@H]1CCN(C1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,68.0,BindingDB
197,NC[C@H]1CCN(C1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,68.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
235,CCCC1(CN)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,75.0,BindingDB
234,CCCC1(CN)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,75.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
281,NCC1(CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl)c1cnccn1,93.0,BindingDB
279,NCC1(CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl)c1cnccn1,93.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
172,CC1(N)CCN(CC1)c1cnc(Sc2cccc(N)c2Cl)c(N)n1,62.0,BindingDB
188,CC1(N)CCN(CC1)c1cnc(Sc2cccc(N)c2Cl)c(N)n1,66.0,BindingDB
240,CC1(N)CCN(CC1)c1cnc(Sc2cccc(N)c2Cl)c(N)n1,76.0,BindingDB
263,CC1(N)CCN(CC1)c1cnc(Sc2cccc(N)c2Cl)c(N)n1,85.0,BindingDB
283,CC1(N)CCN(CC1)c1cnc(Sc2cccc(N)c2Cl)c(N)n1,94.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
301,Nc1nc(cnc1-c1cccc(Cl)c1Cl)N1CC2CNCC2C1,102,BindingDB
304,Nc1nc(cnc1-c1cccc(Cl)c1Cl)N1CC2CNCC2C1,102,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
302,NCC1(CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl)c1ccncc1,102,BindingDB
305,NCC1(CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl)c1ccncc1,102,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
311,Cn1c(nc(N)c(-c2cccc(Cl)c2Cl)c1=O)N1CCC(C)(N)CC1,105,BindingDB
310,Cn1c(nc(N)c(-c2cccc(Cl)c2Cl)c1=O)N1CCC(C)(N)CC1,105,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
319,NCC1(CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl)c1ccccn1,110,BindingDB
318,NCC1(CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl)c1ccccn1,110,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
330,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC2(COC[C@H]2N)CC1,123.0,BindingDB
332,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC2(COC[C@H]2N)CC1,123.0,BindingDB
108,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC2(COC[C@H]2N)CC1,50.0,BindingDB
110,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC2(COC[C@H]2N)CC1,50.0,BindingDB
109,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC2(COC[C@H]2N)CC1,50.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
367,C[C@@H]1OCC2(CCN(CC2)c2ccc(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,177,BindingDB
368,C[C@@H]1OCC2(CCN(CC2)c2ccc(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,177,BindingDB
370,C[C@@H]1OCC2(CCN(CC2)c2ccc(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,177,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
346,N[C@@H]1CC[C@@]11CCN(C1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,145,BindingDB
381,N[C@@H]1CC[C@@]11CCN(C1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,189,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
388,Cn1c(c(I)c2cc(C(O)=O)c(O)cc12)-c1cccc(NC(=O)C(=O)Nc2ccc(cc2)-c2ccsc2)c1,200,BindingDB
389,Cn1c(c(I)c2cc(C(O)=O)c(O)cc12)-c1cccc(NC(=O)C(=O)Nc2ccc(cc2)-c2ccsc2)c1,200,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
340,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,136.0,BindingDB
408,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,250.0,BindingDB
407,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,250.0,BindingDB
406,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,250.0,BindingDB
405,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,250.0,BindingDB
167,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,60.0,BindingDB
212,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,70.0,BindingDB
211,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,70.0,BindingDB
216,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,70.0,BindingDB
208,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,70.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
410,CC1(N)CCN(CC1)c1cnc(Sc2ccccc2Cl)c(N)n1,251.0,BindingDB
207,CC1(N)CCN(CC1)c1cnc(Sc2ccccc2Cl)c(N)n1,70.0,BindingDB
204,CC1(N)CCN(CC1)c1cnc(Sc2ccccc2Cl)c(N)n1,70.0,BindingDB
214,CC1(N)CCN(CC1)c1cnc(Sc2ccccc2Cl)c(N)n1,70.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
447,Oc1c(cc(c2cccnc12)S(O)(=O)=O)\N=N\c1ccc2cc(ccc2c1)S(O)(=O)=O,318,BindingDB
448,Oc1c(cc(c2cccnc12)S(O)(=O)=O)\N=N\c1ccc2cc(ccc2c1)S(O)(=O)=O,320,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
452,CC1(N)CCN(CC1)c1cnc(Sc2cccnc2C(F)(F)F)c(N)n1,339.0,BindingDB
187,CC1(N)CCN(CC1)c1cnc(Sc2cccnc2C(F)(F)F)c(N)n1,66.0,BindingDB
189,CC1(N)CCN(CC1)c1cnc(Sc2cccnc2C(F)(F)F)c(N)n1,67.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
462,CC1(CN)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,357.0,BindingDB
239,CC1(CN)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,76.0,BindingDB
241,CC1(CN)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,76.0,BindingDB
244,CC1(CN)CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,76.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
464,NCC1CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,358,BindingDB
463,NCC1CCN(CC1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,358,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
481,Nc1nc(cnc1-c1cccc(Cl)c1Cl)N1CC2CNCC12,404,BindingDB
480,Nc1nc(cnc1-c1cccc(Cl)c1Cl)N1CC2CNCC12,404,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
491,C[C@@H]1OCC2(CCN(CC2)c2cc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,429,BindingDB
492,C[C@@H]1OCC2(CCN(CC2)c2cc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,429,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
61,C[C@@H]1OCC2(CCN(CC2)c2cnc3c(c[nH]c3n2)-c2cccc(Cl)c2Cl)[C@@H]1N,39,BindingDB
63,C[C@@H]1OCC2(CCN(CC2)c2cnc3c(c[nH]c3n2)-c2cccc(Cl)c2Cl)[C@@H]1N,39,BindingDB
506,C[C@@H]1OCC2(CCN(CC2)c2cnc3c(c[nH]c3n2)-c2cccc(Cl)c2Cl)[C@@H]1N,484,BindingDB
507,C[C@@H]1OCC2(CCN(CC2)c2cnc3c(c[nH]c3n2)-c2cccc(Cl)c2Cl)[C@@H]1N,484,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
467,N[C@H]1CC[C@@]11CCN(C1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,365,BindingDB
522,N[C@H]1CC[C@@]11CCN(C1)c1cnc(c(N)n1)-c1cccc(Cl)c1Cl,548,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
433,CC1(N)CCN(CC1)c1nnc(c(N)n1)-c1cccc(Cl)c1Cl,300,BindingDB
435,CC1(N)CCN(CC1)c1nnc(c(N)n1)-c1cccc(Cl)c1Cl,300,BindingDB
540,CC1(N)CCN(CC1)c1nnc(c(N)n1)-c1cccc(Cl)c1Cl,620,BindingDB
542,CC1(N)CCN(CC1)c1nnc(c(N)n1)-c1cccc(Cl)c1Cl,620,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
547,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc2cc[nH]c12,632,BindingDB
546,CC1(N)CCN(CC1)c1cnc(c(N)n1)-c1cccc2cc[nH]c12,632,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
85,Cc1nc(nc(C(N)=O)c1-c1cccc(Cl)c1Cl)N1CCC2(CCC[C@H]2N)CC1,45.0,BindingDB
549,Cc1nc(nc(C(N)=O)c1-c1cccc(Cl)c1Cl)N1CCC2(CCC[C@H]2N)CC1,660.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
34,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3ccnc4N5CCCC5COc34)nc2CO)[C@@H]1N,3.0,BindingDB
560,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3ccnc4N5CCCC5COc34)nc2CO)[C@@H]1N,4.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
21,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1ccc(F)c(Cl)c1Cl,2.0,BindingDB
19,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1ccc(F)c(Cl)c1Cl,2.0,BindingDB
564,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1ccc(F)c(Cl)c1Cl,4.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
47,Cc1nc(N2CCC3(CCC[C@H]3N)CC2)n2ccnc2c1-c1cccc(Cl)c1Cl,3.6,BindingDB
569,Cc1nc(N2CCC3(CCC[C@H]3N)CC2)n2ccnc2c1-c1cccc(Cl)c1Cl,4.1,BindingDB
143,Cc1nc(N2CCC3(CCC[C@H]3N)CC2)n2ccnc2c1-c1cccc(Cl)c1Cl,52.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@H]2N)CC1,0.8,BindingDB
422,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@H]2N)CC1,270.0,BindingDB
594,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@H]2N)CC1,5.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
4,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.0,BindingDB
2,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.0,BindingDB
595,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,5.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
601,N[C@@H]1CCCC11CCN(CC1)c1ncc(Sc2cncc3ccccc23)c2nccn12,670,BindingDB
602,N[C@@H]1CCCC11CCN(CC1)c1ncc(Sc2cncc3ccccc23)c2nccn12,670,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
194,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(N)CC1,67,BindingDB
196,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(N)CC1,67,BindingDB
195,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(N)CC1,67,BindingDB
193,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(N)CC1,67,BindingDB
612,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(N)CC1,746,BindingDB
613,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(N)CC1,746,BindingDB
611,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(N)CC1,746,BindingDB
614,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(N)CC1,746,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
630,Cc1cccc(c1Cl)-c1cnc(nc1N)N1CCC(C)(N)CC1,858,BindingDB
629,Cc1cccc(c1Cl)-c1cnc(nc1N)N1CCC(C)(N)CC1,858,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
634,Nc1nc(cnc1-c1cccc(Cl)c1Cl)N1CCC2NCCC2C1,908,BindingDB
635,Nc1nc(cnc1-c1cccc(Cl)c1Cl)N1CCC2NCCC2C1,908,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
645,CC1(N)CCN(CC1)c1ncc(-c2ccc(Cl)s2)c(N)n1,950,BindingDB
644,CC1(N)CCN(CC1)c1ncc(-c2ccc(Cl)s2)c(N)n1,950,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
648,Cc1ccc(s1)-c1cnc(nc1N)N1CCC(C)(N)CC1,974,BindingDB
649,Cc1ccc(s1)-c1cnc(nc1N)N1CCC(C)(N)CC1,974,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
669,Oc1c(cc(c2cccnc12)S(O)(=O)=O)N=Nc1ccc2cc(ccc2c1)S(O)(=O)=O,1087,BindingDB
685,Oc1c(cc(c2cccnc12)S(O)(=O)=O)N=Nc1ccc2cc(ccc2c1)S(O)(=O)=O,1110,BindingDB
445,Oc1c(cc(c2cccnc12)S(O)(=O)=O)N=Nc1ccc2cc(ccc2c1)S(O)(=O)=O,318,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
709,NC1CCN(CC1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,1300,BindingDB
712,NC1CCN(CC1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,1300,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
719,Cc1nc(N2CCC3(CC2)Oc2cnccc2[C@@H]3N)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1400.0,BindingDB
720,Cc1nc(N2CCC3(CC2)Oc2cnccc2[C@@H]3N)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1400.0,BindingDB
591,Cc1nc(N2CCC3(CC2)Oc2cnccc2[C@@H]3N)c(C)c(=O)n1-c1cccc(Cl)c1Cl,5.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
659,Cc1nc(N2CCC(C)(N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1000,BindingDB
721,Cc1nc(N2CCC(C)(N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1400,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
731,Cc1ccc(Cl)cc1N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)CCC(O)=O,1500,BindingDB
730,Cc1ccc(Cl)cc1N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)CCC(O)=O,1500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
357,N[C@H]1CCCC11CCN(CC1)c1cnc(Sc2cccnc2C(F)(F)F)c(N)n1,166,BindingDB
760,N[C@H]1CCCC11CCN(CC1)c1cnc(Sc2cccnc2C(F)(F)F)c(N)n1,1800,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
778,CC1(N)CCN(CC1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,1980,BindingDB
776,CC1(N)CCN(CC1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,1980,BindingDB
417,CC1(N)CCN(CC1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,260,BindingDB
415,CC1(N)CCN(CC1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,260,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
377,CC1(CN)CCN(CC1)c1ncc(Sc2cccnc2C(F)(F)F)c(=O)[nH]1,181,BindingDB
378,CC1(CN)CCN(CC1)c1ncc(Sc2cccnc2C(F)(F)F)c(=O)[nH]1,181,BindingDB
376,CC1(CN)CCN(CC1)c1ncc(Sc2cccnc2C(F)(F)F)c(=O)[nH]1,181,BindingDB
787,CC1(CN)CCN(CC1)c1ncc(Sc2cccnc2C(F)(F)F)c(=O)[nH]1,>2000,BindingDB
788,CC1(CN)CCN(CC1)c1ncc(Sc2cccnc2C(F)(F)F)c(=O)[nH]1,>2000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
793,OS(=O)(=O)c1ccc(cc1)N=Nc1c([nH]n(-c2ccccc2)c1=O)-c1ccc(cc1)[N+]([O-])=O,2100,BindingDB
795,OS(=O)(=O)c1ccc(cc1)N=Nc1c([nH]n(-c2ccccc2)c1=O)-c1ccc(cc1)[N+]([O-])=O,2100,BindingDB
798,OS(=O)(=O)c1ccc(cc1)N=Nc1c([nH]n(-c2ccccc2)c1=O)-c1ccc(cc1)[N+]([O-])=O,2100,BindingDB
165,OS(=O)(=O)c1ccc(cc1)N=Nc1c([nH]n(-c2ccccc2)c1=O)-c1ccc(cc1)[N+]([O-])=O,60,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
836,C(N[C@@H]1C[C@H]2OC[C@@H]([C@H]2O1)n1nnnc1-c1cccc(CN2CCCC2)c1)C1CCCCC1,2470,BindingDB
841,C(N[C@@H]1C[C@H]2OC[C@@H]([C@H]2O1)n1nnnc1-c1cccc(CN2CCCC2)c1)C1CCCCC1,2500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
844,CC1Cc2cc(ccc2N1C(=O)CCC(O)=O)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,2500,BindingDB
845,CC1Cc2cc(ccc2N1C(=O)CCC(O)=O)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,2500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
848,CC1(N)CCN(CC1)c1cnc(C2=CCCCC2)c(N)n1,2585,BindingDB
847,CC1(N)CCN(CC1)c1cnc(C2=CCCCC2)c(N)n1,2585,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
861,OC(=O)CCCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,2800,BindingDB
862,OC(=O)CCCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,2800,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
867,COc1cc(Cl)c(Cl)c(c1)-c1cnc(nc1N)N1CCC(C)(N)CC1,2859,BindingDB
866,COc1cc(Cl)c(Cl)c(c1)-c1cnc(nc1N)N1CCC(C)(N)CC1,2859,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
872,OC(=O)c1cccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)c1,3000,BindingDB
873,OC(=O)c1cccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)c1,3000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
891,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)Nc1ccccc1Cl,3200,BindingDB
889,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)Nc1ccccc1Cl,3200,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
905,CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(C)c1,3400,BindingDB
907,CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(C)c1,3400,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
913,OC(=O)c1ccc2NC(=O)\C(=N/Nc3ccccc3C(O)=O)c2c1,3550,BindingDB
624,OC(=O)c1ccc2NC(=O)\C(=N/Nc3ccccc3C(O)=O)c2c1,800,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
924,NC[C@@]12CCC[C@@H]1CN(C2)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,3700.0,BindingDB
102,NC[C@@]12CCC[C@@H]1CN(C2)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,49.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
736,N[C@@H]1CC[C@@]11CCN(C1)c1cnc(Sc2cccc(Cl)c2Cl)c(N)n1,1510,BindingDB
936,N[C@@H]1CC[C@@]11CCN(C1)c1cnc(Sc2cccc(Cl)c2Cl)c(N)n1,3933,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
899,C[C@]12CC[C@](C)(C[C@H]1[C@]1(C)CC[C@]3(C)C(=CC=c4c3cc(O)c(O)c4=C)[C@@]1(C)CC2)C(O)=O,3300,BindingDB
944,C[C@]12CC[C@](C)(C[C@H]1[C@]1(C)CC[C@]3(C)C(=CC=c4c3cc(O)c(O)c4=C)[C@@]1(C)CC2)C(O)=O,4100,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
945,CC(C)c1ccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)cc1,4100,BindingDB
943,CC(C)c1ccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)cc1,4100,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
959,Cc1nc(nc(C(N)=O)c1-c1cccc(Cl)c1Cl)N1CCC2(CC(F)(F)C[C@H]2N)CC1,4400.0,BindingDB
171,Cc1nc(nc(C(N)=O)c1-c1cccc(Cl)c1Cl)N1CCC2(CC(F)(F)C[C@H]2N)CC1,62.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
708,C[C@]1(N)CCCN(CC1)c1nc(N)c(c(n1)C(N)=O)-c1cccc(Cl)c1Cl,1300,BindingDB
961,C[C@]1(N)CCCN(CC1)c1nc(N)c(c(n1)C(N)=O)-c1cccc(Cl)c1Cl,4400,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
958,CC(C)C(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,4400,BindingDB
962,CC(C)C(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,4400,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
968,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cc(Cl)cc(Cl)c1,4500,BindingDB
970,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cc(Cl)cc(Cl)c1,4500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
969,CC(C)CC(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,4500,BindingDB
971,CC(C)CC(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,4500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
432,Oc1c(cc(c2cccnc12)S([O-])(=O)=O)N=Nc1ccc2cc(ccc2c1)S([O-])(=O)=O,300,BindingDB
449,Oc1c(cc(c2cccnc12)S([O-])(=O)=O)N=Nc1ccc2cc(ccc2c1)S([O-])(=O)=O,320,BindingDB
485,Oc1c(cc(c2cccnc12)S([O-])(=O)=O)N=Nc1ccc2cc(ccc2c1)S([O-])(=O)=O,410,BindingDB
975,Oc1c(cc(c2cccnc12)S([O-])(=O)=O)N=Nc1ccc2cc(ccc2c1)S([O-])(=O)=O,4500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
982,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1ccc(Cl)c(Cl)c1,4600,BindingDB
979,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1ccc(Cl)c(Cl)c1,4600,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
985,OC(=O)CNC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,4600,BindingDB
983,OC(=O)CNC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,4600,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
992,COc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)CCC(O)=O,4700,BindingDB
991,COc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)CCC(O)=O,4700,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
997,OC(=O)C(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,4800,BindingDB
999,OC(=O)C(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,4800,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1000,OC(=O)CCNC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,4800,BindingDB
1002,OC(=O)CCNC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,4800,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1014,CCOC(=O)C(C)NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,5100,BindingDB
1015,CCOC(=O)C(C)NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,5100,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1034,OC(=O)C(Cc1ccccc1)NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,5400,BindingDB
1031,OC(=O)C(Cc1ccccc1)NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,5400,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1044,NC[C@@]12CN(C[C@@H]1CCCC2)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,5600.0,BindingDB
237,NC[C@@]12CN(C[C@@H]1CCCC2)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,75.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1052,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)Nc1ccc(Cl)c(Cl)c1,5700,BindingDB
1054,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)Nc1ccc(Cl)c(Cl)c1,5700,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1055,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)c1cccc(Cl)c1,5700,BindingDB
1053,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)c1cccc(Cl)c1,5700,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1058,CC1(N)CCN(CC1)c1cnc(cn1)-c1cccc(Cl)c1Cl,5700,BindingDB
1057,CC1(N)CCN(CC1)c1cnc(cn1)-c1cccc(Cl)c1Cl,5700,BindingDB
1059,CC1(N)CCN(CC1)c1cnc(cn1)-c1cccc(Cl)c1Cl,5700,BindingDB
1051,CC1(N)CCN(CC1)c1cnc(cn1)-c1cccc(Cl)c1Cl,5700,BindingDB
1050,CC1(N)CCN(CC1)c1cnc(cn1)-c1cccc(Cl)c1Cl,5700,BindingDB
1049,CC1(N)CCN(CC1)c1cnc(cn1)-c1cccc(Cl)c1Cl,5700,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
459,N[C@H]1CC[C@@]11CCN(C1)c1cnc(Sc2cccc(Cl)c2Cl)c(N)n1,354,BindingDB
1062,N[C@H]1CC[C@@]11CCN(C1)c1cnc(Sc2cccc(Cl)c2Cl)c(N)n1,5787,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1072,CCOC(=O)C(CC(C)C)NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,6000,BindingDB
1071,CCOC(=O)C(CC(C)C)NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,6000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
906,C[C@@H]1OCC2(CCN(CC2)c2nc(C)n(-c3cccc(Cl)c3Cl)c(=O)c2C)[C@@H]1N,3400.0,BindingDB
57,C[C@@H]1OCC2(CCN(CC2)c2nc(C)n(-c3cccc(Cl)c3Cl)c(=O)c2C)[C@@H]1N,39.0,BindingDB
1073,C[C@@H]1OCC2(CCN(CC2)c2nc(C)n(-c3cccc(Cl)c3Cl)c(=O)c2C)[C@@H]1N,6000.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1083,COc1ccc(cc1)C(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,6300,BindingDB
1084,COc1ccc(cc1)C(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,6300,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1092,CC(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,6400,BindingDB
1091,CC(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,6400,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1094,CN(C)C1CCN(CC1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,6500,BindingDB
1095,CN(C)C1CCN(CC1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,6500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
3,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.0,BindingDB
8,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.0,BindingDB
23,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,2.0,BindingDB
1121,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,6.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
11,Cc1nc(N2CCC3(CC2)Oc2ccccc2[C@H]3N)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.4,BindingDB
10,Cc1nc(N2CCC3(CC2)Oc2ccccc2[C@H]3N)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.4,BindingDB
1124,Cc1nc(N2CCC3(CC2)Oc2ccccc2[C@H]3N)c(C)c(=O)n1-c1cccc(Cl)c1Cl,6.6,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1128,N[C@@H]1COCC11CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)c(N)n1,7.0,BindingDB
1130,N[C@@H]1COCC11CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)c(N)n1,7.0,BindingDB
295,N[C@@H]1COCC11CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)c(N)n1,99.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
17,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1ccc(F)c(C)c1Cl,2.0,BindingDB
18,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1ccc(F)c(C)c1Cl,2.0,BindingDB
1135,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1ccc(F)c(C)c1Cl,7.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1158,CCOC(=O)C(Cc1ccccc1)NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,6900,BindingDB
1160,CCOC(=O)C(Cc1ccccc1)NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,6900,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1159,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)C1CCCC1,6900,BindingDB
1162,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)C1CCCC1,6900,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1166,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)c1ccc(Cl)c(Cl)c1,7100,BindingDB
1167,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)c1ccc(Cl)c(Cl)c1,7100,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1181,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)c1ccc(Cl)cc1Cl,7600,BindingDB
1183,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)c1ccc(Cl)cc1Cl,7600,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1185,CCOC(=O)C(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(C)C,7700,BindingDB
1186,CCOC(=O)C(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(C)C,7700,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1192,CCCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,7800,BindingDB
1194,CCCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,7800,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
309,Cc1nc(nc(C(N)=O)c1-c1cccc(Cl)c1Cl)N1CCC(C)(N)CC1,104,BindingDB
320,Cc1nc(nc(C(N)=O)c1-c1cccc(Cl)c1Cl)N1CCC(C)(N)CC1,110,BindingDB
1199,Cc1nc(nc(C(N)=O)c1-c1cccc(Cl)c1Cl)N1CCC(C)(N)CC1,7900,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1202,Clc1ccc(CC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)cc1,8000,BindingDB
1203,Clc1ccc(CC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)cc1,8000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1206,COc1cccc(c1)C(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,8100,BindingDB
1208,COc1cccc(c1)C(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,8100,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1209,Fc1cccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)c1,8100,BindingDB
1207,Fc1cccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)c1,8100,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1213,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)c1cc(Cl)cc(Cl)c1,8200,BindingDB
1212,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)c1cc(Cl)cc(Cl)c1,8200,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
677,OC(CC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,1100,BindingDB
678,OC(CC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,1100,BindingDB
714,OC(CC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,1300,BindingDB
715,OC(CC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,1300,BindingDB
1211,OC(CC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,8200,BindingDB
1214,OC(CC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,8200,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1219,OC(=O)c1ccccc1NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,8300,BindingDB
1217,OC(=O)c1ccccc1NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,8300,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1218,Clc1cccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)c1,8300,BindingDB
1221,Clc1cccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)c1,8300,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1223,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)c1ccccc1,8400,BindingDB
1222,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)c1ccccc1,8400,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1226,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)C1CC1,8500,BindingDB
1227,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)C1CC1,8500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1235,FC(F)(F)c1ccccc1NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,9000,BindingDB
1237,FC(F)(F)c1ccccc1NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,9000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1238,COc1ccc(cc1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)CCC(O)=O,9000,BindingDB
1234,COc1ccc(cc1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)CCC(O)=O,9000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1239,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cc(cc(c1)C(F)(F)F)C(F)(F)F,9100,BindingDB
1242,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cc(cc(c1)C(F)(F)F)C(F)(F)F,9100,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1248,CCOC(=O)CCNC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,9300,BindingDB
1246,CCOC(=O)CCNC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,9300,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1252,COCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,9600,BindingDB
1253,COCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,9600,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1260,COc1cccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)c1,9900,BindingDB
1262,COc1cccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)c1,9900,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
355,CC1(N)CCN(CC1)c1nc(N)c(c(n1)C(N)=O)-c1cccc(Cl)c1Cl,160,BindingDB
257,CC1(N)CCN(CC1)c1nc(N)c(c(n1)C(N)=O)-c1cccc(Cl)c1Cl,82.0,BindingDB
1266,CC1(N)CCN(CC1)c1nc(N)c(c(n1)C(N)=O)-c1cccc(Cl)c1Cl,>10000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
372,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC(C)(N)CC1,180,BindingDB
404,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC(C)(N)CC1,250,BindingDB
1271,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC(C)(N)CC1,>10000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
391,C[C@@H]1OCC2(CCN(CC2)c2cc(=O)n(c(C)n2)-c2cccc(Cl)c2Cl)[C@@H]1N,200,BindingDB
1272,C[C@@H]1OCC2(CCN(CC2)c2cc(=O)n(c(C)n2)-c2cccc(Cl)c2Cl)[C@@H]1N,>10000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1056,Cc1nc(cc(=O)n1-c1c(F)ccc(Cl)c1Cl)N1CCC(C)(CN)CC1,5700,BindingDB
1275,Cc1nc(cc(=O)n1-c1c(F)ccc(Cl)c1Cl)N1CCC(C)(CN)CC1,>10000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
692,Cc1nc(N[C@H]2CCC[C@@H](N)C2)nc(C(N)=O)c1-c1cccc(Cl)c1Cl,1200,BindingDB
1310,Cc1nc(N[C@H]2CCC[C@@H](N)C2)nc(C(N)=O)c1-c1cccc(Cl)c1Cl,>10000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1311,NC[C@]12CCC[C@H]1CN(C2)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,10000,BindingDB
323,NC[C@]12CCC[C@H]1CN(C2)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,120,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
529,COc1c(F)ccc(c1Cl)-c1c(C)nc(nc1C(N)=O)N1CCC(C)(N)CC1,570,BindingDB
1312,COc1c(F)ccc(c1Cl)-c1c(C)nc(nc1C(N)=O)N1CCC(C)(N)CC1,>10000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
710,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCCC(N)(CC(F)F)CCC1,1300,BindingDB
1319,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCCC(N)(CC(F)F)CCC1,>10000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
142,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(CCC[C@H]2N)CC1,51.0,BindingDB
1322,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(CCC[C@H]2N)CC1,>10000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1334,Clc1ccc(cc1)C(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,10400,BindingDB
1336,Clc1ccc(cc1)C(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,10400,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1340,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1C#N)C(F)(F)F,10600,BindingDB
1339,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1C#N)C(F)(F)F,10600,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1349,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(c1)C(F)(F)F,11300,BindingDB
1347,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(c1)C(F)(F)F,11300,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1357,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)Nc1ccccc1,11700,BindingDB
1358,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)Nc1ccccc1,11700,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1372,Fc1ccccc1NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,12200,BindingDB
1370,Fc1ccccc1NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,12200,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1375,Fc1ccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)cc1,12600,BindingDB
1374,Fc1ccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)cc1,12600,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1378,CCC(C)C(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(=O)OC,12700,BindingDB
1377,CCC(C)C(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(=O)OC,12700,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1380,Cc1nc(N[C@@H]2CCC[C@H](N)C2)nc(C(N)=O)c1-c1cccc(Cl)c1Cl,13000,BindingDB
486,Cc1nc(N[C@@H]2CCC[C@H](N)C2)nc(C(N)=O)c1-c1cccc(Cl)c1Cl,410,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1383,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(F)c1,13200,BindingDB
1382,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(F)c1,13200,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1388,Cc1cc(C)c2[nH]c3nc(SCC(=O)Nc4cccc5ccccc45)nnc3c2c1,13900,BindingDB
877,Cc1cc(C)c2[nH]c3nc(SCC(=O)Nc4cccc5ccccc45)nnc3c2c1,3090,BindingDB
1170,Cc1cc(C)c2[nH]c3nc(SCC(=O)Nc4cccc5ccccc45)nnc3c2c1,7240,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1397,COC(=O)c1ccccc1NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,14300,BindingDB
1398,COC(=O)c1ccccc1NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,14300,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1403,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1ccccc1,14800,BindingDB
1404,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1ccccc1,14800,BindingDB
1076,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1ccccc1,6100,BindingDB
1078,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1ccccc1,6100,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1406,NC(=O)c1nc(nc(N)c1-c1cccc(Cl)c1Cl)N1CCC(N)(CC1)C(F)F,15000,BindingDB
892,NC(=O)c1nc(nc(N)c1-c1cccc(Cl)c1Cl)N1CCC(N)(CC1)C(F)F,3200,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1411,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1Br)C(F)(F)F,15200,BindingDB
1410,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1Br)C(F)(F)F,15200,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1413,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(OCc2ccccc2)cc1,15300,BindingDB
1412,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(OCc2ccccc2)cc1,15300,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1414,CCC(C)C(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,15500,BindingDB
1415,CCC(C)C(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,15500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1423,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(-c3cccc(Cl)c3Cl)c(n2)C(N)=O)[C@@H]1N,16000.0,BindingDB
145,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(-c3cccc(Cl)c3Cl)c(n2)C(N)=O)[C@@H]1N,52.0,BindingDB
222,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(-c3cccc(Cl)c3Cl)c(n2)C(N)=O)[C@@H]1N,72.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1426,Cc1ccc(cc1Cl)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)CCC(O)=O,16600,BindingDB
1427,Cc1ccc(cc1Cl)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)CCC(O)=O,16600,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1430,NC[C@]12CN(C[C@H]1CCCC2)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,17000,BindingDB
513,NC[C@]12CN(C[C@H]1CCCC2)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1438,Nc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC(N)(O)CC1,19000,BindingDB
1273,Nc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC(N)(O)CC1,>10000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1442,Clc1ccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)cc1,19100,BindingDB
1441,Clc1ccc(NC(=O)N2CCc3cc(ccc23)S(=O)(=O)N2CCN(CC2)c2cccc(Cl)c2)cc1,19100,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1452,OC(=O)C1CC=CCC1C(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,20300,BindingDB
1451,OC(=O)C1CC=CCC1C(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,20300,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1455,CC(C)CC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,20700,BindingDB
1454,CC(C)CC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,20700,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1456,CCOc1cc(O)c(cc1CN1CCC=C(C1)n1c2ccccc2[nH]c1=O)C(=O)\C=C\c1ccc(Cl)cc1Cl,20870,BindingDB
1184,CCOc1cc(O)c(cc1CN1CCC=C(C1)n1c2ccccc2[nH]c1=O)C(=O)\C=C\c1ccc(Cl)cc1Cl,7670,BindingDB
1257,CCOc1cc(O)c(cc1CN1CCC=C(C1)n1c2ccccc2[nH]c1=O)C(=O)\C=C\c1ccc(Cl)cc1Cl,9800,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1463,CNc1nc(cnc1-c1cccc(Cl)c1Cl)N1CCC(C)(N)CC1,22000,BindingDB
1464,CNc1nc(cnc1-c1cccc(Cl)c1Cl)N1CCC(C)(N)CC1,22000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1469,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)Cc1ccccc1,22800,BindingDB
1470,Clc1cccc(c1)N1CCN(CC1)S(=O)(=O)c1ccc2N(CCc2c1)C(=O)Cc1ccccc1,22800,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1479,COC(=O)c1cc(ccc1O)C#Cc1ccc(NC(=O)C(O)=O)cc1,27300,BindingDB
1480,COC(=O)c1cc(ccc1O)C#Cc1ccc(NC(=O)C(O)=O)cc1,27300,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1483,CC1(C)Cc2c(C1)c1C(=O)OC(O)Cc1c(C=O)c2O,29000,BindingDB
1481,CC1(C)Cc2c(C1)c1C(=O)OC(O)Cc1c(C=O)c2O,29000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1484,OC(=O)C(=O)Nc1ccc(cc1)-c1ccccc1,29000,BindingDB
1482,OC(=O)C(=O)Nc1ccc(cc1)-c1ccccc1,29000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1369,Cn1c(c(-c2cn(CCC(=O)Nc3ccc(cc3)-c3ccccc3)nn2)c2cc(C(O)=O)c(O)cc12)-c1ccccc1,12000,BindingDB
1491,Cn1c(c(-c2cn(CCC(=O)Nc3ccc(cc3)-c3ccccc3)nn2)c2cc(C(O)=O)c(O)cc12)-c1ccccc1,30000,BindingDB
1041,Cn1c(c(-c2cn(CCC(=O)Nc3ccc(cc3)-c3ccccc3)nn2)c2cc(C(O)=O)c(O)cc12)-c1ccccc1,5500,BindingDB
1042,Cn1c(c(-c2cn(CCC(=O)Nc3ccc(cc3)-c3ccccc3)nn2)c2cc(C(O)=O)c(O)cc12)-c1ccccc1,5500,BindingDB
1039,Cn1c(c(-c2cn(CCC(=O)Nc3ccc(cc3)-c3ccccc3)nn2)c2cc(C(O)=O)c(O)cc12)-c1ccccc1,5500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1489,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1F)C(F)(F)F,>30000,BindingDB
1493,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1F)C(F)(F)F,>30000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1490,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1Cl)C(F)(F)F,>30000,BindingDB
1494,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1Cl)C(F)(F)F,>30000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1488,OC(=O)C(=O)Nc1ccc(CCc2ccccc2)cc1,>30000,BindingDB
1496,OC(=O)C(=O)Nc1ccc(CCc2ccccc2)cc1,>30000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1498,OC(=O)C(=O)Nc1ccc(CCc2ccc(cc2)C(F)(F)F)cc1,>30000,BindingDB
1497,OC(=O)C(=O)Nc1ccc(CCc2ccc(cc2)C(F)(F)F)cc1,>30000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1487,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1C(F)(F)F)C(F)(F)F,>30000,BindingDB
1499,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1C(F)(F)F)C(F)(F)F,>30000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1500,OC(=O)C(=O)Nc1ccc(\C=C/c2ccccc2)cc1,>30000,BindingDB
1486,OC(=O)C(=O)Nc1ccc(\C=C/c2ccccc2)cc1,>30000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1495,OC(=O)C(=O)Nc1ccc(\C=C/c2ccc(cc2)C(F)(F)F)cc1,>30000,BindingDB
1501,OC(=O)C(=O)Nc1ccc(\C=C/c2ccc(cc2)C(F)(F)F)cc1,>30000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1504,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1ccc(F)cc1,31900,BindingDB
1506,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1ccc(F)cc1,31900,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1513,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1ccccc1F,34600,BindingDB
1512,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1ccccc1F,34600,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1521,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,37300,BindingDB
604,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,700,BindingDB
605,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,700,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1524,OC(=O)CCC(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,37600,BindingDB
1523,OC(=O)CCC(NC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1)C(O)=O,37600,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1541,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(O)c(Br)c1,44500,BindingDB
1542,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(O)c(Br)c1,44500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1468,CCN1\C(Sc2ccccc12)=C1\SC(=S)N(CC=C)C1=O,22500,BindingDB
1543,CCN1\C(Sc2ccccc12)=C1\SC(=S)N(CC=C)C1=O,45180,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1545,Nc1nc2ccc(Sc3cccc(Cl)c3Cl)nc2[nH]1,46000,BindingDB
1544,Nc1nc2ccc(Sc3cccc(Cl)c3Cl)nc2[nH]1,46000,BindingDB
1547,Nc1nc2ccc(Sc3cccc(Cl)c3Cl)nc2[nH]1,47000,BindingDB
1549,Nc1nc2ccc(Sc3cccc(Cl)c3Cl)nc2[nH]1,47000,BindingDB
1550,Nc1nc2ccc(Sc3cccc(Cl)c3Cl)nc2[nH]1,47000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1548,CC1Cc2cc(ccc2N1C(=O)C(F)(F)F)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,47000,BindingDB
1551,CC1Cc2cc(ccc2N1C(=O)C(F)(F)F)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,47000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1552,OS(=O)(=O)c1ccc2NC(=O)\C(=N/Nc3ccccc3[N+]([O-])=O)c2c1,47260,BindingDB
1006,OS(=O)(=O)c1ccc2NC(=O)\C(=N/Nc3ccccc3[N+]([O-])=O)c2c1,4990,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1553,CN(C)c1cc(O)c(cc1C#Cc1cccc(NC(=O)C(O)=O)c1)C(O)=O,47500,BindingDB
1554,CN(C)c1cc(O)c(cc1C#Cc1cccc(NC(=O)C(O)=O)c1)C(O)=O,47500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1566,COC(=O)c1ccc(cc1O)C#Cc1ccc(NC(=O)C(O)=O)cc1,>50000,BindingDB
1583,COC(=O)c1ccc(cc1O)C#Cc1ccc(NC(=O)C(O)=O)cc1,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1567,COC(=O)c1cc(C#Cc2ccc(NC(=O)C(O)=O)cc2)c(cc1O)N(C)C,>50000,BindingDB
1584,COC(=O)c1cc(C#Cc2ccc(NC(=O)C(O)=O)cc2)c(cc1O)N(C)C,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1568,COC(=O)c1cc(C#Cc2cccc(NC(=O)C(O)=O)c2)c(cc1O)N(C)C,>50000,BindingDB
1585,COC(=O)c1cc(C#Cc2cccc(NC(=O)C(O)=O)c2)c(cc1O)N(C)C,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1586,CC(C)(C)c1ccc(cc1)C#Cc1ccc(NC(=O)C(O)=O)cc1,>50000,BindingDB
1569,CC(C)(C)c1ccc(cc1)C#Cc1ccc(NC(=O)C(O)=O)cc1,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1570,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(OC(F)(F)F)cc1,>50000,BindingDB
1587,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(OC(F)(F)F)cc1,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1571,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1cccc(c1)C(F)(F)F,>50000,BindingDB
1588,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1cccc(c1)C(F)(F)F,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1589,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1)C(F)(F)F,>50000,BindingDB
1572,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1)C(F)(F)F,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1590,NS(=O)(=O)c1ccc(cc1)C#Cc1ccc(NC(=O)C(O)=O)cc1,>50000,BindingDB
1573,NS(=O)(=O)c1ccc(cc1)C#Cc1ccc(NC(=O)C(O)=O)cc1,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1591,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(O)cc1,>50000,BindingDB
1574,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(O)cc1,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1592,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1)C#N,>50000,BindingDB
1575,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc(cc1)C#N,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1593,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1cc(cc(c1)C(F)(F)F)C(F)(F)F,>50000,BindingDB
1576,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1cc(cc(c1)C(F)(F)F)C(F)(F)F,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1594,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc2ccccc2c1,>50000,BindingDB
1577,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccc2ccccc2c1,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1595,CC(C)(C)OC(=O)Nc1ccc(cc1)C#Cc1ccc(NC(=O)C(O)=O)cc1,>50000,BindingDB
1578,CC(C)(C)OC(=O)Nc1ccc(cc1)C#Cc1ccc(NC(=O)C(O)=O)cc1,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1446,Cc1nc(C)n(-c2cccc(Cl)c2Cl)c(=O)c1NC1CCC(C)(N)CC1,20000,BindingDB
1596,Cc1nc(C)n(-c2cccc(Cl)c2Cl)c(=O)c1NC1CCC(C)(N)CC1,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
384,Cc1c(nc(N)n(-c2cccc(Cl)c2Cl)c1=O)N1CCC2(CCC[C@@H]2N)CC1,190,BindingDB
800,Cc1c(nc(N)n(-c2cccc(Cl)c2Cl)c1=O)N1CCC2(CCC[C@@H]2N)CC1,2100,BindingDB
1597,Cc1c(nc(N)n(-c2cccc(Cl)c2Cl)c1=O)N1CCC2(CCC[C@@H]2N)CC1,>50000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1602,C[C@H]1CN(C[C@@H](C)N1)c1ncc(c(N)n1)-c1ccc(Cl)c(Cl)c1,54000,BindingDB
1601,C[C@H]1CN(C[C@@H](C)N1)c1ncc(c(N)n1)-c1ccc(Cl)c(Cl)c1,54000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1607,CC(C)(C)OC(=O)NCC#Cc1ccc(NC(=O)C(O)=O)cc1,58000,BindingDB
1608,CC(C)(C)OC(=O)NCC#Cc1ccc(NC(=O)C(O)=O)cc1,58000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1613,CCOC(=O)CNC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,60300,BindingDB
1612,CCOC(=O)CNC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1,60300,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1615,CC1(N)CCN(CC1)c1cnc(Oc2cccc(Cl)c2Cl)c(N)n1,64000,BindingDB
1616,CC1(N)CCN(CC1)c1cnc(Oc2cccc(Cl)c2Cl)c(N)n1,64427,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1647,OC(=O)C(=O)Nc1cccc(c1)C#Cc1ccccc1,69000,BindingDB
1648,OC(=O)C(=O)Nc1cccc(c1)C#Cc1ccccc1,69000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
334,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccncc4)c3c(=O)n2C)[C@@H]1N,129.0,BindingDB
335,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccncc4)c3c(=O)n2C)[C@@H]1N,129.0,BindingDB
1146,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccncc4)c3c(=O)n2C)[C@@H]1N,8.0,BindingDB
1145,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccncc4)c3c(=O)n2C)[C@@H]1N,8.0,BindingDB
1651,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccncc4)c3c(=O)n2C)[C@@H]1N,8.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1659,NC1C2CC2CC11CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)c(N)n1,8.3,BindingDB
1658,NC1C2CC2CC11CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)c(N)n1,8.3,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1689,Cc1c(Nc2nc(nc3cc(CN4CC[C@@H](O)C4)cnc23)C(F)F)cccc1-c1cccc(-c2nc3cc(CN4CCC(CC4)C(O)=O)cc(C#N)c3o2)c1C,<10,BindingDB
1692,Cc1c(Nc2nc(nc3cc(CN4CC[C@@H](O)C4)cnc23)C(F)F)cccc1-c1cccc(-c2nc3cc(CN4CCC(CC4)C(O)=O)cc(C#N)c3o2)c1C,<10,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1690,Cc1c(Nc2nc(nc3cc(CN4CC[C@@H](O)C4)cnc23)C(F)F)cccc1-c1cccc(-c2nc3cc(CN4CC[C@H](C4)C(O)=O)cc(C#N)c3o2)c1C,<10,BindingDB
1693,Cc1c(Nc2nc(nc3cc(CN4CC[C@@H](O)C4)cnc23)C(F)F)cccc1-c1cccc(-c2nc3cc(CN4CC[C@H](C4)C(O)=O)cc(C#N)c3o2)c1C,<10,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1691,Cc1c(Nc2nc(nc3cc(CN4CC[C@@H](O)C4)cnc23)C(F)F)cccc1-c1cccc(-c2nc3cc(CN4CC[C@](C)(C4)C(O)=O)cc(C#N)c3o2)c1C,<10,BindingDB
1694,Cc1c(Nc2nc(nc3cc(CN4CC[C@@H](O)C4)cnc23)C(F)F)cccc1-c1cccc(-c2nc3cc(CN4CC[C@](C)(C4)C(O)=O)cc(C#N)c3o2)c1C,<10,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1700,CN(C)c1cc(O)c(cc1C#Cc1ccc(NC(=O)C(O)=O)cc1)C(O)=O,78500,BindingDB
1649,CN(C)c1cc(O)c(cc1C#Cc1ccc(NC(=O)C(O)=O)cc1)C(O)=O,78500,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1721,C[C@H]1CN(C[C@@H](C)N1)c1ncc(c(N)n1)-c1cccc(Cl)c1,99000,BindingDB
1720,C[C@H]1CN(C[C@@H](C)N1)c1ncc(c(N)n1)-c1cccc(Cl)c1,99000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1300,OC(=O)C(=O)Nc1cccc(c1)-c1ccccc1,>10000,BindingDB
1746,OC(=O)C(=O)Nc1cccc(c1)-c1ccccc1,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1301,OC(=O)C(=O)Nc1cnc2ccccc2c1,>10000,BindingDB
1747,OC(=O)C(=O)Nc1cnc2ccccc2c1,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1750,COc1c2CC(C)(C)Cc2c2C(=O)OC(O)Cc2c1C=O,>100000,BindingDB
1751,COc1c2CC(C)(C)Cc2c2C(=O)OC(O)Cc2c1C=O,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1366,C[C@H]1CN(C[C@@H](C)N1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,12000,BindingDB
1363,C[C@H]1CN(C[C@@H](C)N1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,12000,BindingDB
1362,C[C@H]1CN(C[C@@H](C)N1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,12000,BindingDB
1752,C[C@H]1CN(C[C@@H](C)N1)c1ncc(c(N)n1)-c1cccc(Cl)c1Cl,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1744,OC(=O)C(=O)Nc1ccccc1,>100000,BindingDB
1758,OC(=O)C(=O)Nc1ccccc1,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1745,OC(=O)C(=O)Nc1ccc(cc1)C#C,>100000,BindingDB
1759,OC(=O)C(=O)Nc1ccc(cc1)C#C,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1760,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccccc1,>100000,BindingDB
1754,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccccc1,>100000,BindingDB
1761,OC(=O)C(=O)Nc1ccc(cc1)C#Cc1ccccc1,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1610,COc1cccc(-c2nnc3n(Cc4ccccc4Cl)c(=O)c4ccccc4n23)c1O,60000,BindingDB
1762,COc1cccc(-c2nnc3n(Cc4ccccc4Cl)c(=O)c4ccccc4n23)c1O,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1437,COc1cccc(-c2nnc3n(Cc4ccc(cc4Cl)C(=O)N4CCC[C@H]4C(O)=O)c(=O)c4ccccc4n23)c1O,18900,BindingDB
1763,COc1cccc(-c2nnc3n(Cc4ccc(cc4Cl)C(=O)N4CCC[C@H]4C(O)=O)c(=O)c4ccccc4n23)c1O,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1459,OC(=O)c1ccc(O)c(c1)-c1nnc2n(Cc3ccccc3Cl)c(=O)c3ccccc3n12,21000,BindingDB
1765,OC(=O)c1ccc(O)c(c1)-c1nnc2n(Cc3ccccc3Cl)c(=O)c3ccccc3n12,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1764,Oc1ccccc1-c1nnc2n(Cc3ccccc3Cl)c(=O)c3ccccc3n12,>100000,BindingDB
1766,Oc1ccccc1-c1nnc2n(Cc3ccccc3Cl)c(=O)c3ccccc3n12,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1753,C[C@H]1CN(C[C@@H](C)N1)c1ncc(c(N)n1)-c1cccc(C)c1C,>100000,BindingDB
1767,C[C@H]1CN(C[C@@H](C)N1)c1ncc(c(N)n1)-c1cccc(C)c1C,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1461,OC(=O)c1cccc(N\N=C2/C(=O)Nc3ccc(cc23)C(=O)NCc2ccc(Cl)cc2)c1,21670,BindingDB
1812,OC(=O)c1cccc(N\N=C2/C(=O)Nc3ccc(cc23)C(=O)NCc2ccc(Cl)cc2)c1,>100000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1354,CC1(C)CC[C@@]2(CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)C(C)(C)[C@@H]5CC[C@@]34C)[C@@H]2C1)C(O)=O,11620,BindingDB
1836,CC1(C)CC[C@@]2(CC[C@]3(C)C(=CC[C@@H]4[C@@]5(C)CC[C@H](O)C(C)(C)[C@@H]5CC[C@@]34C)[C@@H]2C1)C(O)=O,>120000,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1871,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1Cl,143600,BindingDB
1872,OC(=O)CCC(=O)N1CCc2cc(ccc12)S(=O)(=O)N1CCN(CC1)c1cccc(Cl)c1Cl,143600,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1698,Cc1nc(Nc2cccc(c2C)-c2cccc(-c3nc4cc(CN5CC[C@H](C5)C(O)=O)cc(C#N)c4o3)c2C)c2ncc(CN3CC[C@@H](O)C3)cc2n1,<10,BindingDB
1943,Cc1nc(Nc2cccc(c2C)-c2cccc(-c3nc4cc(CN5CC[C@H](C5)C(O)=O)cc(C#N)c4o3)c2C)c2ncc(CN3CC[C@@H](O)C3)cc2n1,<10,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1677,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(Cl)c2Cl)c(N)n1,10.0,BindingDB
1950,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(Cl)c2Cl)c(N)n1,10.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1961,C[C@@H]1OCC2(CCN(CC2)c2cnc(Sc3ccnc(N)c3Cl)c(N)n2)[C@@H]1N,11,BindingDB
1964,C[C@@H]1OCC2(CCN(CC2)c2cnc(Sc3ccnc(N)c3Cl)c(N)n2)[C@@H]1N,11,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1972,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccnc2C(F)(F)F)c(N)n1,12,BindingDB
64,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccnc2C(F)(F)F)c(N)n1,40,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1676,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)cn1,10.0,BindingDB
1983,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)cn1,13.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
298,CC1(CN)CCN(CC1)c1cnc(Sc2ccnc(Cl)c2Cl)c(N)n1,100.0,BindingDB
2000,CC1(CN)CCN(CC1)c1cnc(Sc2ccnc(Cl)c2Cl)c(N)n1,14.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2016,C[C@H]1OCC2(CCN(CC2)c2nc(C)n(-c3cccc(Cl)c3Cl)c(=O)c2C)[C@@H]1N,15.0,BindingDB
1662,C[C@H]1OCC2(CCN(CC2)c2nc(C)n(-c3cccc(Cl)c3Cl)c(=O)c2C)[C@@H]1N,9.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
6,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,1.000,BindingDB
693,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,1200,BindingDB
2010,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,15.0,BindingDB
2028,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,17.0,BindingDB
20,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,2.00,BindingDB
420,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,270,BindingDB
450,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,330,BindingDB
520,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,530,BindingDB
1115,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,6.00,BindingDB
1116,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,6.00,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2029,CC1(CN)CCN(CC1)c1cnc(Sc2cccc(Cl)c2Cl)c(N)n1,17,BindingDB
266,CC1(CN)CCN(CC1)c1cnc(Sc2cccc(Cl)c2Cl)c(N)n1,88,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1949,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3ccnc(C4CC4)c3Cl)c(=O)n2C)[C@@H]1N,10.0,BindingDB
1699,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3ccnc(C4CC4)c3Cl)c(=O)n2C)[C@@H]1N,10.0,BindingDB
2006,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3ccnc(C4CC4)c3Cl)c(=O)n2C)[C@@H]1N,14.0,BindingDB
1998,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3ccnc(C4CC4)c3Cl)c(=O)n2C)[C@@H]1N,14.0,BindingDB
2031,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3ccnc(C4CC4)c3Cl)c(=O)n2C)[C@@H]1N,17.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1980,Cn1c(nc(N)c(Sc2cccc(Cl)c2Cl)c1=O)N1CCC(C)(CN)CC1,12.0,BindingDB
1978,Cn1c(nc(N)c(Sc2cccc(Cl)c2Cl)c1=O)N1CCC(C)(CN)CC1,12.0,BindingDB
2058,Cn1c(nc(N)c(Sc2cccc(Cl)c2Cl)c1=O)N1CCC(C)(CN)CC1,21.0,BindingDB
105,Cn1c(nc(N)c(Sc2cccc(Cl)c2Cl)c1=O)N1CCC(C)(CN)CC1,49.0,BindingDB
104,Cn1c(nc(N)c(Sc2cccc(Cl)c2Cl)c1=O)N1CCC(C)(CN)CC1,49.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2063,C[C@@H]1OCC2(CCN(CC2)c2nc(C)c(-c3cccc(Cl)c3Cl)c(n2)C(N)=O)[C@@H]1N,22.0,BindingDB
853,C[C@@H]1OCC2(CCN(CC2)c2nc(C)c(-c3cccc(Cl)c3Cl)c(n2)C(N)=O)[C@@H]1N,2600.0,BindingDB
97,C[C@@H]1OCC2(CCN(CC2)c2nc(C)c(-c3cccc(Cl)c3Cl)c(n2)C(N)=O)[C@@H]1N,48.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2068,CNc1nccc(-c2n[nH]c3nc(N4CCC5(CO[C@@H](C)[C@H]5N)CC4)n(C)c(=O)c23)c1Cl,22.0,BindingDB
2067,CNc1nccc(-c2n[nH]c3nc(N4CCC5(CO[C@@H](C)[C@H]5N)CC4)n(C)c(=O)c23)c1Cl,22.0,BindingDB
152,CNc1nccc(-c2n[nH]c3nc(N4CCC5(CO[C@@H](C)[C@H]5N)CC4)n(C)c(=O)c23)c1Cl,54.0,BindingDB
155,CNc1nccc(-c2n[nH]c3nc(N4CCC5(CO[C@@H](C)[C@H]5N)CC4)n(C)c(=O)c23)c1Cl,55.0,BindingDB
156,CNc1nccc(-c2n[nH]c3nc(N4CCC5(CO[C@@H](C)[C@H]5N)CC4)n(C)c(=O)c23)c1Cl,55.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2062,CC1(CN)CCN(CC1)c1cnc(Sc2cccc(N)c2Cl)c(N)n1,22.0,BindingDB
2070,CC1(CN)CCN(CC1)c1cnc(Sc2cccc(N)c2Cl)c(N)n1,22.0,BindingDB
267,CC1(CN)CCN(CC1)c1cnc(Sc2cccc(N)c2Cl)c(N)n1,89.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
331,CC1(CN)CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)c(N)n1,123.0,BindingDB
2074,CC1(CN)CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)c(N)n1,23.0,BindingDB
2082,CC1(CN)CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)c(N)n1,23.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2069,C[C@H]1C[C@@H](N)C2(C1)CCN(CC2)c1nc(N)c(Sc2cccnc2C(F)(F)F)c(=O)n1C,22.0,BindingDB
2072,C[C@H]1C[C@@H](N)C2(C1)CCN(CC2)c1nc(N)c(Sc2cccnc2C(F)(F)F)c(=O)n1C,22.0,BindingDB
2083,C[C@H]1C[C@@H](N)C2(C1)CCN(CC2)c1nc(N)c(Sc2cccnc2C(F)(F)F)c(=O)n1C,23.0,BindingDB
556,C[C@H]1C[C@@H](N)C2(C1)CCN(CC2)c1nc(N)c(Sc2cccnc2C(F)(F)F)c(=O)n1C,4.0,BindingDB
559,C[C@H]1C[C@@H](N)C2(C1)CCN(CC2)c1nc(N)c(Sc2cccnc2C(F)(F)F)c(=O)n1C,4.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2043,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,18.0,BindingDB
2045,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,18.0,BindingDB
2078,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,23.0,BindingDB
2085,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,23.0,BindingDB
148,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,53.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1994,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)c(N)n1,14,BindingDB
2086,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2ccnc(N)c2Cl)c(N)n1,24,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1341,[O-][V]([O-])([O-])=O,10680.0,BindingDB
1353,[O-][V]([O-])([O-])=O,11620.0,BindingDB
1352,[O-][V]([O-])([O-])=O,11620.0,BindingDB
1351,[O-][V]([O-])([O-])=O,11620.0,BindingDB
1402,[O-][V]([O-])([O-])=O,14720.0,BindingDB
789,[O-][V]([O-])([O-])=O,2000.0,BindingDB
2089,[O-][V]([O-])([O-])=O,24.0,BindingDB
1474,[O-][V]([O-])([O-])=O,24050.0,BindingDB
1515,[O-][V]([O-])([O-])=O,34660.0,BindingDB
1175,[O-][V]([O-])([O-])=O,7430.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2106,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,26.0,BindingDB
2103,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,26.0,BindingDB
71,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,41.0,BindingDB
290,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,96.0,BindingDB
289,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@@H]1N,96.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2113,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(Sc4ccnc(N)c4Cl)c3c(=O)n2C)[C@@H]1N,27.0,BindingDB
2110,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(Sc4ccnc(N)c4Cl)c3c(=O)n2C)[C@@H]1N,27.0,BindingDB
2109,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(Sc4ccnc(N)c4Cl)c3c(=O)n2C)[C@@H]1N,27.0,BindingDB
288,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(Sc4ccnc(N)c4Cl)c3c(=O)n2C)[C@@H]1N,96.0,BindingDB
287,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(Sc4ccnc(N)c4Cl)c3c(=O)n2C)[C@@H]1N,96.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2026,C[C@H]1OCC2(CCN(CC2)c2cnc(Sc3ccnc(N)c3Cl)c(N)n2)[C@@H]1N,17,BindingDB
2116,C[C@H]1OCC2(CCN(CC2)c2cnc(Sc3ccnc(N)c3Cl)c(N)n2)[C@@H]1N,28,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1976,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4cccc(Cl)c4Cl)c3c(=O)n2C)[C@@H]1N,12.0,BindingDB
1979,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4cccc(Cl)c4Cl)c3c(=O)n2C)[C@@H]1N,12.0,BindingDB
2124,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4cccc(Cl)c4Cl)c3c(=O)n2C)[C@@H]1N,28.0,BindingDB
2120,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4cccc(Cl)c4Cl)c3c(=O)n2C)[C@@H]1N,28.0,BindingDB
2121,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4cccc(Cl)c4Cl)c3c(=O)n2C)[C@@H]1N,28.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2123,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(Cl)c4Cl)c3c(=O)n2C)[C@@H]1N,28,BindingDB
2125,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(Cl)c4Cl)c3c(=O)n2C)[C@@H]1N,28,BindingDB
276,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(Cl)c4Cl)c3c(=O)n2C)[C@@H]1N,93,BindingDB
277,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(Cl)c4Cl)c3c(=O)n2C)[C@@H]1N,93,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2126,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3ccnc(C)c3Cl)c(=O)n2C)[C@@H]1N,28,BindingDB
2129,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3ccnc(C)c3Cl)c(=O)n2C)[C@@H]1N,28,BindingDB
101,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3ccnc(C)c3Cl)c(=O)n2C)[C@@H]1N,48,BindingDB
98,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3ccnc(C)c3Cl)c(=O)n2C)[C@@H]1N,48,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
385,CC1(N)CCN(CC1)c1cnc(Sc2cccc(Cl)c2Cl)c(N)n1,195,BindingDB
2130,CC1(N)CCN(CC1)c1cnc(Sc2cccc(Cl)c2Cl)c(N)n1,29,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2042,C[C@@H]1CC2(CCN(CC2)c2nc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@H](N)[C@H]1F,18.0,BindingDB
2044,C[C@@H]1CC2(CCN(CC2)c2nc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@H](N)[C@H]1F,18.0,BindingDB
2131,C[C@@H]1CC2(CCN(CC2)c2nc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@H](N)[C@H]1F,29.0,BindingDB
587,C[C@@H]1CC2(CCN(CC2)c2nc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@H](N)[C@H]1F,5.0,BindingDB
584,C[C@@H]1CC2(CCN(CC2)c2nc(N)c(Sc3cccnc3C(F)(F)F)c(=O)n2C)[C@H](N)[C@H]1F,5.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
380,CC1(CN)CCN(CC1)c1cnc(Sc2cccnc2C(F)(F)F)c(N)n1,187.0,BindingDB
2133,CC1(CN)CCN(CC1)c1cnc(Sc2cccnc2C(F)(F)F)c(N)n1,29.0,BindingDB
2132,CC1(CN)CCN(CC1)c1cnc(Sc2cccnc2C(F)(F)F)c(N)n1,29.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
718,CC1(N)CCN(CC1)c1nc(Cl)c(c(n1)C(N)=O)-c1cccc(Cl)c1Cl,1400.0,BindingDB
2136,CC1(N)CCN(CC1)c1nc(Cl)c(c(n1)C(N)=O)-c1cccc(Cl)c1Cl,30.0,BindingDB
168,CC1(N)CCN(CC1)c1nc(Cl)c(c(n1)C(N)=O)-c1cccc(Cl)c1Cl,60.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
838,COc1ccc(cc1)-n1cc(-c2ccc3ccc(=O)oc3c2)c2cc(cnc12)-c1ccc(cc1)C(O)=O,2500,BindingDB
2144,COc1ccc(cc1)-n1cc(-c2ccc3ccc(=O)oc3c2)c2cc(cnc12)-c1ccc(cc1)C(O)=O,31,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
12,C[C@@H]1OCC2(CCN(CC2)c2nc(C)c(nc2CO)-c2cccc(Cl)c2Cl)[C@@H]1N,1.5,BindingDB
2145,C[C@@H]1OCC2(CCN(CC2)c2nc(C)c(nc2CO)-c2cccc(Cl)c2Cl)[C@@H]1N,31.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2141,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC2(CCC[C@H]2N)CC1,30,BindingDB
2139,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC2(CCC[C@H]2N)CC1,30,BindingDB
2147,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC2(CCC[C@H]2N)CC1,31,BindingDB
2146,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC2(CCC[C@H]2N)CC1,31,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2151,C[C@@H]1OCC2(CCN(CC2)c2cnc3c(n[nH]c3n2)-c2cccc(Cl)c2Cl)[C@@H]1N,31.0,BindingDB
2150,C[C@@H]1OCC2(CCN(CC2)c2cnc3c(n[nH]c3n2)-c2cccc(Cl)c2Cl)[C@@H]1N,31.0,BindingDB
1111,C[C@@H]1OCC2(CCN(CC2)c2cnc3c(n[nH]c3n2)-c2cccc(Cl)c2Cl)[C@@H]1N,6.0,BindingDB
1110,C[C@@H]1OCC2(CCN(CC2)c2cnc3c(n[nH]c3n2)-c2cccc(Cl)c2Cl)[C@@H]1N,6.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
663,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccnc3C(F)(F)F)c(=O)[nH]2)[C@@H]1N,1060.0,BindingDB
664,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccnc3C(F)(F)F)c(=O)[nH]2)[C@@H]1N,1060.0,BindingDB
2149,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccnc3C(F)(F)F)c(=O)[nH]2)[C@@H]1N,31.0,BindingDB
2148,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccnc3C(F)(F)F)c(=O)[nH]2)[C@@H]1N,31.0,BindingDB
2152,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccnc3C(F)(F)F)c(=O)[nH]2)[C@@H]1N,31.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2153,C[C@H]1C[C@@H](N)C2(C1)CCN(CC2)c1nc2[nH]nc(-c3ccnc(N)c3Cl)c2c(=O)n1C,32.0,BindingDB
2154,C[C@H]1C[C@@H](N)C2(C1)CCN(CC2)c1nc2[nH]nc(-c3ccnc(N)c3Cl)c2c(=O)n1C,32.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
307,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(N)c4Cl)c3c(=O)n2C)[C@@H]1N,103,BindingDB
306,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(N)c4Cl)c3c(=O)n2C)[C@@H]1N,103,BindingDB
2155,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(N)c4Cl)c3c(=O)n2C)[C@@H]1N,32,BindingDB
2159,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(N)c4Cl)c3c(=O)n2C)[C@@H]1N,32,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2162,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3cccc(Cl)c3Cl)c(=O)n2C)[C@@H]1N,33.0,BindingDB
585,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3cccc(Cl)c3Cl)c(=O)n2C)[C@@H]1N,5.0,BindingDB
581,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3cccc(Cl)c3Cl)c(=O)n2C)[C@@H]1N,5.0,BindingDB
586,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3cccc(Cl)c3Cl)c(=O)n2C)[C@@H]1N,5.0,BindingDB
583,C[C@@H]1OCC2(CCN(CC2)c2nc(N)c(Sc3cccc(Cl)c3Cl)c(=O)n2C)[C@@H]1N,5.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2170,Cn1c(nc(N)c(Sc2cccnc2C(F)(F)F)c1=O)N1CCC2(COC[C@H]2N)CC1,34.0,BindingDB
2167,Cn1c(nc(N)c(Sc2cccnc2C(F)(F)F)c1=O)N1CCC2(COC[C@H]2N)CC1,34.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2164,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(CN)CC1,34.0,BindingDB
2171,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(CN)CC1,34.0,BindingDB
2165,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(CN)CC1,34.0,BindingDB
461,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(CN)CC1,355.0,BindingDB
460,Cn1c(nc2[nH]nc(-c3cccc(Cl)c3Cl)c2c1=O)N1CCC(C)(CN)CC1,355.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
414,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CCC5)c4Cl)c3c(=O)n2C)[C@@H]1N,260.0,BindingDB
413,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CCC5)c4Cl)c3c(=O)n2C)[C@@H]1N,260.0,BindingDB
2174,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CCC5)c4Cl)c3c(=O)n2C)[C@@H]1N,35.0,BindingDB
456,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CCC5)c4Cl)c3c(=O)n2C)[C@@H]1N,350.0,BindingDB
457,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CCC5)c4Cl)c3c(=O)n2C)[C@@H]1N,350.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2177,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC5)c4Cl)c3c(=O)n2C)[C@@H]1N,36.0,BindingDB
2178,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC5)c4Cl)c3c(=O)n2C)[C@@H]1N,36.0,BindingDB
2179,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC5)c4Cl)c3c(=O)n2C)[C@@H]1N,36.0,BindingDB
2180,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC5)c4Cl)c3c(=O)n2C)[C@@H]1N,36.0,BindingDB
2176,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC5)c4Cl)c3c(=O)n2C)[C@@H]1N,36.0,BindingDB


******************************


Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
2187,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccc(N)nc4C)c3c(=O)n2C)[C@@H]1N,37.0,BindingDB
2188,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccc(N)nc4C)c3c(=O)n2C)[C@@H]1N,37.0,BindingDB
50,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccc(N)nc4C)c3c(=O)n2C)[C@@H]1N,37.0,BindingDB
56,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccc(N)nc4C)c3c(=O)n2C)[C@@H]1N,38.0,BindingDB
52,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccc(N)nc4C)c3c(=O)n2C)[C@@H]1N,38.0,BindingDB


******************************


In [37]:
not_duplicates = bindingDB_data.loc[~bindingDB_data.iloc[:, 0].duplicated(), :]
not_duplicates.shape

(1738, 3)

In [38]:
force_show_all(not_duplicates)

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@H]2N)CC1,0.800000,BindingDB
1,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3[C@@H]2N)CC1,0.800,BindingDB
2,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000000,BindingDB
3,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.000,BindingDB
5,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(O)nc4CCCCn4c3=O)c2Cl)c(N)n1,1.000000,BindingDB
6,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl)c2Cl)c(n1)C(N)=O,1.000,BindingDB
7,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C(Cl)=CCC2)c(n1)C(N)=O,1.000,BindingDB
9,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)c2Cl)c2nccn12,1.20,BindingDB
10,Cc1nc(N2CCC3(CC2)Oc2ccccc2[C@H]3N)c(C)c(=O)n1-c1cccc(Cl)c1Cl,1.40,BindingDB
12,C[C@@H]1OCC2(CCN(CC2)c2nc(C)c(nc2CO)-c2cccc(Cl)c2Cl)[C@@H]1N,1.5,BindingDB


In [39]:
not_duplicates.iloc[:, 0].duplicated().sum()

0

In [40]:
unique_df = pd.concat([not_duplicates, duplicates])
unique_df

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.800000,BindingDB
1,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.800,BindingDB
2,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1...,1.000000,BindingDB
3,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-...,1.000,BindingDB
5,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,1.000000,BindingDB
...,...,...,...
2178,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC...,36,BindingDB
2179,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC...,36,BindingDB
2180,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC...,36,BindingDB
2187,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccc(N)nc4C...,37,BindingDB


In [41]:
unique_df.shape

(2408, 3)

In [42]:
unique_df.iloc[:, 0].duplicated().sum()

670

In [43]:
duplicates_uni = unique_df.loc[unique_df.iloc[:, 0].duplicated(), :]
duplicates_uni

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.800000,BindingDB
2,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1...,1.000000,BindingDB
3,Cc1nc(N2CCC3(Cc4ccccc4[C@H]3N)CC2)c(C)c(=O)n1-...,1.000,BindingDB
4,Cc1nc(N2CCC3(Cc4ncccc4[C@@H]3N)CC2)c(C)c(=O)n1...,1.000,BindingDB
6,N[C@H]1c2cccnc2OC11CCN(CC1)c1nc(N)c(-c2cccc(Cl...,1.000,BindingDB
...,...,...,...
2178,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC...,36,BindingDB
2179,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC...,36,BindingDB
2180,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(NC5CC...,36,BindingDB
2187,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccc(N)nc4C...,37,BindingDB


In [44]:
# dropping duplicate values
unique_df.drop_duplicates(subset ="Ligand SMILES" ,keep='first',inplace=True)
unique_df.shape

(1738, 3)

## Removing duplicate molecules

In [45]:
# dropping duplicate values
bindingDB_data.drop_duplicates(subset ="Ligand SMILES" ,keep='last',inplace=True)
bindingDB_data

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.800,BindingDB
5,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,1.000000,BindingDB
7,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,1.000,BindingDB
9,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,1.20,BindingDB
13,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,1.50,BindingDB
...,...,...,...
2183,C[C@H]1OCC2(CCN(CC2)c2cc(=O)n(c(C)n2)-c2cccc(C...,37.0,BindingDB
2184,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(N)c4)...,37.0,BindingDB
2185,Cn1c(ncc(Sc2cccc(Cl)c2Cl)c1=O)N1CCC2(COC[C@H]2...,37.0,BindingDB
2186,Cn1c(nc2[nH]nc(-c3ccccc3)c2c1=O)N1CCC2(C[C@H](...,37.0,BindingDB


In [46]:
bindingDB_data.iloc[:, 0].duplicated().sum()

0

In [47]:
bindingDB_data.shape

(1738, 3)

In [48]:
def conversion_for_signs(s):
    if s[0] == '>' or s[0] == '<':
        return s[0] + str(float(s[1:])/1000)
    else:
        return str(float(s)/1000)

In [49]:
bindingDB_data['IC50 (nM)'] = bindingDB_data['IC50 (nM)'].apply(conversion_for_signs)
bindingDB_data

Unnamed: 0,Ligand SMILES,IC50 (nM),database_label
1,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.0008,BindingDB
5,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,0.001,BindingDB
7,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,0.001,BindingDB
9,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,0.0012,BindingDB
13,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,0.0015,BindingDB
...,...,...,...
2183,C[C@H]1OCC2(CCN(CC2)c2cc(=O)n(c(C)n2)-c2cccc(C...,0.037,BindingDB
2184,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(N)c4)...,0.037,BindingDB
2185,Cn1c(ncc(Sc2cccc(Cl)c2Cl)c1=O)N1CCC2(COC[C@H]2...,0.037,BindingDB
2186,Cn1c(nc2[nH]nc(-c3ccccc3)c2c1=O)N1CCC2(C[C@H](...,0.037,BindingDB


In [50]:
bindingDB_data.columns = ['SMILES', 'IC50(microM)','database_label']
bindingDB_data

Unnamed: 0,SMILES,IC50(microM),database_label
1,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.0008,BindingDB
5,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,0.001,BindingDB
7,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,0.001,BindingDB
9,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,0.0012,BindingDB
13,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,0.0015,BindingDB
...,...,...,...
2183,C[C@H]1OCC2(CCN(CC2)c2cc(=O)n(c(C)n2)-c2cccc(C...,0.037,BindingDB
2184,C[C@@H]1OCC2(CCN(CC2)c2nc3[nH]nc(-c4ccnc(N)c4)...,0.037,BindingDB
2185,Cn1c(ncc(Sc2cccc(Cl)c2Cl)c1=O)N1CCC2(COC[C@H]2...,0.037,BindingDB
2186,Cn1c(nc2[nH]nc(-c3ccccc3)c2c1=O)N1CCC2(C[C@H](...,0.037,BindingDB


# Review Papers

In [51]:
Literature_1_df = pd.read_csv('..\Raw_data\Literature\Literature_1.csv')
Literature_1_df  = Literature_1_df [['Ligand SMILES', 'IC50 (micro)']]
Literature_1_df .columns = ['SMILES', 'IC50(microM)']
Literature_1_df ['database_label'] = 'Literature_1'
Literature_1_df 

Unnamed: 0,SMILES,IC50(microM),database_label
0,O=C1C(O)=CC(C(O)=O)=CC2=CC=C(O)C(O)=C21,0.097,Literature_1
1,NC1=CC2=NSN=C2C=C1C#C,2.110,Literature_1
2,OC1=C(C(N2C3=CC=C(C=C3SC2=N1)OC)=O)CC4=CC=CC=C4,10.000,Literature_1
3,O=C(C=C)NC1=C2C=CC=CC2=NC=C1,35.000,Literature_1
4,O=C1N(C2=CCCN(C2)CC3=CC(C(/C=C/C4=CC=C(Cl)C=C4...,9.800,Literature_1
...,...,...,...
56,CCCCC/C=C\C/C=C\CCCCCCCCCCCCC(O)=O,23.900,Literature_1
57,OC1=CC(/C=C/C(O)=O)=CC=C1O,26.600,Literature_1
58,O[C@@H]([C@@H]([C@H]([C@H](CO)O1)O)O)C1OCC(O)C...,20.700,Literature_1
59,[H][C@]1(O[C@@]2(OC[C@H](C)CC2)[C@H]3C)C[C@@]4...,1.700,Literature_1


In [52]:
Literature_2_df  = pd.read_csv('..\Raw_data\Literature\Literature_2.csv')
Literature_2_df  = Literature_2_df [['Ligand SMILES', 'IC50 (microM)']]
Literature_2_df .columns = ['SMILES', 'IC50(microM)']
Literature_2_df ['database_label'] = 'Literature_2'
Literature_2_df 

Unnamed: 0,SMILES,IC50(microM),database_label
0,ClC1=C(Cl)C(C2=NC=C(N3CCC(N)(C)CC3)N=C2N)=CC=C1,0.07,Literature_2
1,ClC1=C(Cl)C(C2=C(N)N=C(N3C[C@H](C)N[C@H](C)C3)...,12.00,Literature_2
2,ClC1=CC=CC=C1CN2C3=NN=C(C4=C(O)C(OC)=CC=C4)N3C...,60.00,Literature_2
3,O=S(C1=CC=C2C(C=CC(/N=N/C3=CC(S(=O)(O)=O)=C(C=...,0.30,Literature_2
4,O=C1CC2=CC=C(S(NCC3=CC=C(F)C=C3)(=O)=O)C=C2/C1...,1.00,Literature_2
...,...,...,...
90,NC1=C(N2CCC(CN)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.64,Literature_2
91,NC1=C(N2CCC(C)(N)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.58,Literature_2
92,NC1=C(NC2=CC(N)=CC=C2)N=CC(C3=CC=CN=C3)=C1,9.47,Literature_2
93,NCC(CC1)CCN1C2=C(CO)C=C(C3=CC=CC(Cl)=C3Cl)C=N2,1.36,Literature_2


In [53]:
review_paper_data = pd.concat([Literature_1_df,Literature_2_df])
review_paper_data

Unnamed: 0,SMILES,IC50(microM),database_label
0,O=C1C(O)=CC(C(O)=O)=CC2=CC=C(O)C(O)=C21,0.097,Literature_1
1,NC1=CC2=NSN=C2C=C1C#C,2.110,Literature_1
2,OC1=C(C(N2C3=CC=C(C=C3SC2=N1)OC)=O)CC4=CC=CC=C4,10.000,Literature_1
3,O=C(C=C)NC1=C2C=CC=CC2=NC=C1,35.000,Literature_1
4,O=C1N(C2=CCCN(C2)CC3=CC(C(/C=C/C4=CC=C(Cl)C=C4...,9.800,Literature_1
...,...,...,...
90,NC1=C(N2CCC(CN)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.640,Literature_2
91,NC1=C(N2CCC(C)(N)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.580,Literature_2
92,NC1=C(NC2=CC(N)=CC=C2)N=CC(C3=CC=CN=C3)=C1,9.470,Literature_2
93,NCC(CC1)CCN1C2=C(CO)C=C(C3=CC=CC(Cl)=C3Cl)C=N2,1.360,Literature_2


In [None]:
#review_paper_data[review_paper_data['SMILES']=='']

# Combined data = BindingDB + Review Paper

In [54]:
bindingDB_data['database_label'].value_counts()

BindingDB    1738
Name: database_label, dtype: int64

In [55]:
review_paper_data['database_label'].value_counts()

Literature_2    95
Literature_1    61
Name: database_label, dtype: int64

In [56]:
combined_data = pd.concat([bindingDB_data,review_paper_data])
combined_data

Unnamed: 0,SMILES,IC50(microM),database_label
1,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.0008,BindingDB
5,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,0.001,BindingDB
7,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,0.001,BindingDB
9,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,0.0012,BindingDB
13,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,0.0015,BindingDB
...,...,...,...
90,NC1=C(N2CCC(CN)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.64,Literature_2
91,NC1=C(N2CCC(C)(N)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.58,Literature_2
92,NC1=C(NC2=CC(N)=CC=C2)N=CC(C3=CC=CN=C3)=C1,9.47,Literature_2
93,NCC(CC1)CCN1C2=C(CO)C=C(C3=CC=CC(Cl)=C3Cl)C=N2,1.36,Literature_2


In [57]:
combined_data = combined_data.reset_index()
combined_data = combined_data.drop(['index'],axis=1)
combined_data

Unnamed: 0,SMILES,IC50(microM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.0008,BindingDB
1,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,0.001,BindingDB
2,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,0.001,BindingDB
3,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,0.0012,BindingDB
4,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,0.0015,BindingDB
...,...,...,...
1889,NC1=C(N2CCC(CN)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.64,Literature_2
1890,NC1=C(N2CCC(C)(N)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.58,Literature_2
1891,NC1=C(NC2=CC(N)=CC=C2)N=CC(C3=CC=CN=C3)=C1,9.47,Literature_2
1892,NCC(CC1)CCN1C2=C(CO)C=C(C3=CC=CC(Cl)=C3Cl)C=N2,1.36,Literature_2


In [58]:
combined_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1894 entries, 0 to 1893
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   SMILES          1894 non-null   object
 1   IC50(microM)    1894 non-null   object
 2   database_label  1894 non-null   object
dtypes: object(3)
memory usage: 44.5+ KB


## Missing Values

In [59]:
combined_data.isnull().sum()

SMILES            0
IC50(microM)      0
database_label    0
dtype: int64

## Removing Duplicates

In [60]:
combined_data.iloc[:, 0].duplicated().sum()

6

In [61]:
combined_duplicates = combined_data.loc[combined_data.iloc[:, 0].duplicated(keep=False), :]
force_show_all(combined_duplicates)

Unnamed: 0,SMILES,IC50(microM),database_label
1739,NC1=CC2=NSN=C2C=C1C#C,2.11,Literature_1
1743,NC1=NC(N2CCC(C)(N)CC2)=CN=C1C3=CC=CC(Cl)=C3Cl,0.07,Literature_1
1748,O=C(CCN1C=C(N=N1)C(C2=C(C=C(C(C(O)=O)=C2)O)N3C)=C3C4=CC=CC=C4)NC(C=C5)=CC=C5C6=CC=CC=C6,5.5,Literature_1
1750,NC1=NC(N2CCC(C)(N)CC2)=CN=C1C3=CC=CC(Cl)=C3Cl,0.071,Literature_1
1769,O=C(CCN1C=C(N=N1)C(C2=C(C=C(C(C(O)=O)=C2)O)N3C)=C3C4=CC=CC=C4)NC(C=C5)=CC=C5C6=CC=CC=C6,5.5,Literature_1
1798,O=C1O[C@@]2(O)CC[C@@]3(C)[C@](CC[C@]3([H])[C@H](C)/C=C/[C@H](C)C(C)C)([H])C2=C1,6.75,Literature_1
1799,ClC1=C(Cl)C(C2=NC=C(N3CCC(N)(C)CC3)N=C2N)=CC=C1,0.07,Literature_2
1801,ClC1=CC=CC=C1CN2C3=NN=C(C4=C(O)C(OC)=CC=C4)N3C5=CC=CC=C5C2=O,60.0,Literature_2
1815,O=C1O[C@@]2(O)CC[C@@]3(C)[C@](CC[C@]3([H])[C@H](C)/C=C/[C@H](C)C(C)C)([H])C2=C1,0.02,Literature_2
1845,ClC1=C(Cl)C(C2=NC=C(N3CCC(N)(C)CC3)N=C2N)=CC=C1,0.07,Literature_2


In [62]:
combined_data.drop_duplicates(subset ="SMILES" ,keep='last',inplace=True)
combined_data

Unnamed: 0,SMILES,IC50(microM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.0008,BindingDB
1,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,0.001,BindingDB
2,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,0.001,BindingDB
3,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,0.0012,BindingDB
4,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,0.0015,BindingDB
...,...,...,...
1889,NC1=C(N2CCC(CN)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.64,Literature_2
1890,NC1=C(N2CCC(C)(N)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.58,Literature_2
1891,NC1=C(NC2=CC(N)=CC=C2)N=CC(C3=CC=CN=C3)=C1,9.47,Literature_2
1892,NCC(CC1)CCN1C2=C(CO)C=C(C3=CC=CC(Cl)=C3Cl)C=N2,1.36,Literature_2


In [63]:
combined_data['database_label'].value_counts()

BindingDB       1738
Literature_2      93
Literature_1      57
Name: database_label, dtype: int64

## SMILES Validity

In [64]:
def check_smiles_validity(df):
    smiles = list(df['SMILES'].values)
    invalid_smile_indices = []
    invalid_chemistry_indices = []

    for idx, smile in enumerate(tqdm(smiles)):
        m = Chem.MolFromSmiles(smile, sanitize=False)
        if m is None:
            print(idx, smile, "invalid smile")
            invalid_smile_indices.append(idx)
        else:
            try:
                Chem.SanitizeMol(m)
            except:
                print(idx, smile, "invalid chemistry")
                invalid_chemistry_indices.append(idx)
  
    return invalid_smile_indices, invalid_chemistry_indices

In [65]:
invalid_smiles, invalid_chemistry = check_smiles_validity(combined_data)

  0%|          | 0/1888 [00:00<?, ?it/s]

626 Clc1ccc2O[V]3(=O)(Oc4ccc(Cl)cc4C=[N]3c3ccccc3)[N](=Cc2c1)c1ccccc1 invalid chemistry
647 [O-][N+](=O)c1ccc2O[V]3(=O)(Oc4ccc(cc4C=[N]3c3ccccc3)[N+]([O-])=O)[N](=Cc2c1)c1ccccc1 invalid chemistry
734 [OH2+][V]12([OH2+])(=O)OC(=O)c3ccccc3[N]1=Cc1cc(ccc1O2)[N+]([O-])=O invalid chemistry
1187 Brc1ccc2O[V]3(=O)(Oc4ccc(Br)cc4C=[N]3c3ccccc3)[N](=Cc2c1)c1ccccc1 invalid chemistry
1219 [OH2+][V]12([OH2+])(=O)OC(=O)c3ccccc3[N]1=Cc1cc(Br)ccc1O2 invalid chemistry
1269 ON(=O)c1cccc(N\N=C2/C(=O)Nc3ccc(cc23)S(O)(=O)=O)c1 invalid chemistry
1430 ON(=O)c1cccc(N\N=C2/C(=O)Nc3ccccc23)c1 invalid chemistry
1870 O=c(C1#NNC2=CC=C(S(=O)(O)=O)C=C2)n(C3=CC=CC=C3)N=C1C4=CC=C([N+]([O-])=O)C=C4 invalid chemistry
1878 O=C(C1CCN(C(C2=CC=CO2)=O)CC1)NC3=CC=C(C4=CC=C(NC(C5CCN(C(C6=CC=CO6)=O)CC5)#[OH])C=C4)C=C3 invalid chemistry


[23:56:00] Explicit valence for atom # 17 N, 4, is greater than permitted
[23:56:00] Explicit valence for atom # 18 N, 4, is greater than permitted
[23:56:00] Explicit valence for atom # 13 N, 4, is greater than permitted
[23:56:00] Explicit valence for atom # 17 N, 4, is greater than permitted
[23:56:00] Explicit valence for atom # 13 N, 4, is greater than permitted
[23:56:00] Explicit valence for atom # 1 N, 4, is greater than permitted
[23:56:00] Explicit valence for atom # 1 N, 4, is greater than permitted
[23:56:01] Explicit valence for atom # 2 C, 5, is greater than permitted
[23:56:01] Explicit valence for atom # 25 C, 5, is greater than permitted


In [66]:
invalid_smiles

[]

In [67]:
invalid_chemistry

[626, 647, 734, 1187, 1219, 1269, 1430, 1870, 1878]

In [68]:
force_show_all(combined_data.loc[invalid_chemistry])

Unnamed: 0,SMILES,IC50(microM),database_label
626,Clc1ccc2O[V]3(=O)(Oc4ccc(Cl)cc4C=[N]3c3ccccc3)[N](=Cc2c1)c1ccccc1,2.3,BindingDB
647,[O-][N+](=O)c1ccc2O[V]3(=O)(Oc4ccc(cc4C=[N]3c3ccccc3)[N+]([O-])=O)[N](=Cc2c1)c1ccccc1,2.6,BindingDB
734,[OH2+][V]12([OH2+])(=O)OC(=O)c3ccccc3[N]1=Cc1cc(ccc1O2)[N+]([O-])=O,4.2,BindingDB
1187,Brc1ccc2O[V]3(=O)(Oc4ccc(Br)cc4C=[N]3c3ccccc3)[N](=Cc2c1)c1ccccc1,35.0,BindingDB
1219,[OH2+][V]12([OH2+])(=O)OC(=O)c3ccccc3[N]1=Cc1cc(Br)ccc1O2,49.0,BindingDB
1269,ON(=O)c1cccc(N\N=C2/C(=O)Nc3ccc(cc23)S(O)(=O)=O)c1,66.5,BindingDB
1430,ON(=O)c1cccc(N\N=C2/C(=O)Nc3ccccc23)c1,>100.0,BindingDB
1870,O=C1C(SC2=C(C(F)(F)F)N=CC=C2)=CN=C(N3CCC4(CO[C@@H](C)[C@H]4N)CC3)N1,0.031,Literature_2
1878,O=C(O)[C@H]([C@@]1([H])NC(C(O)=O)=C(C)CS1)NC([C@H](S(=O)(O)=O)C2=CC=CC=C2)=O,1.38,Literature_2


In [None]:
# combined_data.loc[invalid_chemistry[0],'SMILES'] = r"Clc1cc2c(O[V]3(Oc4c(CN3c5ccccc5)cc(Cl)cc4)(N(c6ccccc6)C2)=O)cc1"
# combined_data.loc[invalid_chemistry[1],'SMILES'] = r"[O-][N+](c1cc2c(O[V]3(Oc4c(CN3c5ccccc5)cc([N+]([O-])=O)cc4)(N(c6ccccc6)C2)=O)cc1)=O"
# combined_data.loc[invalid_chemistry[2],'SMILES'] = r"[OH2+][V]12([OH2+])(OC(c3c(N1Cc4c(O2)ccc([N+]([O-])=O)c4)cccc3)=O)=O"
# combined_data.loc[invalid_chemistry[3],'SMILES'] = r"Brc1cc2c(O[V]3(Oc4c(CN3c5ccccc5)cc(Br)cc4)(N(c6ccccc6)C2)=O)cc1"
# combined_data.loc[invalid_chemistry[4],'SMILES'] = r"[OH2+][V]12([OH2+])(OC(c3ccccc3N1Cc4cc(Br)ccc4O2)=O)=O"
# combined_data.loc[invalid_chemistry[5],'SMILES'] = r"O=Nc1cccc(N/N=C2C(Nc3ccc(S(O)(=O)=O)cc3\2)=O)c1"
# combined_data.loc[invalid_chemistry[6],'SMILES'] = r"O=Nc1cccc(N/N=C2C(Nc3ccccc3\2)=O)c1"
# combined_data.loc[invalid_chemistry[7],'SMILES'] = r"O=c(C1=NNC2=CC=C(C=C2)S(=O)(O)=O)n(N=C1C3=CC=C(C=C3)[N+]([O-])=O)C4=CC=CC=C4"
# combined_data.loc[invalid_chemistry[8],'SMILES'] = r"O=C(NC1=CC=C(C=C1)C2=CC=C(C=C2)NC(C3CCN(CC3)C(C4=CC=CO4)=O)=O)C5CCN(CC5)C(C6=CC=CO6)=O"

In [69]:
def modify_dataframe(data, indices):
    data = data.drop(indices, axis=0)
    data = data.reset_index()
    data = data.drop(['index'],axis=1)
    return data

In [70]:
combined_data = modify_dataframe(combined_data, invalid_chemistry)
combined_data

Unnamed: 0,SMILES,IC50(microM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.0008,BindingDB
1,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,0.001,BindingDB
2,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,0.001,BindingDB
3,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,0.0012,BindingDB
4,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,0.0015,BindingDB
...,...,...,...
1874,NC1=C(N2CCC(CN)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.64,Literature_2
1875,NC1=C(N2CCC(C)(N)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.58,Literature_2
1876,NC1=C(NC2=CC(N)=CC=C2)N=CC(C3=CC=CN=C3)=C1,9.47,Literature_2
1877,NCC(CC1)CCN1C2=C(CO)C=C(C3=CC=CC(Cl)=C3Cl)C=N2,1.36,Literature_2


In [71]:
combined_data.shape

(1879, 3)

In [72]:
invalid_smiles, invalid_chemistry = check_smiles_validity(combined_data)

  0%|          | 0/1879 [00:00<?, ?it/s]

1862 O=c(C1#NNC2=CC=C(S(=O)(O)=O)C=C2)n(C3=CC=CC=C3)N=C1C4=CC=C([N+]([O-])=O)C=C4 invalid chemistry
1869 O=C(C1CCN(C(C2=CC=CO2)=O)CC1)NC3=CC=C(C4=CC=C(NC(C5CCN(C(C6=CC=CO6)=O)CC5)#[OH])C=C4)C=C3 invalid chemistry


[23:56:34] Explicit valence for atom # 2 C, 5, is greater than permitted
[23:56:34] Explicit valence for atom # 25 C, 5, is greater than permitted


In [73]:
# combined_data.loc[invalid_chemistry[0],'SMILES'] = r'O=c(C1=NNC2=CC=C(C=C2)S(=O)(O)=O)n(N=C1C3=CC=C(C=C3)[N+]([O-])=O)C4=CC=CC=C4'
# combined_data.loc[invalid_chemistry[1],'SMILES'] = r'O=C(NC1=CC=C(C=C1)C2=CC=C(C=C2)NC(C3CCN(CC3)C(C4=CC=CO4)=O)=O)C5CCN(CC5)C(C6=CC=CO6)=O'

In [74]:
combined_data = modify_dataframe(combined_data, invalid_chemistry)
combined_data

Unnamed: 0,SMILES,IC50(microM),database_label
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.0008,BindingDB
1,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,0.001,BindingDB
2,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,0.001,BindingDB
3,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,0.0012,BindingDB
4,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,0.0015,BindingDB
...,...,...,...
1872,NC1=C(N2CCC(CN)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.64,Literature_2
1873,NC1=C(N2CCC(C)(N)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.58,Literature_2
1874,NC1=C(NC2=CC(N)=CC=C2)N=CC(C3=CC=CN=C3)=C1,9.47,Literature_2
1875,NCC(CC1)CCN1C2=C(CO)C=C(C3=CC=CC(Cl)=C3Cl)C=N2,1.36,Literature_2


In [75]:
invalid_smiles, invalid_chemistry = check_smiles_validity(combined_data)

  0%|          | 0/1877 [00:00<?, ?it/s]

In [76]:
combined_data.shape

(1877, 3)

## Calculating RDKit Descriptors

### 2D

In [79]:
class RDKit_2D:
    def __init__(self, smiles):
        self.mols = [Chem.MolFromSmiles(i) for i in smiles]
        self.smiles = smiles
        
        
    def compute_2Drdkit(self, name):
        rdkit_2d_desc = []
        calc = MoleculeDescriptors.MolecularDescriptorCalculator([x[0] for x in Descriptors._descList])
        header = calc.GetDescriptorNames()
        for i in tqdm(range(len(self.mols))):
            try:
                ds = calc.CalcDescriptors(self.mols[i])
                rdkit_2d_desc.append(ds)
            except:
                print(self.smiles[i])
        df = pd.DataFrame(rdkit_2d_desc,columns=header)
        df.insert(loc=0, column='SMILES', value=self.smiles)
        df.to_csv(name + '_RDKit_2D.csv', index=False)

In [93]:
def main():
    filename = "shp2_1877"         
    smiles = [i for i in combined_data['SMILES'].values]  
    ## Compute RDKit_2D Fingerprints and export a csv file.
    RDKit_descriptor = RDKit_2D(smiles)        # create your RDKit_2D object and provide smiles
    RDKit_descriptor.compute_2Drdkit(filename) # compute RDKit_2D and provide the name of your desired output file. you can use the same name as the input file because the RDKit_2D class will ensure to add "_RDKit_2D.csv" as part of the output file.

if __name__ == '__main__':
    main()

  0%|          | 0/1877 [00:00<?, ?it/s]

In [81]:
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)

In [94]:
descriptor_data = pd.read_csv('shp2_1877_RDKit_2D.csv')
descriptor_data

Unnamed: 0,SMILES,MaxEStateIndex,MinEStateIndex,MaxAbsEStateIndex,MinAbsEStateIndex,qed,MolWt,HeavyAtomMolWt,ExactMolWt,NumValenceElectrons,...,fr_sulfide,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_tetrazole,fr_thiazole,fr_thiocyan,fr_thiophene,fr_unbrch_alkane,fr_urea
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,12.992564,-0.169513,12.992564,0.068072,0.609173,455.389,431.197,454.132717,160,...,0,0,0,0,0,0,0,0,0,0
1,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,13.061568,-0.781987,13.061568,0.246174,0.340043,597.145,563.881,596.208486,216,...,0,0,0,0,0,0,0,0,0,0
2,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,12.340744,-0.676170,12.340744,0.017202,0.606557,485.419,459.211,484.154515,172,...,0,0,0,0,0,0,0,0,0,0
3,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,10.063495,-0.264210,10.063495,0.058620,0.603375,464.422,441.238,463.100037,158,...,0,0,0,0,0,0,0,0,0,0
4,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,6.464779,0.086398,6.464779,0.086398,0.614971,464.422,441.238,463.100037,158,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1872,NC1=C(N2CCC(CN)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,6.277779,0.520256,6.277779,0.520256,0.882336,351.281,331.121,350.106502,122,...,0,0,0,0,0,0,0,0,0,0
1873,NC1=C(N2CCC(C)(N)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,6.275110,-0.097703,6.275110,0.097703,0.859533,351.281,331.121,350.106502,122,...,0,0,0,0,0,0,0,0,0,0
1874,NC1=C(NC2=CC(N)=CC=C2)N=CC(C3=CC=CN=C3)=C1,6.067947,0.568313,6.067947,0.568313,0.640083,277.331,262.211,277.132745,104,...,0,0,0,0,0,0,0,0,0,0
1875,NCC(CC1)CCN1C2=C(CO)C=C(C3=CC=CC(Cl)=C3Cl)C=N2,9.796768,-0.064310,9.796768,0.064310,0.865831,366.292,345.124,365.106168,128,...,0,0,0,0,0,0,0,0,0,0


In [95]:
descriptor_data['IC50(microM)'] = combined_data['IC50(microM)']
descriptor_data.shape

(1877, 210)

In [96]:
descriptor_data.isnull().sum()

SMILES               0
MaxEStateIndex       0
MinEStateIndex       0
MaxAbsEStateIndex    0
MinAbsEStateIndex    0
                    ..
fr_thiocyan          0
fr_thiophene         0
fr_unbrch_alkane     0
fr_urea              0
IC50(microM)         0
Length: 210, dtype: int64

In [97]:
descriptor_data_na = descriptor_data[descriptor_data.isna().any(axis=1)]
descriptor_data_na

Unnamed: 0,SMILES,MaxEStateIndex,MinEStateIndex,MaxAbsEStateIndex,MinAbsEStateIndex,qed,MolWt,HeavyAtomMolWt,ExactMolWt,NumValenceElectrons,...,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_tetrazole,fr_thiazole,fr_thiocyan,fr_thiophene,fr_unbrch_alkane,fr_urea,IC50(microM)
1655,[O-][V]([O-])([O-])=O,8.609375,-5.875,8.609375,5.875,0.324875,114.938,114.938,113.928463,32,...,0,0,0,0,0,0,0,0,0,0.024
1822,O=C1C=CC2=C(C3=CC=CC=C3C(O)=O)C4=CC=C(O)C([As]...,13.194508,-1.742006,13.194508,0.01005,0.244942,664.515,646.371,663.846901,178,...,0,0,0,0,0,0,0,0,0,0.074


In [98]:
descriptor_data_na['SMILES'].values

array(['[O-][V]([O-])([O-])=O',
       'O=C1C=CC2=C(C3=CC=CC=C3C(O)=O)C4=CC=C(O)C([As]5SCCS5)=C4OC2=C1[As]6SCCS6'],
      dtype=object)

In [99]:
descriptor_data = descriptor_data.dropna()
descriptor_data

Unnamed: 0,SMILES,MaxEStateIndex,MinEStateIndex,MaxAbsEStateIndex,MinAbsEStateIndex,qed,MolWt,HeavyAtomMolWt,ExactMolWt,NumValenceElectrons,...,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_tetrazole,fr_thiazole,fr_thiocyan,fr_thiophene,fr_unbrch_alkane,fr_urea,IC50(microM)
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,12.992564,-0.169513,12.992564,0.068072,0.609173,455.389,431.197,454.132717,160,...,0,0,0,0,0,0,0,0,0,0.0008
1,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,13.061568,-0.781987,13.061568,0.246174,0.340043,597.145,563.881,596.208486,216,...,0,0,0,0,0,0,0,0,0,0.001
2,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,12.340744,-0.676170,12.340744,0.017202,0.606557,485.419,459.211,484.154515,172,...,0,0,0,0,0,0,0,0,0,0.001
3,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,10.063495,-0.264210,10.063495,0.058620,0.603375,464.422,441.238,463.100037,158,...,0,0,0,0,0,0,0,0,0,0.0012
4,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,6.464779,0.086398,6.464779,0.086398,0.614971,464.422,441.238,463.100037,158,...,0,0,0,0,0,0,0,0,0,0.0015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1872,NC1=C(N2CCC(CN)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,6.277779,0.520256,6.277779,0.520256,0.882336,351.281,331.121,350.106502,122,...,0,0,0,0,0,0,0,0,0,1.64
1873,NC1=C(N2CCC(C)(N)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,6.275110,-0.097703,6.275110,0.097703,0.859533,351.281,331.121,350.106502,122,...,0,0,0,0,0,0,0,0,0,1.58
1874,NC1=C(NC2=CC(N)=CC=C2)N=CC(C3=CC=CN=C3)=C1,6.067947,0.568313,6.067947,0.568313,0.640083,277.331,262.211,277.132745,104,...,0,0,0,0,0,0,0,0,0,9.47
1875,NCC(CC1)CCN1C2=C(CO)C=C(C3=CC=CC(Cl)=C3Cl)C=N2,9.796768,-0.064310,9.796768,0.064310,0.865831,366.292,345.124,365.106168,128,...,0,0,0,0,0,0,0,0,0,1.36


In [100]:
training_molecules = descriptor_data[['SMILES','IC50(microM)']]
training_molecules

Unnamed: 0,SMILES,IC50(microM)
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,0.0008
1,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,0.001
2,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,0.001
3,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,0.0012
4,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,0.0015
...,...,...
1872,NC1=C(N2CCC(CN)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.64
1873,NC1=C(N2CCC(C)(N)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,1.58
1874,NC1=C(NC2=CC(N)=CC=C2)N=CC(C3=CC=CN=C3)=C1,9.47
1875,NCC(CC1)CCN1C2=C(CO)C=C(C3=CC=CC(Cl)=C3Cl)C=N2,1.36


In [101]:
training_molecules.to_csv('SHP2_training_1875.csv',index=False)

In [102]:
descriptor_data

Unnamed: 0,SMILES,MaxEStateIndex,MinEStateIndex,MaxAbsEStateIndex,MinAbsEStateIndex,qed,MolWt,HeavyAtomMolWt,ExactMolWt,NumValenceElectrons,...,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_tetrazole,fr_thiazole,fr_thiocyan,fr_thiophene,fr_unbrch_alkane,fr_urea,IC50(microM)
0,Cc1nc(cc(=O)n1-c1cccc(Cl)c1Cl)N1CCC2(Cc3ccccc3...,12.992564,-0.169513,12.992564,0.068072,0.609173,455.389,431.197,454.132717,160,...,0,0,0,0,0,0,0,0,0,0.0008
1,N[C@@H]1CCCC11CCN(CC1)c1cnc(Sc2cccc(NC(=O)c3c(...,13.061568,-0.781987,13.061568,0.246174,0.340043,597.145,563.881,596.208486,216,...,0,0,0,0,0,0,0,0,0,0.001
2,N[C@@H]1c2ccccc2CC11CCN(CC1)c1nc(N)c(C2=C(Cl)C...,12.340744,-0.676170,12.340744,0.017202,0.606557,485.419,459.211,484.154515,172,...,0,0,0,0,0,0,0,0,0,0.001
3,N[C@@H]1C[C@H](O)CC11CCN(CC1)c1ncc(Sc2cccc(Cl)...,10.063495,-0.264210,10.063495,0.058620,0.603375,464.422,441.238,463.100037,158,...,0,0,0,0,0,0,0,0,0,0.0012
4,C[C@@H]1OCC2(CCN(CC2)c2ncc(Sc3cccc(Cl)c3Cl)c3n...,6.464779,0.086398,6.464779,0.086398,0.614971,464.422,441.238,463.100037,158,...,0,0,0,0,0,0,0,0,0,0.0015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1872,NC1=C(N2CCC(CN)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,6.277779,0.520256,6.277779,0.520256,0.882336,351.281,331.121,350.106502,122,...,0,0,0,0,0,0,0,0,0,1.64
1873,NC1=C(N2CCC(C)(N)CC2)N=CC(C3=CC=CC(Cl)=C3Cl)=C1,6.275110,-0.097703,6.275110,0.097703,0.859533,351.281,331.121,350.106502,122,...,0,0,0,0,0,0,0,0,0,1.58
1874,NC1=C(NC2=CC(N)=CC=C2)N=CC(C3=CC=CN=C3)=C1,6.067947,0.568313,6.067947,0.568313,0.640083,277.331,262.211,277.132745,104,...,0,0,0,0,0,0,0,0,0,9.47
1875,NCC(CC1)CCN1C2=C(CO)C=C(C3=CC=CC(Cl)=C3Cl)C=N2,9.796768,-0.064310,9.796768,0.064310,0.865831,366.292,345.124,365.106168,128,...,0,0,0,0,0,0,0,0,0,1.36


In [103]:
descriptor_data.to_csv('SHP2_train_descriptors_1875.csv',index=False)