## Introduction

**Alternative (naive) approach: Using only the pure building blocks as predictors.**

Link to the competition and data: https://www.kaggle.com/competitions/leash-BELKA/overview

This actually works quite well, because the test data also contains many of the building blocks present in the train data.

**TODO: Clean up data loading**
 In main.ipynb this is done much more concisely, and the preprocessing is put into layers, which is much nicer for model comparisons.

## Switches:

In [None]:
# Run-mode switch: when True, the hyperparameter cell replaces the small test
# settings with the larger values intended for a submission run.
submit = True

## Imports

In [None]:
import tensorflow as tf
import os
import numpy as np
import sys
import pandas as pd
# Fix the random seed so that runs are reproducible.
tf.keras.utils.set_random_seed(42)

# Report whether TensorFlow sees a GPU. Only one of the two messages is
# printed; previously "Found GPU at: " was shown even when no GPU existed.
gpu_name = tf.test.gpu_device_name()
if "GPU" in gpu_name:
    print('Found GPU at: {}'.format(gpu_name))
else:
    print("GPU device not found")

## Select Hyperparameters

In [None]:
# --- Defaults: small values for quick test runs ---
BATCH_SIZE = 25
N_TRAIN = 5000  # number of training rows to use; -1 means "all"
N_TEST = 100  # number of test rows to use; -1 means "all"
N_EPOCHS = 4
WITH_DROPOUT = False
DROPOUT_RATE = 0.05
# Alternatives that were tried: 'linear', tf.keras.layers.LeakyReLU(negative_slope=0.01)
ACTIVATION = 'relu'
HIDDEN_NEURONS = [2000, 5]  # number of neurons per internal layer
RESAMPLE_INSTEAD_OF_WEIGHTS = True  # both methods rebalance the data distribution

# --- Overrides: larger values for submission runs ---
if submit:
    # Should be big when not resampling (RESAMPLE_INSTEAD_OF_WEIGHTS is False),
    # so that each batch contains at least some positive samples.
    BATCH_SIZE = 3 * 1024
    N_TRAIN = -1  # -1 means "all"
    N_TEST = -1  # -1 means "all"
    N_EPOCHS = 10
    HIDDEN_NEURONS = []
    ACTIVATION = tf.keras.layers.LeakyReLU(negative_slope=0.01)
    WITH_DROPOUT = False
    DROPOUT_RATE = 0.005  # only needed if WITH_DROPOUT
    RESAMPLE_INSTEAD_OF_WEIGHTS = True

# Number of batches that make up one epoch.
if N_TRAIN == -1:
    # The number 98415610 is taken from the data description page.
    STEPS_PER_EPOCH = 98415610 // BATCH_SIZE
else:
    STEPS_PER_EPOCH = N_TRAIN // BATCH_SIZE

In [None]:
# Labeled training data of the competition.
train_path = '/kaggle/input/leash-BELKA/train.csv'
# Generated file that receives only the rows with binds == 1
# (written by create_file_of_positives).
positive_train_path = '/kaggle/working/positive_train.csv'
# Unlabeled test data of the competition.
test_path = '/kaggle/input/leash-BELKA/test.csv'

## Functions for reading csv

In [None]:
def parse_bb_oneHot(x):
    """Reduce a CSV record to its three building blocks plus a protein indicator.

    Args:
        x: mapping that provides the keys 'buildingblock1_smiles',
            'buildingblock2_smiles', 'buildingblock3_smiles' and
            'protein_name' (presumably one unbatched row of the CSV
            dataset -- verify against the caller).

    Returns:
        dict with the three building-block entries passed through unchanged
        and 'oneHot': the element-wise comparison of the protein name with
        ['BRD4', 'sEH', 'HSA'], reshaped to shape (3,).
    """
    block_keys = (
        "buildingblock1_smiles",
        "buildingblock2_smiles",
        "buildingblock3_smiles",
    )
    features = {key: x[key] for key in block_keys}
    # One flag per known protein; exactly one is set for a known protein name.
    protein_flags = tf.math.equal(x['protein_name'], ['BRD4', 'sEH', 'HSA'])
    features['oneHot'] = tf.reshape(protein_flags, (3,))
    return features

def parse_bb_oneHot_binds(x,y):
    """Parse a labeled record: features via parse_bb_oneHot, label as int32 of shape (1,).

    Args:
        x: feature mapping, forwarded unchanged to parse_bb_oneHot.
        y: the label value belonging to x.

    Returns:
        Tuple (features, label).
    """
    label = tf.cast(tf.reshape(y, shape=(1,)), tf.int32)
    return parse_bb_oneHot(x), label

## Get Dataset:

In [None]:
def get_ds_csv(csv_path, n_samples, labeled=True):
    """Build an unbatched, parsed tf.data pipeline from a CSV file.

    Args:
        csv_path: path of the CSV file to read.
        n_samples: number of leading rows to keep; -1 means "take all".
        labeled: if True, the 'binds' column is split off as label and each
            element is parsed with parse_bb_oneHot_binds; otherwise elements
            are parsed with parse_bb_oneHot.

    Returns:
        The dataset of parsed, unbatched rows in file order (a single pass,
        not repeated).
    """
    reader_options = {
        'batch_size': 4,  # arbitrary; undone by unbatch() right below
        'shuffle': False,
        'num_epochs': 1,  # a single pass, i.e. no implicit repeat()
    }
    if labeled:
        reader_options['label_name'] = 'binds'

    rows = tf.data.experimental.make_csv_dataset(csv_path, **reader_options)
    rows = rows.unbatch()
    if n_samples != -1:
        rows = rows.take(n_samples)

    parser = parse_bb_oneHot_binds if labeled else parse_bb_oneHot
    return rows.map(parser)

# Labeled, unbatched training dataset, limited to the first N_TRAIN rows
# (all rows if N_TRAIN == -1).
ds = get_ds_csv(train_path, n_samples=N_TRAIN, labeled=True)
# Debug helper: uncomment to inspect the first parsed element.
#for elem in ds.take(1):
#    print(elem)
#    print()

## Creating File with positive samples and merge with old dataset
This is important for providing a well-balanced distribution of input data to the training.

In [None]:
def create_file_of_positives(positive_train_path = positive_train_path):
    """Copy all positive rows (binds == 1) of the training CSV into their own CSV file.

    The file at the module-level ``train_path`` is read in chunks so that it
    never has to fit into memory as a whole. Only the first ``N_TRAIN`` rows
    are scanned (all rows if ``N_TRAIN == -1``), i.e. the same rows that
    ``get_ds_csv(train_path, n_samples=N_TRAIN)`` keeps.

    Args:
        positive_train_path: destination CSV path; an existing file is
            overwritten. The header is written once, with the first chunk.
    """
    chunksize = 10000
    scan_all = N_TRAIN == -1

    # NOTE(review): 3*98415610 is used as the total row count of train.csv
    # (presumably three rows per molecule, one per protein) -- confirm.
    total_rows = 3*98415610 if scan_all else N_TRAIN
    # Integer ceiling division, so the progress message shows a whole number.
    max_chunks = -(-total_rows // chunksize)

    # nrows makes pandas stop after exactly N_TRAIN rows. The previous manual
    # break was evaluated only after a chunk had already been written and was
    # off by one chunk, so rows beyond N_TRAIN leaked into the output.
    reader = pd.read_csv(
        train_path,
        chunksize=chunksize,
        nrows=None if scan_all else N_TRAIN,
    )
    for i, chunk in enumerate(reader):
        positive_lines = chunk[chunk.binds == 1]
        is_first = i == 0
        positive_lines.to_csv(
            positive_train_path,
            mode='w' if is_first else 'a',  # the first chunk overwrites old files
            index=False,
            header=is_first,
        )
        if scan_all and i % 1000 == 0:
            print(i+1, 'of', max_chunks, "chunks searched for positive samples...")
    print("Done extracting positive samples.")
    
def merge_ds(ds1, ds2):
    """Interleave two datasets element by element: ds1, ds2, ds1, ds2, ...

    Both arguments need to be unbatched. Each of them is repeated
    indefinitely before merging, so the result is set to repeat as well.

    Args:
        ds1: dataset providing the elements at even positions.
        ds2: dataset providing the elements at odd positions.

    Returns:
        The alternating, endlessly repeating dataset.
    """
    def _one_after_the_other(first, second):
        # Turn one (ds1, ds2) pair into a tiny two-element dataset.
        head = tf.data.Dataset.from_tensors(first)
        tail = tf.data.Dataset.from_tensors(second)
        return head.concatenate(tail)

    paired = tf.data.Dataset.zip((ds1.repeat(), ds2.repeat()))
    return paired.flat_map(_one_after_the_other)

if RESAMPLE_INSTEAD_OF_WEIGHTS:
    # Rebalance by resampling: extract the positive rows into their own file,
    # load them like the training data (labeled, all rows, single pass) and
    # interleave them with the regular training dataset.
    create_file_of_positives()
    ds_positive = get_ds_csv(positive_train_path, n_samples=-1, labeled=True)
    ds_merged = merge_ds(ds, ds_positive)

## Callbacks

In [None]:
import time


class TimeStopping(tf.keras.callbacks.Callback):
    """Callback that stops training once a time budget is used up.

    The elapsed time is checked after every batch, so training ends shortly
    after the budget is exceeded rather than at the next epoch boundary.

    Args:
        max_hours: time budget in hours, counted from the start of training.
    """

    def __init__(self, max_hours=3):
        super().__init__()
        self.max_seconds = max_hours*60*60
        self.start_time = None

    def on_train_begin(self, logs=None):
        # Monotonic clock: elapsed time is not distorted by adjustments of
        # the system clock, unlike time.time().
        self.start_time = time.monotonic()

    def on_batch_end(self, batch, logs=None):
        if self.start_time is None:
            # on_train_begin was not called; start measuring from now on.
            self.start_time = time.monotonic()
            return
        if time.monotonic() - self.start_time >= self.max_seconds:
            print("\nTime over. Stop training.")
            self.model.stop_training = True


stop_after_3_hours = TimeStopping(max_hours=3)
stop_after_5_hours = TimeStopping(max_hours=5)
stop_after_7_hours = TimeStopping(max_hours=7)

In [None]:
class TerminateAndBackup(tf.keras.callbacks.Callback):
    """Callback that backs up the weights regularly and aborts training on a NaN/Inf loss.

    Weights are saved at the beginning of every epoch and after every 2000th
    batch. If the reported loss becomes NaN or infinite, the last backup is
    restored into the model being trained and training is stopped.

    Args:
        directory: directory that is created if missing. The backup file is
            ``directory + ".weights.h5"``.
    """

    def __init__(self, 
                 directory="/kaggle/working/temp"):
        super().__init__()
        # NOTE(review): the backup file is placed next to `directory`
        # ("<directory>.weights.h5"), not inside it -- confirm this is intended.
        self.backup_file = directory+".weights.h5"
        # Replaces a bare `except:` around os.stat/os.mkdir, which hid
        # unrelated errors.
        os.makedirs(directory, exist_ok=True)

    def on_epoch_begin(self, epoch, logs=None):
        # Save weights at the beginning of each epoch.
        self.model.save_weights(self.backup_file, overwrite=True)

    def on_train_batch_end(self, batch, logs=None):
        logs = logs or {}
        loss = logs.get('loss')
        if loss is None:
            return
        if np.isnan(loss) or np.isinf(loss):
            print('\n---Stopping learning  due to nans. --- ')
            # Restore into the model this callback is attached to; the
            # previous code used a global `model`, which may not exist or
            # may be a different model.
            self.model.load_weights(self.backup_file)
            self.model.stop_training  = True
        elif batch % 2000 == 0:
            # Save weights from time to time.
            self.model.save_weights(self.backup_file, overwrite=True)


terminate_and_backup = TerminateAndBackup()

# Multiply the learning rate by 0.2 when the training accuracy has not
# improved for 5 epochs, but never go below 0.001.
# NOTE(review): min_lr=0.001 only leaves room for a reduction if the optimizer
# starts with a larger learning rate -- verify against the optimizer setup.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='accuracy',
    factor=0.2,
    patience=5,
    min_lr=0.001,
)

## Prepare training weights

In [None]:
## Approximate proportion of positive binds in the training input
## (important for the training weights).
## With resampling, positives are interleaved with the regular data, so
## roughly every second sample is positive; otherwise the rate from the
## competition description is used.
binds_rate = 0.5 if RESAMPLE_INSTEAD_OF_WEIGHTS else 0.005

## If you want to calculate it exactly:

#initial_state = (0,0)
#def combined_reduce(state, data):
#    count, sum_binds = state
#    count += 1
#    sum_binds += int(data[1])
#    return count, sum_binds
#n_samples, n_binds = ds.unbatch().take(N_TRAIN).reduce(initial_state, combined_reduce)
#n_binds = n_binds.numpy()[0]
#n_samples = n_samples.numpy()
#print(f' {n_binds} of {n_samples} are positive')
#binds_rate = n_binds / n_samples
#print(f'bind rate = {binds_rate}')

## Precalculated list of possible building blocks in Training set:
list_bb1, list_bb2, list_bb3

In [None]:
list_bb1 = ['C#CC[C@@H](CC(=O)O)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'C#CC[C@@H](NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'C#CC[C@@](C)(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'C#CC[C@H](CC(=O)O)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'C#CC[C@H](NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'C=CCC(CC=C)(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'C=CCC(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'C=CCC[C@@H](NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'C=CC[C@@H](NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'C=CC[C@H](NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'CC(=O)c1ccc(C[C@H](NC(=O)OCC2c3ccccc3-c3ccccc32)C(=O)O)cc1',
 'CC(C)(C)OC(=O)CC(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'CC(C)(C)OC(=O)CCC(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'CC(C)(C)OC(=O)N1CCN(C(=O)OCC2c3ccccc3-c3ccccc32)C1C(=O)O',
 'CC(C)(C)OC(=O)N1C[C@@H](NC(=O)OCC2c3ccccc3-c3ccccc32)[C@H](C(=O)O)C1',
 'CC(C)(C)OCC(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'CC(C)CC(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'CC(OC(C)(C)C)C(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'CCC(C)C(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'CCCCC(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'CCOc1cc(NC(=O)OCC2c3ccccc3-c3ccccc32)c(Cl)cc1C(=O)O',
 'CCS(=O)(=O)c1cc(C(=O)O)c(OC)cc1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'CCc1cccc(NC(=O)OCC2c3ccccc3-c3ccccc32)c1C(=O)O',
 'CN(C(=O)OCC1c2ccccc2-c2ccccc21)[C@@H](CC1CCCCC1)C(=O)O',
 'COC(=O)CC(NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'COC(=O)c1ccc(C(=O)O)c(NC(=O)OCC2c3ccccc3-c3ccccc32)c1',
 'COc1c(F)ccc(C(=O)O)c1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'COc1cc(C(=O)O)c(N)cc1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'COc1cc(C(=O)O)c(NC(=O)OCC2c3ccccc3-c3ccccc32)cn1',
 'COc1cc(NC(=O)OCC2c3ccccc3-c3ccccc32)c(C(=O)O)c(OC)c1',
 'COc1cc(NC(=O)OCC2c3ccccc3-c3ccccc32)c(C(=O)O)cc1OC',
 'COc1ccc(C(=O)O)c(NC(=O)OCC2c3ccccc3-c3ccccc32)c1',
 'COc1ccc(C[C@H](NC(=O)OCC2c3ccccc3-c3ccccc32)C(=O)O)cc1OC',
 'COc1ccc(NC(=O)OCC2c3ccccc3-c3ccccc32)c(C(=O)O)c1',
 'COc1ccc([C@H](NC(=O)OCC2c3ccccc3-c3ccccc32)C(=O)O)cc1',
 'COc1cccc(C(=O)O)c1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'COc1cccc(NC(=O)OCC2c3ccccc3-c3ccccc32)c1C(=O)O',
 'COc1nccc(C(=O)O)c1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'CS(=O)(=O)c1ccc(C(=O)O)c(NC(=O)OCC2c3ccccc3-c3ccccc32)c1',
 'CSc1ncc(NC(=O)OCC2c3ccccc3-c3ccccc32)c(C(=O)O)n1',
 'C[C@@H](OCc1ccccc1)[C@H](NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O',
 'C[C@@]1(C(=O)O)CCCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'Cc1c(Br)ccc(C(=O)O)c1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'Cc1cc(Br)c(C(=O)O)cc1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'Cc1cc(Br)c(NC(=O)OCC2c3ccccc3-c3ccccc32)c(C(=O)O)c1',
 'Cc1cc(Br)cc(C(=O)O)c1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'Cc1cc(C(=O)O)ccc1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'Cc1cc(C)c(NC(=O)OCC2c3ccccc3-c3ccccc32)c(C(=O)O)c1',
 'Cc1cc(Cl)cc(C(=O)O)c1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'Cc1ccc(C(=O)O)c(NC(=O)OCC2c3ccccc3-c3ccccc32)c1C',
 'Cc1ccc(C(=O)O)cc1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'Cc1ccc(C(CC(=O)O)NC(=O)OCC2c3ccccc3-c3ccccc32)cc1',
 'Cc1ccc(C[C@H](NC(=O)OCC2c3ccccc3-c3ccccc32)C(=O)O)cc1',
 'Cc1ccc(NC(=O)OCC2c3ccccc3-c3ccccc32)c(C(=O)O)c1',
 'Cc1cccc(C(=O)O)c1NC(=O)OCC1c2ccccc2-c2ccccc21',
 'Cc1cccc(NC(=O)OCC2c3ccccc3-c3ccccc32)c1C(=O)O',
 'Cc1ccccc1[C@@H](CC(=O)O)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'Cn1cc(C[C@@H](NC(=O)OCC2c3ccccc3-c3ccccc32)C(=O)O)c2ccccc21',
 'N#Cc1ccc(C[C@@H](CC(=O)O)NC(=O)OCC2c3ccccc3-c3ccccc32)cc1',
 'N#Cc1ccc(C[C@@H](NC(=O)OCC2c3ccccc3-c3ccccc32)C(=O)O)cc1',
 'N#Cc1ccc(C[C@H](CC(=O)O)NC(=O)OCC2c3ccccc3-c3ccccc32)cc1',
 'N#Cc1ccc(C[C@H](NC(=O)OCC2c3ccccc3-c3ccccc32)C(=O)O)cc1',
 'N#Cc1ccc(NC(=O)OCC2c3ccccc3-c3ccccc32)c(C(=O)O)c1',
 'N#Cc1ccc([C@H](CC(=O)O)NC(=O)OCC2c3ccccc3-c3ccccc32)cc1',
 'N#Cc1cccc(C[C@H](CC(=O)O)NC(=O)OCC2c3ccccc3-c3ccccc32)c1',
 'O=C(CC[C@H](NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O)OC1CCCCC1',
 'O=C(NC(CC1CCCCC1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(NC(CCc1ccccc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(NC(Cc1ccccc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(NC1(C(=O)O)CC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(NC1(C(=O)O)CCC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(NC1(C(=O)O)CCCC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(NC1(C(=O)O)CCCCC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(NC1(C(=O)O)CCOCC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(NC1(C(=O)O)CCc2ccccc21)OCC1c2ccccc2-c2ccccc21',
 'O=C(NCC1CCC(C(=O)O)CC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(NC[C@H]1CC[C@H](C(=O)O)CC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(NCc1cccc(C(=O)O)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](C(=O)O)C1CCCC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](C(=O)O)C1CCCCC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](C/C=C/c1ccccc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](CC1CC1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](CCC1CCCCC1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1c(F)c(F)c(F)c(F)c1F)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1cc(F)c(F)c(F)c1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccc(Br)cc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccc(C(F)(F)F)cc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccc(Cl)c(Cl)c1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccc(Cl)cc1Cl)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccc(F)c(F)c1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccc(F)cc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccc(F)cc1F)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccc(I)cc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccc([N+](=O)[O-])cc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccccc1F)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1cccnc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccco1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1cccs1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1ccsc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1csc2ccccc12)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H](Cc1cscn1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@@H]1CC[C@H](C(=O)O)C1)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](C(=O)O)C1CC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](C(=O)O)C1CCCC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](C(=O)O)C1CCCCC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](C(=O)O)c1ccsc1)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](CC1CCCC1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](CCC1CCCCC1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](Cc1cc(F)cc(F)c1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](Cc1ccc(-c2ccccc2)cc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](Cc1ccc(C(F)(F)F)cc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](Cc1ccc(Cl)c(Cl)c1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](Cc1ccc(Cl)cc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](Cc1ccc(F)c(F)c1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](Cc1ccc(I)cc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](Cc1ccc([N+](=O)[O-])cc1)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](Cc1ccccc1Cl)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H](Cc1csc2ccccc12)C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H]1C=C[C@@H](C(=O)O)C1)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H]1CCC[C@@H]1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H]1CC[C@@H](C(=O)O)C1)OCC1c2ccccc2-c2ccccc21',
 'O=C(N[C@H]1CC[C@H](C(=O)O)CC1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(Br)cc(C(=O)O)cc1Br)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(Br)cc(F)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(Br)cccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(C(=O)O)cc(Cl)c(Br)c1F)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(C(=O)O)ccc(Br)c1F)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(C(=O)O)ccc2ccccc12)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(C(=O)O)cccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(C(=O)O)cnn1-c1ccc(F)cc1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(C(=O)O)cnn1CCO)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(C(=O)O)sc2ncccc12)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(Cl)c(Cl)nc(C(=O)O)c1Cl)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(Cl)cc(Cl)nc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(Cl)cc(F)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(Cl)cccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(F)cc(Br)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(F)cc(C(=O)O)cc1F)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(F)ccc(Br)c1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(F)cccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(I)c(C(=O)O)c(I)c(C(=O)O)c1I)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(I)cccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1c(OC(F)(F)F)cccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(-n2cccn2)ccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(Br)c(Cl)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(Br)c(F)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(Br)cc(C(=O)O)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(Br)ccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(Br)cnc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(C(=O)O)cc(C(=O)O)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(C(=O)O)ccc1Br)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(C(=O)O)ccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(C(=O)O)ccc1C(F)(F)F)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(C(=O)O)ccc1Cl)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(Cl)cc(C(=O)O)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(Cl)ccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(Cl)nc(C(=O)O)c1Cl)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(Cl)ncc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(F)c(Br)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(F)c(F)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(F)cc(F)c1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(F)ccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc(I)ccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc([N+](=O)[O-])ccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cc2ccccc2cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(Br)c(C(=O)O)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(Br)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)c(C(=O)O)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)c(C(F)(F)F)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)c(Cl)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)c(F)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)c([N+](=O)[O-])c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)cc1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)cc1C(F)(F)F)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)cc1Cl)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)cc1F)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)cc1O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)cc1OC(F)(F)F)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)cn1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(=O)O)nc1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(C(F)(F)F)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(Cl)c(C(=O)O)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(Cl)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(Cl)nc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(F)c(C(=O)O)c1F)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(F)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc(I)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc([N+](=O)[O-])c(C(=O)O)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc([N+](=O)[O-])cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccc2cc(C(=O)O)ccc2c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cccc(-c2cccc(C(=O)O)c2)c1O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cccc(Br)c1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cccc(C(=O)O)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cccc(C(=O)O)c1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cccc(Cl)c1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cccc(F)c1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cccc(I)c1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cccc([N+](=O)[O-])c1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccccc1C(=O)c1ccccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cccnc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ccnc(C(=O)O)c1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cnc(Cl)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cncc(C(=O)O)n1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1cnccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1nc(-c2ccc(C(=O)O)cc2)cs1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1nc(C(F)(F)F)c(C(=O)O)s1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1nc(Cl)ccc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1nc2cc(C(=O)O)ccc2[nH]1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1nc2ccc(C(=O)O)cc2s1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1nc2ncc(CNc3ccc(C(=O)O)cc3)nc2c(=O)[nH]1)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ncc(Br)cc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ncc(Br)nc1C(=O)O)OCC1c2ccccc2-c2ccccc21',
 'O=C(Nc1ncc(C(=O)O)s1)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C1CCCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C1CCN(C(=O)OCC2c3ccccc3-c3ccccc32)C1',
 'O=C(O)C1CN(C(=O)OCC2c3ccccc3-c3ccccc32)C1',
 'O=C(O)C1c2ccccc2CN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)CC(NC(=O)OCC1c2ccccc2-c2ccccc21)c1ccc(Br)cc1',
 'O=C(O)CC1(CNC(=O)OCC2c3ccccc3-c3ccccc32)CCCCC1',
 'O=C(O)CC1(NC(=O)OCC2c3ccccc3-c3ccccc32)CCCCC1',
 'O=C(O)CNC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)COC[C@H]1CCCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)COc1cccc(-c2csc(NC(=O)OCC3c4ccccc4-c4ccccc43)n2)c1',
 'O=C(O)C[C@@H](Cc1ccc(Br)cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@@H](Cc1ccc(C(F)(F)F)cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@@H](Cc1ccc(Cl)c(Cl)c1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@@H](Cc1ccc(Cl)cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@@H](Cc1ccc(Cl)cc1Cl)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@@H](Cc1ccc(F)cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@@H](Cc1ccc(I)cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@@H](Cc1ccc([N+](=O)[O-])cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@@H](Cc1cccc(F)c1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@@H](Cc1ccccc1Cl)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@@H](Cc1cccs1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@@H](NC(=O)OCC1c2ccccc2-c2ccccc21)c1ccc(Br)cc1',
 'O=C(O)C[C@@H](NC(=O)OCC1c2ccccc2-c2ccccc21)c1ccc(C(F)(F)F)cc1',
 'O=C(O)C[C@@H](NC(=O)OCC1c2ccccc2-c2ccccc21)c1ccc(Cl)cc1',
 'O=C(O)C[C@@H](NC(=O)OCC1c2ccccc2-c2ccccc21)c1cccc(Cl)c1',
 'O=C(O)C[C@@H](NC(=O)OCC1c2ccccc2-c2ccccc21)c1cccc([N+](=O)[O-])c1',
 'O=C(O)C[C@@H](NC(=O)OCC1c2ccccc2-c2ccccc21)c1cccs1',
 'O=C(O)C[C@@H]1CCCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H](C/C=C/c1ccccc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H](Cc1c(F)c(F)c(F)c(F)c1F)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H](Cc1ccc(Br)cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H](Cc1ccc(C(F)(F)F)cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H](Cc1ccc(Cl)c(Cl)c1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H](Cc1ccc(Cl)cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H](Cc1ccc(F)cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H](Cc1ccc(I)cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H](Cc1ccc([N+](=O)[O-])cc1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H](Cc1cccs1)NC(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H](NC(=O)OCC1c2ccccc2-c2ccccc21)c1ccc(F)cc1',
 'O=C(O)C[C@H](NC(=O)OCC1c2ccccc2-c2ccccc21)c1cccc(Cl)c1Cl',
 'O=C(O)C[C@H]1CCCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)C[C@H]1Cc2ccccc2CN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)Cc1ccc(NC(=O)OCC2c3ccccc3-c3ccccc32)cc1',
 'O=C(O)[C@@H]1CCCCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@@H]1CCCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@@H]1CCCN1C(=O)[C@@H]1CCCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@@H]1CSCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@@H]1C[C@@H]2CCCC[C@@H]2N1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@@H]1Cc2ccc(O)cc2CN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@@H]1Cc2ccccc2CN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@@H]1Cc2ccccc2N1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@H]1CC2CCCCC2N1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@H]1CCCCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@H]1CCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@H]1COCCN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@H]1C[C@H](O)CN1C(=O)OCC1c2ccccc2-c2ccccc21',
 'O=C(O)[C@H]1Cc2ccccc2CN1C(=O)OCC1c2ccccc2-c2ccccc21',
 '[N-]=[N+]=NCCC[C@H](NC(=O)OCC1c2ccccc2-c2ccccc21)C(=O)O']

In [None]:
list_bb2 =['C#CCOc1ccc(CN)cc1.Cl',
 'C#CCOc1cccc(CN)c1.Cl',
 'C#Cc1ccc(N)cc1',
 'C#Cc1cccc(N)c1',
 'C=C(C)C(=O)NCCN.Cl',
 'C=C(C)COCCN.Cl',
 'C=C(Cl)CN.Cl',
 'C=C1CCC(CN)CC1.Cl',
 'C=CCNC(=O)CN.Cl',
 'C=CCOC(C)CN',
 'C=CCOCCCN',
 'C=CCOCCN',
 'C=CCSCCN',
 'CC(=O)Nc1cccc(N)n1',
 'CC(=O)SCCN.Cl',
 'CC(=O)c1ccc(N)c(F)c1',
 'CC(=O)c1cccc(N)c1',
 'CC(C)(C#N)c1ccc(N)cc1',
 'CC(C)(C)NS(=O)(=O)c1cccc(N)c1',
 'CC(C)(C)OC(=O)N1CCN(c2ccccc2N)CC1',
 'CC(C)(C)OC(=O)N1Cc2c(N)n[nH]c2C1(C)C',
 'CC(C)(C)OC(=O)n1ncc2cc(N)ccc21',
 'CC(C)(C)c1ccc(O)c(N)c1',
 'CC(C)(C)c1nnc(CN)s1.Cl',
 'CC(C)(C)c1ocnc1CN.Cl',
 'CC(C)(CN)C(=O)N1CCCC1',
 'CC(C)(CN)CCC#N',
 'CC(C)(CN)CCS(C)(=O)=O.Cl',
 'CC(C)CC(C)(CN)NC(=O)c1cc(Cl)c(Cl)[nH]1.Cl',
 'CC(C)Cn1cnc2c(N)nc3ccccc3c21',
 'CC(C)NC(=O)NCCN.Cl.Cl',
 'CC(C)c1nnc([C@H]2C[C@H](CN)[C@H](O)C2)[nH]1',
 'CC(CCN)S(C)=O',
 'CC(CN)N1CCC1',
 'CC(CN)OC(C)(C)C.Cl',
 'CC(CN)Oc1ccc(Cl)cc1',
 'CC(CN)S(=O)(=O)N1CCN(c2ccccc2)CC1.Cl.Cl',
 'CC(CN)S(C)=O',
 'CC(CN)S(N)(=O)=O.Cl',
 'CC(CN)c1c(Cl)cccc1Cl',
 'CC(F)(F)CN.Cl',
 'CC(O)(CN)CN1CCOCC1',
 'CC(O)CCN',
 'CC1(C)CC(CCN)C(=O)N1.Cl',
 'CC1(C)CC(CN)C(C)(C)O1',
 'CC1(C)CCOC1CCN',
 'CC1(C)NC(=O)N(CCCN)C1=O.Cl',
 'CC1(C)OB(c2ccc(N)cc2)OC1(C)C',
 'CC1(CCCCN)OCCO1',
 'CC1(CN)CCC2(CC1)OCCO2',
 'CC1(F)CCN(CCN)C1.Cl.Cl',
 'CC12CCC(CN)(C1)OC2.Cl',
 'CC1=CCN(CCN)CC1.Cl.Cl',
 'CC1CC(CN)C(C)O1',
 'CC1CCCC(CN)O1',
 'CC1CN(S(=O)(=O)CCN)CC(C)O1',
 'CC1CN(c2cc(CN)ccn2)CCO1',
 'CCC#CCN',
 'CCC(C)(O)CCN',
 'CCC(CC)(CN)OC',
 'CCC(CN)Oc1ccccc1C.Cl',
 'CCC1COCCN1CCN.Cl.Cl',
 'CCN(CCCN)S(C)(=O)=O',
 'CCN1C(=O)C[C@H](CN)[C@H]1c1ccncc1',
 'CCN1CCN(Cc2ccc(N)nc2)CC1',
 'CCOC(=O)c1ccc(O)c(N)c1',
 'CCOC(=O)c1cccnc1N',
 'CCOC(=O)c1cnc(N)cn1',
 'CCOC(=O)c1cnc(SC)nc1N',
 'CCOC(=O)c1cncnc1N',
 'CCOC(=O)c1csc(N)n1',
 'CCOC(=O)c1ncccc1N',
 'CCOC(CN)CN(C)C',
 'CCOCCCN',
 'CCON(C)C(=O)CN.Cl',
 'CCON(CC)C(=O)CN.Cl',
 'CCOc1cc(N)ccn1',
 'CCOc1cc2c(cc1CN)OC(C)C2.Cl',
 'CCOc1ccnc(N)c1',
 'CCS(=O)(=O)N1CC(CN)C1.Cl',
 'CCS(=O)CCN.Cl',
 'CCSC1CCC1(O)CN',
 'CCSCCN.Cl',
 'CCn1cc2cc(N)ccc2n1',
 'CN(C)C(=O)C1CCC(CN)O1.Cl',
 'CN(C)C1(CN)CCSC1',
 'CN(C)c1cccnc1CN',
 'CN(C)c1nc(Cl)c(CN)s1.Cl.Cl',
 'CN(C)c1ncccc1CN.Cl.Cl',
 'CN(CC(F)(F)F)C(=O)CN.Cl',
 'CN(CCN)C1CCOCC1',
 'CN(Cc1ccco1)Cc1ccccc1CN',
 'CN1C(=O)Cc2cc(CN)ccc21.Cl',
 'CN1CC(CCN)CC1=O',
 'CN1CC(CN)CC1=O',
 'CN1CC2(CCC1CN)CC2.Cl.Cl',
 'CN1CCN(C(=O)CCN)CC1',
 'CN1CCN(C(=O)CN)CC1',
 'CN1CCN(CCN)CC1',
 'CN1CCN(Cc2ccc(N)cc2C(F)(F)F)CC1',
 'CN1CCN(Cc2cccc(N)c2)CC1',
 'CN1CCN(Cc2ccccc2N)CC1',
 'CN1CCN(c2ccc(N)cc2F)CC1',
 'CN1CCO[C@@H](CN)[C@@H]1c1cnn(C)c1',
 'CNC(=O)C(O)CN.Cl',
 'CNC(=O)COCCN.Cl',
 'CNC(=O)c1cc(Oc2ccc(N)cc2)ccn1',
 'CNC(=O)c1ccc(N)cc1F',
 'COC(=O)c1cc(Cl)ccc1N',
 'COC(=O)c1cc(F)c(F)cc1N',
 'COC(=O)c1cc(N)cc(F)c1',
 'COC(=O)c1cc(N)cc(O)c1',
 'COC(=O)c1cc(N)ccc1C',
 'COC(=O)c1cc(N)ccc1Cl',
 'COC(=O)c1cc(N)ccc1F',
 'COC(=O)c1cc(N)cs1',
 'COC(=O)c1cc(OC)c(OC)cc1N',
 'COC(=O)c1ccc(N)cc1C',
 'COC(=O)c1ccc(N)cc1O',
 'COC(=O)c1ccc(N)cc1OC',
 'COC(=O)c1cccc(N)n1',
 'COC(=O)c1ccnc(N)c1',
 'COC(=O)c1cnc(N)cn1',
 'COC(=O)c1cncc(N)c1',
 'COC(=O)c1cnccc1N',
 'COC(=O)c1cscc1N.Cl',
 'COC(=O)c1nc(Cl)c(Cl)nc1N',
 'COC(=O)c1nccnc1N',
 'COC(=O)c1occc1N',
 'COC(=O)c1scnc1N',
 'COC(C)(CCN)OC',
 'COC(C)(CN)C1CC1',
 'COC(CN)C1CCCOC1',
 'COC(CN)C1CCOC1',
 'COC(CN)CC(N)=O.Cl',
 'COC1(C(F)(F)CN)CCOCC1.Cl',
 'COC1(CN)CCC1',
 'COC1(CN)CCOC1.Cl',
 'COC1(OC)CC(CN)C1',
 'COC1CCC(CCN)CC1',
 'COCC1(CN)CCCCC1',
 'COCCNC(=O)CN.Cl',
 'COCOCCCN',
 'COC[C@H](CN)OC',
 'COCc1ccc(N)cc1',
 'COCc1ccccc1CN',
 'CO[C@@H]1COC[C@H]1n1cc(CN)nn1.Cl',
 'COc1c(F)cc(N)cc1F',
 'COc1c(F)ccc(F)c1CN.Cl',
 'COc1c(N)cccc1F',
 'COc1cc(Br)ccc1N',
 'COc1cc(C#N)c(F)cc1N',
 'COc1cc(C)c(N)cn1',
 'COc1cc(CN)c(Br)cn1',
 'COc1cc(CN)c2ccccc2n1',
 'COc1cc(F)c(Cl)cc1N',
 'COc1cc(F)ccc1N',
 'COc1cc(N)cc(OC)c1',
 'COc1cc(N)ccn1',
 'COc1cc2c(cc1CN)OCO2',
 'COc1ccc(C(CN)N2CCOCC2)cc1OC',
 'COc1ccc(C)nc1CN.Cl.Cl',
 'COc1ccc(CN)c(C)c1OC',
 'COc1ccc(CN)cc1C#N.Cl',
 'COc1ccc(Cl)c(N)c1.Cl',
 'COc1ccc(N)c(Cl)c1',
 'COc1ccc(N)cc1Cl.Cl',
 'COc1ccc(N)cc1OC',
 'COc1ccc(N)cc1[N+](=O)[O-]',
 'COc1ccc(N)cn1',
 'COc1ccc(N)nc1',
 'COc1ccc(O)c(N)c1',
 'COc1ccc(OC)c(N)c1',
 'COc1ccc([N+](=O)[O-])c(N)n1',
 'COc1cccc(-c2cc(N)on2)c1',
 'COc1cccc(F)c1CN',
 'COc1cccc(N)c1F',
 'COc1cccc(N)n1',
 'COc1ccccc1-c1nnc(N)s1',
 'COc1ccccc1OCCCCN',
 'COc1ccnc(N)c1',
 'COc1ccnc(N)n1',
 'COc1ccncc1CN',
 'COc1cnc(CN)cn1.Cl.Cl',
 'COc1cnc(N)cn1',
 'COc1cnc(N)nc1',
 'COc1cncc(N)c1',
 'COc1cncc(N)n1',
 'COc1nc(Br)ccc1N',
 'COc1nc(Cl)ncc1N',
 'COc1ncc(N)cc1C(F)(F)F',
 'COc1ncc(N)cn1',
 'COc1ncccc1N',
 'CS(=O)(=O)CC1(CN)CCOCC1.Cl',
 'CS(=O)(=O)NC(=O)CCN.Cl',
 'CS(=O)(=O)NC1CCCC1CN.Cl',
 'CS(=O)(=O)Nc1ccc(-c2csc(N)n2)cc1',
 'CS(=O)(=O)Nc1cccc(N)c1',
 'CS(=O)(=O)c1ccc(N)cc1',
 'CS(=O)(=O)c1ccc(N)cc1F',
 'CS(=O)(=O)c1cccc(N)c1',
 'CS(=O)C1(CN)CCC1',
 'CS(=O)CC(O)CN.Cl',
 'CS(=O)CCCCN',
 'CSC1(CN)CCOCC1.Cl',
 'CSCc1nnc(CN)[nH]1.Cl',
 'CSSCCN.Cl',
 'CSc1ccc(CN)cc1C#N.Cl',
 'CSc1ccc(CN)o1.Cl',
 'C[Si](C)(C)C#Cc1ccc(N)cn1',
 'Cc1[nH][nH]c(=O)c1CCN',
 'Cc1c(Br)cncc1CN.Cl.Cl',
 'Cc1c(CN)oc2ccc(F)cc12.Cl',
 'Cc1c(N)cccc1Br',
 'Cc1c(N)cccc1F',
 'Cc1c([C@@H]2[C@@H](CN)CC(=O)N2C)cnn1C',
 'Cc1cc(=O)oc2cc(N)ccc12',
 'Cc1cc(C#N)c(N)s1',
 'Cc1cc(C#N)cnc1N',
 'Cc1cc(C)c(CN)c(=O)[nH]1.Cl',
 'Cc1cc(C)c(N)c([N+](=O)[O-])c1',
 'Cc1cc(C)nc(N)n1',
 'Cc1cc(CN)cc(Cl)n1.Cl.Cl',
 'Cc1cc(CN)ccc1Oc1ccc(Cl)cc1Cl.Cl',
 'Cc1cc(Cl)ncc1N',
 'Cc1cc(Cl)nnc1N',
 'Cc1cc(F)ccc1N',
 'Cc1cc(F)ncc1CN',
 'Cc1cc(N)cc(Cl)c1',
 'Cc1cc(N)cc(F)c1',
 'Cc1cc(N)ccc1Cl',
 'Cc1cc(N)ccc1F',
 'Cc1cc(N)ccc1O',
 'Cc1cc(N)ccn1',
 'Cc1cc(N)cnc1Br',
 'Cc1cc(N)n(-c2ccccc2)n1',
 'Cc1cc(N)n(C(C)C)n1',
 'Cc1cc(N)ncc1Cl',
 'Cc1cc(N)ncc1[N+](=O)[O-]',
 'Cc1cc(N)ncn1',
 'Cc1cc(N)nn1C',
 'Cc1cc(N)nnc1Cl',
 'Cc1cc(N)on1',
 'Cc1cc(O)cc(C)c1N',
 'Cc1cc(O)ccc1N',
 'Cc1cc([N+](=O)[O-])c(Cl)cc1N',
 'Cc1cc([N+](=O)[O-])c(N)cc1Cl',
 'Cc1ccc(-c2cc(C(F)(F)F)nc(OCCN)n2)cc1',
 'Cc1ccc(C#N)cc1N',
 'Cc1ccc(Cl)c(N)c1',
 'Cc1ccc(F)c(N)c1',
 'Cc1ccc(N)c(CO)c1',
 'Cc1ccc(N)c(Cl)n1',
 'Cc1ccc(N)cc1Br',
 'Cc1ccc(N)nn1',
 'Cc1ccc(O)c(CN)n1.Cl.Cl',
 'Cc1ccc(S(C)(=O)=O)cc1N',
 'Cc1ccc([N+](=O)[O-])c(N)n1',
 'Cc1ccc2cccc(N)c2n1',
 'Cc1cccc(N)c1Cl',
 'Cc1cccc(OCCCN)c1C.Cl',
 'Cc1cccc2oc(CCN)nc12.Cl.Cl',
 'Cc1ccccc1-c1csc(N)n1',
 'Cc1ccccc1-n1nc(C)c(CN)c1C.Cl.Cl',
 'Cc1cccnc1NC(=O)CCN.Cl.Cl',
 'Cc1ccnc(Cl)c1N',
 'Cc1ccnc(N)c1',
 'Cc1ccncc1N',
 'Cc1cn(-c2cc(N)cc(C(F)(F)F)c2)cn1',
 'Cc1cnc(CCN)s1.Cl.Cl',
 'Cc1cnc(Cl)cc1N',
 'Cc1cnc(Cl)nc1N',
 'Cc1cnc(N)cn1',
 'Cc1cnc(N)s1',
 'Cc1cnc(O)c(N)c1',
 'Cc1csc(N)n1',
 'Cc1nc(CN)ccc1C(C)O.Cl.Cl',
 'Cc1nc(CN)oc1C.Cl.Cl',
 'Cc1nc(Cl)ccc1N',
 'Cc1nc(N)ccc1Cl',
 'Cc1nc(N)ccc1[N+](=O)[O-]',
 'Cc1ncc(N)cc1Br',
 'Cc1ncccc1N',
 'Cc1nccn1-c1ncccc1CN',
 'Cc1nccnc1N',
 'Cc1nn(C)c2ncc(CN)cc12.Cl',
 'Cc1nnc(CCCN)s1.Cl',
 'Cc1nnc(N)s1',
 'Cc1nncn1CCCN.Cl',
 'Cc1sc(C)c(CN)c1Br.Cl',
 'Cc1sc(CCN)nc1-c1ccccc1.Cl.Cl',
 'Cc1sc(N)c(C#N)c1C',
 'Cl.Cl.Cn1ccnc1Cn1c(CCCN)nc2c1CCCC2',
 'Cl.Cl.Cn1cncc1CN',
 'Cl.Cl.N=C(N)CCCN',
 'Cl.Cl.NCC(=O)NCCN1CCOCC1',
 'Cl.Cl.NCC(=O)Nc1nccs1',
 'Cl.Cl.NCC1(N2CCOCC2)CCOCC1',
 'Cl.Cl.NCC1(c2ccncc2)CC1',
 'Cl.Cl.NCC1CCN(C(N)=O)C1',
 'Cl.Cl.NCC1CCN(CC(F)F)CC1',
 'Cl.Cl.NCC1CCNC(=O)C1',
 'Cl.Cl.NCC=Cc1cccnc1',
 'Cl.Cl.NCCC(=O)Nc1ccncc1',
 'Cl.Cl.NCCCN1CCNC(=O)C1',
 'Cl.Cl.NCCN1CCS(=O)CC1',
 'Cl.Cl.NCCNC(=O)c1cnccn1',
 'Cl.Cl.NCCc1nc2c(s1)COCC2',
 'Cl.Cl.NCCc1nccn1C(F)F',
 'Cl.Cl.NCCc1nnc2c(=O)[nH]ccn12',
 'Cl.Cl.NCCn1cc(C2CC2)nn1',
 'Cl.Cl.NCCn1cccnc1=O',
 'Cl.Cl.NCCn1cnc2ccsc2c1=O',
 'Cl.Cl.NC[C@@H]1CCO[C@H]1c1cn[nH]c1',
 'Cl.Cl.NCc1cc(=O)nc[nH]1',
 'Cl.Cl.NCc1cc(Br)cc2cccnc12',
 'Cl.Cl.NCc1cc2n(n1)CCCO2',
 'Cl.Cl.NCc1ccc(-n2cncn2)cc1',
 'Cl.Cl.NCc1ccc(O)c2ncccc12',
 'Cl.Cl.NCc1ccc2ccccc2n1',
 'Cl.Cl.NCc1ccc[n+]([O-])c1',
 'Cl.Cl.NCc1cccc(-n2ccnn2)c1',
 'Cl.Cl.NCc1ccncc1C(F)(F)F',
 'Cl.Cl.NCc1cn2cc(Br)ccc2n1',
 'Cl.Cl.NCc1cn[nH]c1',
 'Cl.Cl.NCc1cncc(F)c1',
 'Cl.Cl.NCc1cnsc1',
 'Cl.Cl.NCc1nc(-c2ccncc2)no1',
 'Cl.Cl.NCc1nc2c(F)c(F)ccc2[nH]1',
 'Cl.Cl.NCc1nc2cnccc2s1',
 'Cl.Cl.NCc1ncc[nH]1',
 'Cl.Cl.NCc1nccc2[nH]ccc12',
 'Cl.Cl.NCc1ncccc1F',
 'Cl.Cl.NCc1nccn1-c1ccccc1',
 'Cl.Cl.NCc1nnc(C2CCOC2)[nH]1',
 'Cl.Cl.NCc1nnc2n1CCOCC2',
 'Cl.Cn1c(CN)n[nH]c1=O',
 'Cl.Cn1cc(CN)c(=O)[nH]c1=O',
 'Cl.Cn1cc(CN)c(Br)n1',
 'Cl.Cn1cc(N)ccc1=O',
 'Cl.Cn1nnc(CN)c1C(F)F',
 'Cl.N#CC1(NC(=O)CN)CC1',
 'Cl.N#Cc1ccc(CN)nc1',
 'Cl.NC1CCC(=O)CC1',
 'Cl.NCC(=O)N1CCCO1',
 'Cl.NCC(=O)NC1CCC1',
 'Cl.NCC(=O)NCc1ccncc1',
 'Cl.NCC(F)(F)C(F)(F)F',
 'Cl.NCC(F)(F)C(N)=O',
 'Cl.NCC(F)(F)C1CC1',
 'Cl.NCC(F)(F)CC1CC1',
 'Cl.NCC1(F)CCCCC1',
 'Cl.NCC1(F)CCOC1',
 'Cl.NCC1(O)C2C3CC4C5C3CC2C5C41',
 'Cl.NCC1(OCCO)CCS(=O)(=O)C1',
 'Cl.NCC12CC3CC(CC(C3)C1)C2',
 'Cl.NCC1C=CCC1',
 'Cl.NCC1CC(=O)N(Cc2ccccc2)C1',
 'Cl.NCC1CC(C(N)=O)=NO1',
 'Cl.NCC1CC(CC(N)=O)CO1',
 'Cl.NCC1CC12CCOCC2',
 'Cl.NCC1CC2(C1)CC2(F)F',
 'Cl.NCC1CC2CC1C1CC21',
 'Cl.NCC1CCC2(CCC2)CO1',
 'Cl.NCC1CCCC1',
 'Cl.NCC1CCCC2(CCC2)C1O',
 'Cl.NCC1CCCOC1',
 'Cl.NCC1CCN(c2ccc(Br)cc2)C1',
 'Cl.NCC1CCOC2(CCOCC2)C1',
 'Cl.NCC1CCOCC12CCCC2',
 'Cl.NCC1CNC(=O)C1',
 'Cl.NCC1Cc2ccccc2C1',
 'Cl.NCC1Cc2ccccc2NC1=O',
 'Cl.NCC1OCCc2ccsc21',
 'Cl.NCC=C(Cl)Cl',
 'Cl.NCCC1CC1',
 'Cl.NCCC1CC2(CCC2)CO1',
 'Cl.NCCC1CCCC1(F)F',
 'Cl.NCCC1CN(c2ncnc3[nH]ncc23)c2ccccc21',
 'Cl.NCCCCF',
 'Cl.NCCCCN1C(=O)c2ccccc2C1=O',
 'Cl.NCCCN1C(=O)CCC1=O',
 'Cl.NCCCNC(=O)C1CCC1',
 'Cl.NCCCNC(=O)c1ccc(F)cc1',
 'Cl.NCCCOc1cccc(F)c1',
 'Cl.NCCC[C@@H]1NC(=O)NC1=O',
 'Cl.NCCN1C(=O)SC(=Cc2cccs2)C1=O',
 'Cl.NCCN1C(=O)c2ccccc2S1(=O)=O',
 'Cl.NCCN1CCCS1(=O)=O',
 'Cl.NCCNC(=O)c1ccc(Cl)cc1',
 'Cl.NCCNC(=O)c1ccccc1F',
 'Cl.NCCNC(=O)c1ccn[nH]1',
 'Cl.NCCNC(=O)c1ccno1',
 'Cl.NCCNC(N)=O',
 'Cl.NCCOC1CCS(=O)(=O)C1',
 'Cl.NCCOCC(F)F',
 'Cl.NCCOc1ccc(F)c(F)c1',
 'Cl.NCCS(=O)(=O)C1CCOCC1',
 'Cl.NCCS(=O)(=O)c1ccc(Cl)cc1',
 'Cl.NCCc1c[nH]c2cc(Cl)ccc12',
 'Cl.NCCc1cc(Br)c(Br)s1',
 'Cl.NCCc1ccno1',
 'Cl.NCCn1cnc2sccc2c1=O',
 'Cl.NCCn1cnnn1',
 'Cl.NC[C@@H]1CCCO1',
 'Cl.NC[C@@H]1CCO[C@@H]1c1nc(C2CC2)no1',
 'Cl.NC[C@@H]1C[C@@H]2O[C@H]1[C@H]1C[C@H]12',
 'Cl.NC[C@H]1CC[C@H](C(N)=O)CC1',
 'Cl.NCc1cc(=O)[nH]c2ccccc12',
 'Cl.NCc1cc(C(F)(F)F)co1',
 'Cl.NCc1cc(F)c(Cl)cc1F',
 'Cl.NCc1cc2ccccc2[nH]c1=O',
 'Cl.NCc1ccc(-c2c[nH]c(=O)[nH]c2=O)cc1',
 'Cl.NCc1ccc(-c2nnn[nH]2)cc1',
 'Cl.NCc1ccc(CN2C(=O)CNC2=O)cc1',
 'Cl.NCc1ccc[nH]1',
 'Cl.NCc1ccc[nH]c1=O',
 'Cl.NCc1cccc2cc[nH]c12',
 'Cl.NCc1ccnc(C(N)=O)c1',
 'Cl.NCc1ccsc1C(F)(F)F',
 'Cl.NCc1cnc(Cl)s1',
 'Cl.NCc1cnc2n1CCOC2',
 'Cl.NCc1cnoc1C1CC1',
 'Cl.NCc1cocn1',
 'Cl.NCc1csc(=O)[nH]1',
 'Cl.NCc1cscc1C(F)(F)F',
 'Cl.NCc1nc(-c2ccco2)n[nH]1',
 'Cl.NCc1nc2cc(F)ccc2o1',
 'Cl.NCc1nc2ccccc2c(=O)[nH]1',
 'Cl.NCc1nc2ccccc2o1',
 'Cl.NCc1ncon1',
 'Cl.NCc1nnc(-c2ccncc2)[nH]1',
 'Cl.NCc1nnc2c(=O)[nH]ccn12',
 'Cl.NCc1nnc2ncccn12',
 'Cl.NCc1noc(C2CCOCC2)n1',
 'Cl.NCc1noc2ccc(F)cc12',
 'Cl.Nc1cc(O)ccn1',
 'Cl.Nc1ccc(O)cc1Cl',
 'Cl.Nc1ccc2cccnc2c1',
 'Cl.Nc1cccc2c(=O)cc(-c3nn[nH]n3)oc12',
 'Cl.Nc1ncns1',
 'Cn1cc(-c2ccccc2CN)cn1',
 'Cn1cc(C2(CN)CCCO2)cn1',
 'Cn1cc(CN)cn1',
 'Cn1ccc2cc(N)ccc21',
 'Cn1ccnc1N',
 'Cn1cncc1C(CN)N1CCCC1',
 'Cn1ncc2cc(N)ccc21',
 'Cn1nccc1CN',
 'Cn1nccc1CN1C[C@@H](F)C[C@H]1CN',
 'N#CC1=C(N)CCC1',
 'N#Cc1c(N)cccc1F',
 'N#Cc1c(N)sc2c1CCCC2',
 'N#Cc1cc(F)ccc1N',
 'N#Cc1cc(N)ccc1Cl',
 'N#Cc1cc(N)ccc1F',
 'N#Cc1ccc(N)c([N+](=O)[O-])c1',
 'N#Cc1ccc(N)cc1',
 'N#Cc1ccc(N)cc1C(F)(F)F',
 'N#Cc1cccc(N)n1',
 'N#Cc1ccccc1N',
 'N#Cc1cccnc1N',
 'N#Cc1ccnc(N)c1',
 'N#Cc1cncnc1N',
 'N#Cc1ncc(N)cc1C(F)(F)F',
 'NC/C=C/Br',
 'NC1=NC(=O)CS1',
 'NC1COC1',
 'NCC(O)COc1cccc(Cl)c1Cl',
 'NCC(O)COc1ccccc1Br',
 'NCC1(C2CC2)CCCO1',
 'NCC1(CO)CC2C=CC1C2',
 'NCC1(CO)CC=CC1',
 'NCC1(CO)CCOC1',
 'NCC1(Cc2ccccc2)CC1',
 'NCC1(N2CCOCC2)CC1',
 'NCC1(O)CC1',
 'NCC1(O)CCCC1',
 'NCC1(O)CCSC1',
 'NCC1(OCCO)CCC1',
 'NCC12CC=CC(C1)OC2',
 'NCC12CCCC(CO1)C2',
 'NCC1CC2(CC2)CO1',
 'NCC1CCC(C(F)F)CC1',
 'NCC1CCC(F)(F)CC1',
 'NCC1CCC2CC2C1',
 'NCC1CCCCC(F)(F)C1',
 'NCC1CCN(c2ccncc2)CC1',
 'NCC1CN2CCN1CC2',
 'NCCC(=O)N1CCN(c2ccccn2)CC1',
 'NCCC(=O)NC1CCCC1',
 'NCCC(O)C1CCCC1',
 'NCCC1CCOC1',
 'NCCC1CSC1',
 'NCCC1OCCc2ccccc21',
 'NCCC1SCCS1',
 'NCCCCN1CCCC1',
 'NCCCc1cc(=O)[nH][nH]1',
 'NCCN1CC2CCC1C2',
 'NCCN1CCC2(C1)OCCO2',
 'NCCN1CCCOCC1',
 'NCCN1CC[C@@H](O)C1',
 'NCCNC(=O)c1cccnc1',
 'NCC[C@@H]1COC[C@H]1O',
 'NCCc1ccncc1F',
 'NCCc1coc2ccccc12',
 'NC[C@@H]1CCC(=O)N1',
 'NC[C@@H]1C[C@@H]2C=C[C@H]1C2',
 'NC[C@]1(CO)COC[C@H]2CCCN21',
 'NCc1c(Br)cncc1Br',
 'NCc1c(F)cccc1N1CCCC1',
 'NCc1c[nH]c2ccccc12',
 'NCc1cc(-c2ccccc2)[nH]n1',
 'NCc1cc(=O)c(O)co1',
 'NCc1cc(F)cc(F)c1',
 'NCc1ccc2[nH]ccc2c1',
 'NCc1cccc(C(=O)N2CCCC2)c1',
 'NCc1cccc(C(F)(F)F)c1',
 'NCc1cccc(C(F)(F)F)n1',
 'NCc1cccc(N2CCOCC2)c1',
 'NCc1cccc2c1OCO2',
 'NCc1ccccc1CN1CCCC1=O',
 'NCc1ccccc1CS(=O)(=O)N1CCOCC1',
 'NCc1ccccc1N1CCOCC1',
 'NCc1ccccn1',
 'NCc1cccnc1',
 'NCc1cccnc1N1CCC(C(N)=O)CC1',
 'NCc1cccnc1OC(F)F',
 'NCc1cccs1',
 'NCc1ccnc(-n2cncn2)c1',
 'NCc1ccncn1',
 'NCc1cn(-c2ccccc2)nc1-c1ccncc1',
 'NCc1cn(C(F)F)c2ccccc12',
 'NCc1cnn(Cc2ccccc2)c1',
 'Nc1c(F)cccc1F',
 'Nc1c2c(cc3c1CCC3)CCC2',
 'Nc1c2ccccc2nc2ccccc12',
 'Nc1cc(-c2cccc(Br)c2)no1',
 'Nc1cc(-c2ccccc2)[nH]n1',
 'Nc1cc(Br)ccc1CO',
 'Nc1cc(Br)ccn1',
 'Nc1cc(C(F)(F)F)cc(C(F)(F)F)c1',
 'Nc1cc(C(F)(F)F)cnc1Cl',
 'Nc1cc(CO)ccn1',
 'Nc1cc(Cl)c(F)c(Cl)c1',
 'Nc1cc(Cl)c(O)c(Cl)c1',
 'Nc1cc(Cl)c([N+](=O)[O-])cn1',
 'Nc1cc(Cl)ccc1F',
 'Nc1cc(Cl)ccn1',
 'Nc1cc(Cl)cnc1Cl',
 'Nc1cc(Cl)nc(Cl)n1',
 'Nc1cc(Cl)ncn1',
 'Nc1cc(Cl)nnc1Cl',
 'Nc1cc(F)c(F)c(F)c1',
 'Nc1cc(F)c(F)cc1Br',
 'Nc1cc(F)c(F)cc1[N+](=O)[O-]',
 'Nc1cc(F)cc(F)c1',
 'Nc1cc(F)cc(F)c1[N+](=O)[O-]',
 'Nc1cc(F)ccn1',
 'Nc1cc(N2CCCC2)ccn1',
 'Nc1cc(N2CCNCC2)ccc1[N+](=O)[O-]',
 'Nc1cc2cccnc2c2ncccc12',
 'Nc1ccc(-c2ccccc2)cn1',
 'Nc1ccc(-c2cnco2)cc1',
 'Nc1ccc(-c2ncc[nH]2)cc1',
 'Nc1ccc(-n2cccc2)cc1',
 'Nc1ccc(-n2cncn2)cc1',
 'Nc1ccc(Br)c(F)n1',
 'Nc1ccc(C(=O)N2CCOCC2)cc1',
 'Nc1ccc(CC2COC(=O)N2)cc1',
 'Nc1ccc(Cl)c(C(F)(F)F)c1',
 'Nc1ccc(Cl)c(F)c1',
 'Nc1ccc(Cl)cc1F',
 'Nc1ccc(Cl)cn1',
 'Nc1ccc(Cl)nc1',
 'Nc1ccc(Cl)nc1Cl',
 'Nc1ccc(Cn2ccnc2)cc1',
 'Nc1ccc(F)c(C(F)(F)F)c1',
 'Nc1ccc(F)c(Cl)c1',
 'Nc1ccc(F)c(Cl)c1F',
 'Nc1ccc(F)c([N+](=O)[O-])c1',
 'Nc1ccc(F)cc1CO',
 'Nc1ccc(F)cc1Cl',
 'Nc1ccc(F)cc1F',
 'Nc1ccc(F)cn1',
 'Nc1ccc(F)nc1',
 'Nc1ccc(N2CCC(N3CCOCC3)CC2)cc1',
 'Nc1ccc(N2CCC=C(N3CCOCC3)C2=O)cc1',
 'Nc1ccc(N2CCCC2=O)cc1',
 'Nc1ccc(N2CCOCC2)c(F)c1',
 'Nc1ccc(N2CCOCC2)cc1',
 'Nc1ccc(N2CCOCC2)cn1',
 'Nc1ccc(N2CCOCC2=O)cc1',
 'Nc1ccc(O)cc1C(F)(F)F',
 'Nc1ccc(O)cn1',
 'Nc1ccc(OC(F)(F)F)c(Cl)c1',
 'Nc1ccc(OC(F)F)cc1',
 'Nc1ccc([N+](=O)[O-])c(Br)c1',
 'Nc1ccc([N+](=O)[O-])c(C(F)(F)F)c1',
 'Nc1ccc([N+](=O)[O-])c(F)c1',
 'Nc1ccc([N+](=O)[O-])cn1',
 'Nc1ccc2[nH]c(=O)[nH]c2c1',
 'Nc1ccc2[nH]ncc2c1',
 'Nc1ccc2c(c1)C(=O)CCC2',
 'Nc1ccc2c(c1)C(O)CCC2',
 'Nc1ccc2c(c1)CC(=O)N2',
 'Nc1ccc2c(c1)CCC2=O',
 'Nc1ccc2c(c1)CCCC2=O',
 'Nc1ccc2c(c1)CNC2=O',
 'Nc1ccc2c(c1)CNCC2',
 'Nc1ccc2c(c1)COC2=O',
 'Nc1ccc2c(c1)OCCCO2',
 'Nc1ccc2c(c1)OCO2',
 'Nc1ccc2c(c1)oc1ccccc12',
 'Nc1ccc2cc(Br)ccc2c1',
 'Nc1ccc2cn[nH]c2c1',
 'Nc1ccc2cnccc2c1',
 'Nc1ccc2ncccc2c1',
 'Nc1ccc2nccnc2c1',
 'Nc1ccc2nccnc2c1Br',
 'Nc1ccc2ncoc2c1',
 'Nc1ccc2nsnc2c1',
 'Nc1cccc(-n2cncn2)c1',
 'Nc1cccc(CN2CCCCC2)c1',
 'Nc1cccc(F)n1',
 'Nc1cccc(N2CCCC2)c1',
 'Nc1cccc(N2CCOCC2)c1',
 'Nc1cccc(OC(F)F)c1',
 'Nc1cccc2[nH]ccc12',
 'Nc1cccc2c(=O)[nH][nH]c(=O)c12',
 'Nc1cccc2c1C(=O)N(C1CCC(=O)NC1=O)C2=O',
 'Nc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O',
 'Nc1cccc2cnccc12',
 'Nc1cccc2ncccc12',
 'Nc1cccc2ocnc12',
 'Nc1ccccc1-c1nnn[nH]1',
 'Nc1cccnc1Cl',
 'Nc1cccnc1F',
 'Nc1cccnc1[N+](=O)[O-]',
 'Nc1ccnc(-c2ccccc2)c1',
 'Nc1ccnc(Cl)c1',
 'Nc1ccnc(Cl)n1',
 'Nc1ccncc1Cl',
 'Nc1ccncc1[N+](=O)[O-]',
 'Nc1ccncn1',
 'Nc1ccsc1.O=C(O)C(=O)O',
 'Nc1cn[nH]c1',
 'Nc1cnc(Br)cn1',
 'Nc1cnc(Cl)c(Cl)c1',
 'Nc1cnc(Cl)cc1Cl',
 'Nc1cnc(Cl)cn1',
 'Nc1cnc(Cl)nc1',
 'Nc1cncc(Cl)n1',
 'Nc1cncc(F)c1',
 'Nc1cnccn1',
 'Nc1cncnc1',
 'Nc1nc(-c2ccc(Cl)c(Cl)c2)cs1',
 'Nc1nc(-c2ccc(Cl)cc2)cs1',
 'Nc1nc(-c2ccc(F)cc2)cs1',
 'Nc1nc(-c2cccc([N+](=O)[O-])c2)cs1',
 'Nc1nc(-c2ccccc2Cl)cs1',
 'Nc1nc(-c2cccnc2)cs1',
 'Nc1nc(C2CC2)cs1',
 'Nc1nc(Cl)cc(Cl)n1',
 'Nc1nc(Cl)ccc1Cl',
 'Nc1nc(Cl)ncc1Cl',
 'Nc1nc(F)cc(F)n1',
 'Nc1nc(NCc2ccc(F)cc2)ccc1[N+](=O)[O-]',
 'Nc1ncc(-c2ccccc2)s1',
 'Nc1ncc(Cl)c(Cl)n1',
 'Nc1ncc(Cl)cc1Cl',
 'Nc1ncc(Cl)cc1F',
 'Nc1ncc(Cl)nc1Cl',
 'Nc1ncc(F)cc1[N+](=O)[O-]',
 'Nc1ncc(F)cn1',
 'Nc1ncc([N+](=O)[O-])cn1',
 'Nc1ncc([N+](=O)[O-])s1',
 'Nc1ncc[nH]c1=O',
 'Nc1nccc(-c2cccnc2)n1',
 'Nc1nccc(Cl)n1',
 'Nc1nccc2cccnc12',
 'Nc1ncccc1C=O',
 'Nc1ncccc1CO',
 'Nc1ncccc1F',
 'Nc1nccnc1Br',
 'Nc1nccnc1Cl',
 'Nc1ncco1',
 'Nc1nccs1',
 'Nc1ncnc2c1ncn2C1CCCCO1',
 'Nc1ncncc1Br',
 'Nc1nnc(S)s1',
 'Nc1nncs1',
 'Nc1noc2ccc(F)cc12']

In [None]:
list_bb3 = ['Br.Br.NCC1CCCN1c1cccnn1',
 'Br.NCc1cccc(Br)n1',
 'C#CCOc1ccc(CN)cc1.Cl',
 'C#CCOc1cccc(CN)c1.Cl',
 'C#Cc1ccc(N)cc1',
 'C#Cc1cccc(N)c1',
 'C=C(C)C(=O)NCCN.Cl',
 'C=C(C)COCCN.Cl',
 'C=C(Cl)CN.Cl',
 'C=C1CCC(CN)CC1.Cl',
 'C=CCNC(=O)CN.Cl',
 'C=CCOC(C)CN',
 'C=CCOCCCN',
 'C=CCOCCN',
 'C=CCSCCN',
 'CC(=O)Nc1cccc(N)n1',
 'CC(=O)SCCN.Cl',
 'CC(=O)c1ccc(N)c(F)c1',
 'CC(=O)c1cccc(N)c1',
 'CC(C)(C#N)c1ccc(N)cc1',
 'CC(C)(C)NS(=O)(=O)c1cccc(N)c1',
 'CC(C)(C)OC(=O)N1CCC(n2cc(N)cn2)CC1',
 'CC(C)(C)OC(=O)N1CCN(c2ccccc2N)CC1',
 'CC(C)(C)OC(=O)N1Cc2c(N)n[nH]c2C1(C)C',
 'CC(C)(C)OC(=O)n1ncc2cc(N)ccc21',
 'CC(C)(C)c1cc(N)n[nH]1',
 'CC(C)(C)c1ccc(O)c(N)c1',
 'CC(C)(C)c1nnc(CN)s1.Cl',
 'CC(C)(C)c1ocnc1CN.Cl',
 'CC(C)(CN)C(=O)N1CCCC1',
 'CC(C)(CN)CCC#N',
 'CC(C)(CN)CCS(C)(=O)=O.Cl',
 'CC(C)CC(C)(CN)NC(=O)c1cc(Cl)c(Cl)[nH]1.Cl',
 'CC(C)Cn1cnc2c(N)nc3ccccc3c21',
 'CC(C)NC(=O)NCCN.Cl.Cl',
 'CC(C)c1nc(N)sc1Br',
 'CC(C)c1nnc([C@H]2C[C@H](CN)[C@H](O)C2)[nH]1',
 'CC(CCN)S(C)=O',
 'CC(CN)Cn1cccn1',
 'CC(CN)N1CCC1',
 'CC(CN)OC(C)(C)C.Cl',
 'CC(CN)Oc1ccc(Cl)cc1',
 'CC(CN)S(=O)(=O)N1CCN(c2ccccc2)CC1.Cl.Cl',
 'CC(CN)S(C)=O',
 'CC(CN)S(N)(=O)=O.Cl',
 'CC(CN)Sc1ccccc1',
 'CC(CN)c1c(Cl)cccc1Cl',
 'CC(CN)c1nccs1',
 'CC(F)(F)CN.Cl',
 'CC(O)(CN)CN1CCOCC1',
 'CC(O)(CN)c1ccsc1',
 'CC(O)CCN',
 'CC1(C)CC(CCN)C(=O)N1.Cl',
 'CC1(C)CC(CN)C(C)(C)O1',
 'CC1(C)CCOC1CCN',
 'CC1(C)NC(=O)N(CCCN)C1=O.Cl',
 'CC1(C)OB(c2ccc(N)cc2)OC1(C)C',
 'CC1(CCCCN)OCCO1',
 'CC1(CN)CCC2(CC1)OCCO2',
 'CC1(F)CCN(CCN)C1.Cl.Cl',
 'CC12CCC(CN)(C1)OC2.Cl',
 'CC1=CCN(CCN)CC1.Cl.Cl',
 'CC1CC(CN)C(C)O1',
 'CC1CCCC(CN)O1',
 'CC1CCc2nc(CN)sc2C1',
 'CC1CN(S(=O)(=O)CCN)CC(C)O1',
 'CC1CN(c2cc(CN)ccn2)CCO1',
 'CC1Cc2cc(CN)ccc2O1',
 'CCC#CCN',
 'CCC(C)(O)CCN',
 'CCC(CC)(CN)OC',
 'CCC(CN)Oc1ccccc1C.Cl',
 'CCC1=NN(Cc2ccccc2C)C(=O)C1CCN',
 'CCC1COCCN1CCN.Cl.Cl',
 'CCN(CCCN)S(C)(=O)=O',
 'CCN1C(=O)C[C@H](CN)[C@H]1c1ccncc1',
 'CCN1CCN(Cc2ccc(N)nc2)CC1',
 'CCOC(=O)c1c(C)csc1N',
 'CCOC(=O)c1c[nH]nc1N',
 'CCOC(=O)c1cc(N)[nH]n1',
 'CCOC(=O)c1ccc(O)c(N)c1',
 'CCOC(=O)c1cccnc1N',
 'CCOC(=O)c1cnc(N)[nH]1',
 'CCOC(=O)c1cnc(N)cn1',
 'CCOC(=O)c1cnc(SC)nc1N',
 'CCOC(=O)c1cncnc1N',
 'CCOC(=O)c1cnn(C)c1N',
 'CCOC(=O)c1coc(N)n1',
 'CCOC(=O)c1csc(N)n1',
 'CCOC(=O)c1ncccc1N',
 'CCOC(=O)c1nnc(N)o1',
 'CCOC(=O)c1nonc1N',
 'CCOC(=O)c1sc(N)nc1C',
 'CCOC(CN)CN(C)C',
 'CCOCCCN',
 'CCON(C)C(=O)CN.Cl',
 'CCON(CC)C(=O)CN.Cl',
 'CCOc1cc(N)ccn1',
 'CCOc1cc2c(cc1CN)OC(C)C2.Cl',
 'CCOc1ccnc(N)c1',
 'CCS(=O)(=O)N1CC(CN)C1.Cl',
 'CCS(=O)CCN.Cl',
 'CCSC1CCC1(O)CN',
 'CCSCCN.Cl',
 'CCSc1ccc(CN)cc1',
 'CCc1cc(N)n[nH]1',
 'CCn1cc(N)c(C)n1',
 'CCn1cc2cc(N)ccc2n1',
 'CCn1nccc1CN',
 'CN(C)C(=O)C1CCC(CN)O1.Cl',
 'CN(C)C(CN)c1ccco1',
 'CN(C)C1(CN)CCSC1',
 'CN(C)c1cccnc1CN',
 'CN(C)c1nc(Cl)c(CN)s1.Cl.Cl',
 'CN(C)c1ncccc1CN.Cl.Cl',
 'CN(CC(F)(F)F)C(=O)CN.Cl',
 'CN(CCN)C1CCOCC1',
 'CN(Cc1ccco1)Cc1ccccc1CN',
 'CN1C(=O)Cc2cc(CN)ccc21.Cl',
 'CN1CC(CCN)CC1=O',
 'CN1CC(CN)CC1=O',
 'CN1CC2(CCC1CN)CC2.Cl.Cl',
 'CN1CCN(C(=O)CCN)CC1',
 'CN1CCN(C(=O)CN)CC1',
 'CN1CCN(CCN)CC1',
 'CN1CCN(Cc2ccc(N)cc2C(F)(F)F)CC1',
 'CN1CCN(Cc2cccc(N)c2)CC1',
 'CN1CCN(Cc2ccccc2N)CC1',
 'CN1CCN(c2ccc(N)cc2F)CC1',
 'CN1CCO[C@@H](CN)[C@@H]1c1cnn(C)c1',
 'CNC(=O)C(O)CN.Cl',
 'CNC(=O)COCCN.Cl',
 'CNC(=O)c1cc(Oc2ccc(N)cc2)ccn1',
 'CNC(=O)c1ccc(N)cc1F',
 'COC(=O)Cc1nc(N)sc1C',
 'COC(=O)c1c[nH]c(C(=O)OC)c1N',
 'COC(=O)c1c[nH]nc1N',
 'COC(=O)c1cc(Cl)ccc1N',
 'COC(=O)c1cc(Cl)nc(Cl)c1N',
 'COC(=O)c1cc(F)c(F)cc1N',
 'COC(=O)c1cc(N)cc(F)c1',
 'COC(=O)c1cc(N)cc(O)c1',
 'COC(=O)c1cc(N)ccc1C',
 'COC(=O)c1cc(N)ccc1Cl',
 'COC(=O)c1cc(N)ccc1F',
 'COC(=O)c1cc(N)cs1',
 'COC(=O)c1cc(OC)c(OC)cc1N',
 'COC(=O)c1ccc(N)cc1C',
 'COC(=O)c1ccc(N)cc1O',
 'COC(=O)c1ccc(N)cc1OC',
 'COC(=O)c1ccc2nc(N)sc2c1',
 'COC(=O)c1cccc(N)n1',
 'COC(=O)c1ccnc(N)c1',
 'COC(=O)c1ccsc1N',
 'COC(=O)c1cnc(N)cn1',
 'COC(=O)c1cncc(N)c1',
 'COC(=O)c1cnccc1N',
 'COC(=O)c1cscc1N.Cl',
 'COC(=O)c1nc(Cl)c(Cl)nc1N',
 'COC(=O)c1nc(N)sc1Br',
 'COC(=O)c1nccnc1N',
 'COC(=O)c1occc1N',
 'COC(=O)c1sc(-c2ccc(Cl)cc2)cc1N',
 'COC(=O)c1sc(C)cc1N',
 'COC(=O)c1scc(C)c1N',
 'COC(=O)c1sccc1N',
 'COC(=O)c1scnc1N',
 'COC(C)(CCN)OC',
 'COC(C)(CN)C1CC1',
 'COC(CN)C1CCCOC1',
 'COC(CN)C1CCOC1',
 'COC(CN)CC(N)=O.Cl',
 'COC1(C(F)(F)CN)CCOCC1.Cl',
 'COC1(CN)CCC1',
 'COC1(CN)CCOC1.Cl',
 'COC1(OC)CC(CN)C1',
 'COC1CCC(CCN)CC1',
 'COCC1(CN)CCCCC1',
 'COCCNC(=O)CN.Cl',
 'COCOCCCN',
 'COC[C@H](CN)OC',
 'COCc1ccc(N)cc1',
 'COCc1ccccc1CN',
 'CO[C@@H]1COC[C@H]1n1cc(CN)nn1.Cl',
 'COc1c(F)cc(N)cc1F',
 'COc1c(F)ccc(F)c1CN.Cl',
 'COc1c(N)cccc1F',
 'COc1c(N)ncnc1Cl',
 'COc1cc(Br)ccc1N',
 'COc1cc(C#N)c(F)cc1N',
 'COc1cc(C)c(N)cn1',
 'COc1cc(CN)c(Br)cn1',
 'COc1cc(CN)c2ccccc2n1',
 'COc1cc(CN)sn1.Cl.Cl',
 'COc1cc(F)c(Cl)cc1N',
 'COc1cc(F)ccc1N',
 'COc1cc(N)cc(OC)c1',
 'COc1cc(N)ccn1',
 'COc1cc2c(cc1CN)OCO2',
 'COc1cc2nc(Cl)nc(N)c2cc1OC',
 'COc1ccc(C(CN)N2CCOCC2)cc1OC',
 'COc1ccc(C)nc1CN.Cl.Cl',
 'COc1ccc(CN)c(C)c1OC',
 'COc1ccc(CN)cc1C#N.Cl',
 'COc1ccc(CN)cc1CN1CCOCC1.Cl.Cl',
 'COc1ccc(Cl)c(N)c1.Cl',
 'COc1ccc(N)c(Cl)c1',
 'COc1ccc(N)cc1Cl.Cl',
 'COc1ccc(N)cc1OC',
 'COc1ccc(N)cc1[N+](=O)[O-]',
 'COc1ccc(N)cn1',
 'COc1ccc(N)nc1',
 'COc1ccc(O)c(N)c1',
 'COc1ccc(OC)c(N)c1',
 'COc1ccc([N+](=O)[O-])c(N)n1',
 'COc1ccc2nc(N)sc2c1',
 'COc1ccc2nnc(CN)n2n1',
 'COc1cccc(-c2cc(N)on2)c1',
 'COc1cccc(F)c1CN',
 'COc1cccc(N)c1F',
 'COc1cccc(N)n1',
 'COc1ccccc1-c1nnc(N)s1',
 'COc1ccccc1OCCCCN',
 'COc1ccnc(N)c1',
 'COc1ccnc(N)n1',
 'COc1ccncc1CN',
 'COc1cnc(CN)cn1.Cl.Cl',
 'COc1cnc(N)cn1',
 'COc1cnc(N)nc1',
 'COc1cncc(N)c1',
 'COc1cncc(N)n1',
 'COc1nc(Br)ccc1N',
 'COc1nc(C)ccc1CN',
 'COc1nc(C)nc(N)n1',
 'COc1nc(Cl)ncc1N',
 'COc1ncc(N)cc1C(F)(F)F',
 'COc1ncc(N)cn1',
 'COc1ncccc1N',
 'CS(=O)(=O)CC1(CN)CCOCC1.Cl',
 'CS(=O)(=O)NC(=O)CCN.Cl',
 'CS(=O)(=O)NC1CCCC1CN.Cl',
 'CS(=O)(=O)Nc1ccc(-c2csc(N)n2)cc1',
 'CS(=O)(=O)Nc1cccc(N)c1',
 'CS(=O)(=O)c1ccc(N)cc1',
 'CS(=O)(=O)c1ccc(N)cc1F',
 'CS(=O)(=O)c1cccc(N)c1',
 'CS(=O)C1(CN)CCC1',
 'CS(=O)CC(O)CN.Cl',
 'CS(=O)CCCCN',
 'CSC1(CN)CCOCC1.Cl',
 'CSCc1nnc(CN)[nH]1.Cl',
 'CSSCCN.Cl',
 'CSc1ccc(CN)cc1C#N.Cl',
 'CSc1ccc(CN)o1.Cl',
 'CSc1nnc(CN)s1',
 'C[Si](C)(C)C#Cc1ccc(N)cn1',
 'Cc1[nH][nH]c(=O)c1CCN',
 'Cc1c(Br)cncc1CN.Cl.Cl',
 'Cc1c(CN)oc2ccc(F)cc12.Cl',
 'Cc1c(N)cccc1Br',
 'Cc1c(N)cccc1F',
 'Cc1c([C@@H]2[C@@H](CN)CC(=O)N2C)cnn1C',
 'Cc1cc(=O)oc2cc(N)ccc12',
 'Cc1cc(C#N)c(N)s1',
 'Cc1cc(C#N)cnc1N',
 'Cc1cc(C)c(CN)c(=O)[nH]1.Cl',
 'Cc1cc(C)c(N)c([N+](=O)[O-])c1',
 'Cc1cc(C)nc(N)n1',
 'Cc1cc(CN)c(C)[nH]1',
 'Cc1cc(CN)c(C)o1',
 'Cc1cc(CN)c(C)s1',
 'Cc1cc(CN)cc(Cl)n1.Cl.Cl',
 'Cc1cc(CN)ccc1Oc1ccc(Cl)cc1Cl.Cl',
 'Cc1cc(Cl)nc(Cl)c1N',
 'Cc1cc(Cl)ncc1N',
 'Cc1cc(Cl)nnc1N',
 'Cc1cc(F)ccc1N',
 'Cc1cc(F)ncc1CN',
 'Cc1cc(N)cc(Cl)c1',
 'Cc1cc(N)cc(F)c1',
 'Cc1cc(N)ccc1Cl',
 'Cc1cc(N)ccc1F',
 'Cc1cc(N)ccc1O',
 'Cc1cc(N)ccn1',
 'Cc1cc(N)cnc1Br',
 'Cc1cc(N)n(-c2ccccc2)n1',
 'Cc1cc(N)n(C(C)C)n1',
 'Cc1cc(N)n[nH]1',
 'Cc1cc(N)ncc1Cl',
 'Cc1cc(N)ncc1[N+](=O)[O-]',
 'Cc1cc(N)ncn1',
 'Cc1cc(N)nn1C',
 'Cc1cc(N)nnc1Cl',
 'Cc1cc(N)on1',
 'Cc1cc(O)cc(C)c1N',
 'Cc1cc(O)ccc1N',
 'Cc1cc([N+](=O)[O-])c(Cl)cc1N',
 'Cc1cc([N+](=O)[O-])c(N)cc1Cl',
 'Cc1cc2cc(CN)ccc2[nH]1',
 'Cc1ccc(-c2cc(C(F)(F)F)nc(OCCN)n2)cc1',
 'Cc1ccc(C#N)cc1N',
 'Cc1ccc(C)c(CN)n1',
 'Cc1ccc(CN)c(Cl)n1',
 'Cc1ccc(CN)c(OC2CCOC2)c1',
 'Cc1ccc(CN)n1C',
 'Cc1ccc(CN)nc1C.Cl.Cl',
 'Cc1ccc(Cl)c(N)c1',
 'Cc1ccc(F)c(N)c1',
 'Cc1ccc(N)c(CO)c1',
 'Cc1ccc(N)c(Cl)n1',
 'Cc1ccc(N)cc1Br',
 'Cc1ccc(N)nn1',
 'Cc1ccc(O)c(CN)n1.Cl.Cl',
 'Cc1ccc(S(C)(=O)=O)cc1N',
 'Cc1ccc([N+](=O)[O-])c(N)n1',
 'Cc1ccc2cccc(N)c2n1',
 'Cc1cccc(CCCN)n1',
 'Cc1cccc(N)c1Cl',
 'Cc1cccc(OCCCN)c1C.Cl',
 'Cc1cccc2oc(CCN)nc12.Cl.Cl',
 'Cc1cccc2sc(N)nc12',
 'Cc1ccccc1-c1csc(N)n1',
 'Cc1ccccc1-n1nc(C)c(CN)c1C.Cl.Cl',
 'Cc1cccnc1NC(=O)CCN.Cl.Cl',
 'Cc1ccnc(Cl)c1N',
 'Cc1ccnc(N)c1',
 'Cc1ccncc1N',
 'Cc1ccsc1CN',
 'Cc1cn(-c2cc(N)cc(C(F)(F)F)c2)cn1',
 'Cc1cnc(CCN)s1.Cl.Cl',
 'Cc1cnc(CN)o1.Cl',
 'Cc1cnc(CN)s1',
 'Cc1cnc(Cl)cc1N',
 'Cc1cnc(Cl)nc1N',
 'Cc1cnc(F)c(CN)c1',
 'Cc1cnc(N)cn1',
 'Cc1cnc(N)s1',
 'Cc1cnc(O)c(N)c1',
 'Cc1conc1CN',
 'Cc1csc(N)n1',
 'Cc1cscc1CN',
 'Cc1n[nH]c(N)c1C',
 'Cc1nc(CCN)sc1C',
 'Cc1nc(CN)ccc1C(C)O.Cl.Cl',
 'Cc1nc(CN)oc1C.Cl.Cl',
 'Cc1nc(CN)sc1C',
 'Cc1nc(Cl)c(N)c(Cl)n1',
 'Cc1nc(Cl)ccc1N',
 'Cc1nc(N)ccc1Cl',
 'Cc1nc(N)ccc1[N+](=O)[O-]',
 'Cc1ncc(N)cc1Br',
 'Cc1ncccc1N',
 'Cc1nccn1-c1ncccc1CN',
 'Cc1nccnc1N',
 'Cc1nn(C)c2ncc(CN)cc12.Cl',
 'Cc1nn(CCN)c(C)c1Cl',
 'Cc1nnc(CCCN)s1.Cl',
 'Cc1nnc(N)o1',
 'Cc1nnc(N)s1',
 'Cc1nncn1CCCN.Cl',
 'Cc1nnsc1CN',
 'Cc1sc(C)c(CN)c1Br.Cl',
 'Cc1sc(CCN)nc1-c1ccccc1.Cl.Cl',
 'Cc1sc(CN)nc1C(C)C',
 'Cc1sc(N)c(C#N)c1C',
 'Cl.Cl.Cn1ccnc1Cn1c(CCCN)nc2c1CCCC2',
 'Cl.Cl.Cn1cncc1CN',
 'Cl.Cl.N=C(N)CCCN',
 'Cl.Cl.NCC#Cc1cccnc1',
 'Cl.Cl.NCC(=O)NCCN1CCOCC1',
 'Cl.Cl.NCC(=O)Nc1nccs1',
 'Cl.Cl.NCC(O)c1ccncc1',
 'Cl.Cl.NCC1(N2CCOCC2)CCOCC1',
 'Cl.Cl.NCC1(c2ccncc2)CC1',
 'Cl.Cl.NCC1CCN(C(N)=O)C1',
 'Cl.Cl.NCC1CCN(CC(F)F)CC1',
 'Cl.Cl.NCC1CCNC(=O)C1',
 'Cl.Cl.NCC=Cc1cccnc1',
 'Cl.Cl.NCCC(=O)Nc1ccncc1',
 'Cl.Cl.NCCCN1CCNC(=O)C1',
 'Cl.Cl.NCCN1CCS(=O)CC1',
 'Cl.Cl.NCCNC(=O)c1cnccn1',
 'Cl.Cl.NCCc1nc2c(s1)COCC2',
 'Cl.Cl.NCCc1nccn1C(F)F',
 'Cl.Cl.NCCc1nnc2c(=O)[nH]ccn12',
 'Cl.Cl.NCCn1cc(C2CC2)nn1',
 'Cl.Cl.NCCn1cccnc1=O',
 'Cl.Cl.NCCn1cnc2ccsc2c1=O',
 'Cl.Cl.NC[C@@H]1CCO[C@H]1c1cn[nH]c1',
 'Cl.Cl.NCc1cc(=O)nc[nH]1',
 'Cl.Cl.NCc1cc(Br)cc2cccnc12',
 'Cl.Cl.NCc1cc2n(n1)CCCO2',
 'Cl.Cl.NCc1ccc(-n2cncn2)cc1',
 'Cl.Cl.NCc1ccc(O)c2ncccc12',
 'Cl.Cl.NCc1ccc2ccccc2n1',
 'Cl.Cl.NCc1ccc2scnc2c1',
 'Cl.Cl.NCc1ccc[n+]([O-])c1',
 'Cl.Cl.NCc1cccc(-n2ccnn2)c1',
 'Cl.Cl.NCc1ccncc1C(F)(F)F',
 'Cl.Cl.NCc1cn2cc(Br)ccc2n1',
 'Cl.Cl.NCc1cn[nH]c1',
 'Cl.Cl.NCc1cncc(F)c1',
 'Cl.Cl.NCc1cnsc1',
 'Cl.Cl.NCc1nc(-c2ccncc2)no1',
 'Cl.Cl.NCc1nc2c(F)c(F)ccc2[nH]1',
 'Cl.Cl.NCc1nc2c(s1)CCC2',
 'Cl.Cl.NCc1nc2cnccc2s1',
 'Cl.Cl.NCc1ncc[nH]1',
 'Cl.Cl.NCc1nccc2[nH]ccc12',
 'Cl.Cl.NCc1ncccc1F',
 'Cl.Cl.NCc1nccn1-c1ccccc1',
 'Cl.Cl.NCc1nnc(C2CCOC2)[nH]1',
 'Cl.Cl.NCc1nnc2n1CCOCC2',
 'Cl.Cn1c(CN)n[nH]c1=O',
 'Cl.Cn1cc(CN)c(=O)[nH]c1=O',
 'Cl.Cn1cc(CN)c(Br)n1',
 'Cl.Cn1cc(N)ccc1=O',
 'Cl.Cn1nnc(CN)c1C(F)F',
 'Cl.N#CC1(NC(=O)CN)CC1',
 'Cl.N#Cc1ccc(CN)nc1',
 'Cl.NC1CCC(=O)CC1',
 'Cl.NCC(=O)N1CCCO1',
 'Cl.NCC(=O)NC1CCC1',
 'Cl.NCC(=O)NCc1ccncc1',
 'Cl.NCC(F)(F)C(F)(F)F',
 'Cl.NCC(F)(F)C(N)=O',
 'Cl.NCC(F)(F)C1CC1',
 'Cl.NCC(F)(F)CC1CC1',
 'Cl.NCC1(F)CCCCC1',
 'Cl.NCC1(F)CCOC1',
 'Cl.NCC1(O)C2C3CC4C5C3CC2C5C41',
 'Cl.NCC1(OCCO)CCS(=O)(=O)C1',
 'Cl.NCC12CC3CC(CC(C3)C1)C2',
 'Cl.NCC1C=CCC1',
 'Cl.NCC1CC(=O)N(Cc2ccccc2)C1',
 'Cl.NCC1CC(C(N)=O)=NO1',
 'Cl.NCC1CC(CC(N)=O)CO1',
 'Cl.NCC1CC12CCOCC2',
 'Cl.NCC1CC2(C1)CC2(F)F',
 'Cl.NCC1CC2CC1C1CC21',
 'Cl.NCC1CCC2(CCC2)CO1',
 'Cl.NCC1CCCC1',
 'Cl.NCC1CCCC2(CCC2)C1O',
 'Cl.NCC1CCCOC1',
 'Cl.NCC1CCN(c2ccc(Br)cc2)C1',
 'Cl.NCC1CCOC2(CCOCC2)C1',
 'Cl.NCC1CCOCC12CCCC2',
 'Cl.NCC1CNC(=O)C1',
 'Cl.NCC1Cc2ccccc2C1',
 'Cl.NCC1Cc2ccccc2NC1=O',
 'Cl.NCC1OCCc2ccsc21',
 'Cl.NCC=C(Cl)Cl',
 'Cl.NCCC(=O)NCc1ccccc1',
 'Cl.NCCC1CC1',
 'Cl.NCCC1CC2(CCC2)CO1',
 'Cl.NCCC1CCCC1(F)F',
 'Cl.NCCC1CN(c2ncnc3[nH]ncc23)c2ccccc21',
 'Cl.NCCCCF',
 'Cl.NCCCCN1C(=O)c2ccccc2C1=O',
 'Cl.NCCCN1C(=O)CCC1=O',
 'Cl.NCCCNC(=O)C1CCC1',
 'Cl.NCCCNC(=O)c1ccc(F)cc1',
 'Cl.NCCCOc1cccc(F)c1',
 'Cl.NCCC[C@@H]1NC(=O)NC1=O',
 'Cl.NCCN1C(=O)SC(=Cc2cccs2)C1=O',
 'Cl.NCCN1C(=O)c2ccccc2S1(=O)=O',
 'Cl.NCCN1CCCS1(=O)=O',
 'Cl.NCCNC(=O)c1ccc(Cl)cc1',
 'Cl.NCCNC(=O)c1ccccc1F',
 'Cl.NCCNC(=O)c1ccn[nH]1',
 'Cl.NCCNC(=O)c1ccno1',
 'Cl.NCCNC(N)=O',
 'Cl.NCCOC1CCS(=O)(=O)C1',
 'Cl.NCCOCC(F)F',
 'Cl.NCCOc1ccc(F)c(F)c1',
 'Cl.NCCS(=O)(=O)C1CCOCC1',
 'Cl.NCCS(=O)(=O)Nc1ccccc1',
 'Cl.NCCS(=O)(=O)c1ccc(Cl)cc1',
 'Cl.NCCc1c[nH]c2cc(Cl)ccc12',
 'Cl.NCCc1cc(Br)c(Br)s1',
 'Cl.NCCc1ccno1',
 'Cl.NCCn1cnc2sccc2c1=O',
 'Cl.NCCn1cnnn1',
 'Cl.NC[C@@H]1CCCO1',
 'Cl.NC[C@@H]1CCO[C@@H]1c1nc(C2CC2)no1',
 'Cl.NC[C@@H]1C[C@@H]2O[C@H]1[C@H]1C[C@H]12',
 'Cl.NC[C@H]1CC[C@H](C(N)=O)CC1',
 'Cl.NCc1cc(=O)[nH]c2ccccc12',
 'Cl.NCc1cc(Br)no1',
 'Cl.NCc1cc(C(F)(F)F)co1',
 'Cl.NCc1cc(F)c(Cl)cc1F',
 'Cl.NCc1cc2ccccc2[nH]c1=O',
 'Cl.NCc1ccc(-c2c[nH]c(=O)[nH]c2=O)cc1',
 'Cl.NCc1ccc(-c2nnn[nH]2)cc1',
 'Cl.NCc1ccc(CN2C(=O)CNC2=O)cc1',
 'Cl.NCc1ccc[nH]c1=O',
 'Cl.NCc1cccc2cc[nH]c12',
 'Cl.NCc1ccnc(C(N)=O)c1',
 'Cl.NCc1ccsc1C(F)(F)F',
 'Cl.NCc1cnc(Cl)s1',
 'Cl.NCc1cnc2n1CCOC2',
 'Cl.NCc1cnoc1C1CC1',
 'Cl.NCc1cocn1',
 'Cl.NCc1csc(=O)[nH]1',
 'Cl.NCc1cscc1C(F)(F)F',
 'Cl.NCc1nc(-c2ccco2)n[nH]1',
 'Cl.NCc1nc2cc(F)ccc2o1',
 'Cl.NCc1nc2ccccc2c(=O)[nH]1',
 'Cl.NCc1nc2ccccc2o1',
 'Cl.NCc1ncon1',
 'Cl.NCc1nnc(-c2ccncc2)[nH]1',
 'Cl.NCc1nnc2c(=O)[nH]ccn12',
 'Cl.NCc1nnc2ncccn12',
 'Cl.NCc1noc(C2CCOCC2)n1',
 'Cl.NCc1noc2ccc(F)cc12',
 'Cl.Nc1cc(O)ccn1',
 'Cl.Nc1ccc(O)cc1Cl',
 'Cl.Nc1cccc2c(=O)cc(-c3nn[nH]n3)oc12',
 'Cl.Nc1ncns1',
 'Cn1c(=O)cc(N)[nH]c1=O',
 'Cn1c(N)cc(=O)n(C)c1=O',
 'Cn1cc(-c2ccccc2CN)cn1',
 'Cn1cc(C(F)F)c(CN)n1',
 'Cn1cc(C2(CN)CCCO2)cn1',
 'Cn1cc(CN)cn1',
 'Cn1cc(N)cn1',
 'Cn1ccc(N)n1',
 'Cn1ccc(S(=O)(=O)NCCN)c1',
 'Cn1ccc2cc(N)ccc21',
 'Cn1ccnc1N',
 'Cn1cncc1C(CN)N1CCCC1',
 'Cn1ncc(C#N)c1N',
 'Cn1ncc2cc(N)ccc21',
 'Cn1nccc1CN',
 'Cn1nccc1CN1C[C@@H](F)C[C@H]1CN',
 'Cn1nccc1N',
 'Cn1nccc1[C@@H]1OCC[C@H]1CN',
 'Cn1nnc(N)n1',
 'N#CC1=C(N)CCC1',
 'N#Cc1c(N)cccc1F',
 'N#Cc1c(N)sc2c1CCCC2',
 'N#Cc1c[nH]nc1N',
 'N#Cc1cc(F)ccc1N',
 'N#Cc1cc(N)ccc1Cl',
 'N#Cc1cc(N)ccc1F',
 'N#Cc1ccc(N)c([N+](=O)[O-])c1',
 'N#Cc1ccc(N)cc1',
 'N#Cc1ccc(N)cc1C(F)(F)F',
 'N#Cc1ccc2nc(N)sc2c1',
 'N#Cc1cccc(N)n1',
 'N#Cc1ccccc1N',
 'N#Cc1cccnc1N',
 'N#Cc1ccnc(N)c1',
 'N#Cc1ccsc1N',
 'N#Cc1cnc2c(C#N)cnn2c1N',
 'N#Cc1cncnc1N',
 'N#Cc1cnn(-c2ccccc2)c1N',
 'N#Cc1nc[nH]c1N',
 'N#Cc1ncc(N)cc1C(F)(F)F',
 'NC/C=C/Br',
 'NC1=NC(=O)CS1',
 'NC1COC1',
 'NCC(O)COc1cccc(Cl)c1Cl',
 'NCC(O)COc1ccccc1Br',
 'NCC(O)c1ccc(Cl)s1.O=C(O)C(F)(F)F',
 'NCC(c1cccc(F)c1)N1CCOCC1',
 'NCC(c1cccs1)N1CCOCC1',
 'NCC1(C2CC2)CCCO1',
 'NCC1(CO)CC2C=CC1C2',
 'NCC1(CO)CC=CC1',
 'NCC1(CO)CCOC1',
 'NCC1(Cc2ccccc2)CC1',
 'NCC1(N2CCOCC2)CC1',
 'NCC1(O)CC1',
 'NCC1(O)CCCC1',
 'NCC1(O)CCSC1',
 'NCC1(OCCO)CCC1',
 'NCC1(c2ccc(Cl)cc2Cl)CCCC1',
 'NCC1(c2ccc3c(c2)OCO3)CCCCC1',
 'NCC12CC=CC(C1)OC2',
 'NCC12CCCC(CO1)C2',
 'NCC1CC2(CC2)CO1',
 'NCC1CCC(C(F)F)CC1',
 'NCC1CCC(F)(F)CC1',
 'NCC1CCC2CC2C1',
 'NCC1CCCCC(F)(F)C1',
 'NCC1CCCn2ccnc21',
 'NCC1CCN(c2ccncc2)CC1',
 'NCC1CN2CCN1CC2',
 'NCC1CS(=O)(=O)c2ccccc21',
 'NCC1CSCCN1Cc1ccccc1',
 'NCCC(=O)N1CCN(c2ccccn2)CC1',
 'NCCC(=O)NC1CCCC1',
 'NCCC(=O)NCc1ccncc1',
 'NCCC(O)C1CCCC1',
 'NCCC1CCOC1',
 'NCCC1CSC1',
 'NCCC1OCCc2ccccc21',
 'NCCC1SCCS1',
 'NCCCCN1CCCC1',
 'NCCCc1cc(=O)[nH][nH]1',
 'NCCN1CC2CCC1C2',
 'NCCN1CCC2(C1)OCCO2',
 'NCCN1CCCOCC1',
 'NCCN1CC[C@@H](O)C1',
 'NCCNC(=O)c1cccnc1',
 'NCCOc1cccnc1',
 'NCCS(=O)Cc1ccccc1',
 'NCC[C@@H]1COC[C@H]1O',
 'NCCc1ccc(N2CCOCC2)c(F)c1',
 'NCCc1ccncc1F',
 'NCCc1coc2ccccc12',
 'NCCc1ncc(Br)s1',
 'NCCc1nccc(C2CC2)n1',
 'NC[C@@H]1CCC(=O)N1',
 'NC[C@@H]1C[C@@H]2C=C[C@H]1C2',
 'NC[C@]1(CO)COC[C@H]2CCCN21',
 'NCc1c(Br)cncc1Br',
 'NCc1c(F)cccc1N1CCCC1',
 'NCc1c[nH]c2ccccc12',
 'NCc1cc(-c2ccccc2)[nH]n1',
 'NCc1cc(=O)c(O)co1',
 'NCc1cc(C(F)F)on1',
 'NCc1cc(F)cc(F)c1',
 'NCc1cc2c(s1)CCSC2',
 'NCc1ccc(C(=O)N2CCCC2)cc1',
 'NCc1ccc(CN2CCCC2=O)cc1',
 'NCc1ccc(F)nc1',
 'NCc1ccc(N2CCOCC2)cc1C(F)(F)F',
 'NCc1ccc(NC(=O)c2ccco2)cc1',
 'NCc1ccc(Oc2cccnc2)c(F)c1',
 'NCc1ccc2[nH]ccc2c1',
 'NCc1cccc(C(=O)N2CCCC2)c1',
 'NCc1cccc(C(F)(F)F)c1',
 'NCc1cccc(C(F)(F)F)n1',
 'NCc1cccc(N2CCOCC2)c1',
 'NCc1cccc2c1OCO2',
 'NCc1ccccc1CN1CCCC1=O',
 'NCc1ccccc1CS(=O)(=O)N1CCOCC1',
 'NCc1ccccc1N1CCOCC1',
 'NCc1ccccn1',
 'NCc1cccnc1',
 'NCc1cccnc1N1CCC(C(N)=O)CC1',
 'NCc1cccnc1OC(F)F',
 'NCc1cccs1',
 'NCc1ccnc(-n2cncn2)c1',
 'NCc1ccncn1',
 'NCc1ccon1',
 'NCc1ccsc1',
 'NCc1ccsc1Br',
 'NCc1cn(-c2ccccc2)nc1-c1ccncc1',
 'NCc1cn(C(F)F)c2ccccc12',
 'NCc1cn2cc(Cl)ccc2n1',
 'NCc1cn2cccc(F)c2n1',
 'NCc1cnc(-c2ccccc2)o1',
 'NCc1cnc(-c2ccccc2)s1',
 'NCc1cnc(N2CCOCC2)c(F)c1',
 'NCc1cnn(Cc2ccccc2)c1',
 'NCc1cnns1',
 'NCc1nc(-c2ccsc2)no1',
 'NCc1nc2c(s1)CCCC2',
 'NCc1nc2c(s1)CCCCC2',
 'NCc1nc2ccccc2s1',
 'NCc1nccs1',
 'NCc1noc2c1COCC2',
 'NCc1sncc1Br',
 'Nc1[nH]c(=O)ncc1F',
 'Nc1c(F)cccc1F',
 'Nc1c(O)ncnc1O',
 'Nc1c2c(cc3c1CCC3)CCC2',
 'Nc1c2ccccc2nc2ccccc12',
 'Nc1cc(-c2cccc(Br)c2)no1',
 'Nc1cc(-c2ccccc2)[nH]n1',
 'Nc1cc(=O)[nH]c(=O)[nH]1',
 'Nc1cc(=O)[nH]c(=S)[nH]1',
 'Nc1cc(Br)ccc1CO',
 'Nc1cc(Br)ccn1',
 'Nc1cc(Br)cn2ccnc12',
 'Nc1cc(C(F)(F)F)cc(C(F)(F)F)c1',
 'Nc1cc(C(F)(F)F)cnc1Cl',
 'Nc1cc(C2CC2)[nH]n1',
 'Nc1cc(CO)ccn1',
 'Nc1cc(Cl)c(F)c(Cl)c1',
 'Nc1cc(Cl)c(O)c(Cl)c1',
 'Nc1cc(Cl)c([N+](=O)[O-])cn1',
 'Nc1cc(Cl)ccc1F',
 'Nc1cc(Cl)ccn1',
 'Nc1cc(Cl)cnc1Cl',
 'Nc1cc(Cl)nc(Cl)c1[N+](=O)[O-]',
 'Nc1cc(Cl)nc(Cl)n1',
 'Nc1cc(Cl)ncn1',
 'Nc1cc(Cl)nnc1Cl',
 'Nc1cc(F)c(F)c(F)c1',
 'Nc1cc(F)c(F)cc1Br',
 'Nc1cc(F)c(F)cc1[N+](=O)[O-]',
 'Nc1cc(F)cc(F)c1',
 'Nc1cc(F)cc(F)c1[N+](=O)[O-]',
 'Nc1cc(F)ccn1',
 'Nc1cc(N2CCCC2)ccn1',
 'Nc1cc(N2CCNCC2)ccc1[N+](=O)[O-]',
 'Nc1cc2cccnc2c2ncccc12',
 'Nc1cc[nH]c(=O)c1',
 'Nc1cc[nH]c(=O)n1',
 'Nc1cc[nH]n1',
 'Nc1ccc(-c2ccccc2)cn1',
 'Nc1ccc(-c2cnco2)cc1',
 'Nc1ccc(-c2ncc[nH]2)cc1',
 'Nc1ccc(-n2cccc2)cc1',
 'Nc1ccc(-n2cncn2)cc1',
 'Nc1ccc(Br)c(F)n1',
 'Nc1ccc(C(=O)N2CCOCC2)cc1',
 'Nc1ccc(CC2COC(=O)N2)cc1',
 'Nc1ccc(Cl)c(C(F)(F)F)c1',
 'Nc1ccc(Cl)c(F)c1',
 'Nc1ccc(Cl)cc1F',
 'Nc1ccc(Cl)cn1',
 'Nc1ccc(Cl)nc1',
 'Nc1ccc(Cl)nc1Cl',
 'Nc1ccc(Cn2ccnc2)cc1',
 'Nc1ccc(F)c(C(F)(F)F)c1',
 'Nc1ccc(F)c(Cl)c1',
 'Nc1ccc(F)c(Cl)c1F',
 'Nc1ccc(F)c([N+](=O)[O-])c1',
 'Nc1ccc(F)cc1CO',
 'Nc1ccc(F)cc1Cl',
 'Nc1ccc(F)cc1F',
 'Nc1ccc(F)cn1',
 'Nc1ccc(F)nc1',
 'Nc1ccc(N2CCC(N3CCOCC3)CC2)cc1',
 'Nc1ccc(N2CCC=C(N3CCOCC3)C2=O)cc1',
 'Nc1ccc(N2CCCC2=O)cc1',
 'Nc1ccc(N2CCOCC2)c(F)c1',
 'Nc1ccc(N2CCOCC2)cc1',
 'Nc1ccc(N2CCOCC2)cn1',
 'Nc1ccc(N2CCOCC2=O)cc1',
 'Nc1ccc(O)cc1C(F)(F)F',
 'Nc1ccc(O)cn1',
 'Nc1ccc(OC(F)(F)F)c(Cl)c1',
 'Nc1ccc(OC(F)F)cc1',
 'Nc1ccc([N+](=O)[O-])c(Br)c1',
 'Nc1ccc([N+](=O)[O-])c(C(F)(F)F)c1',
 'Nc1ccc([N+](=O)[O-])c(F)c1',
 'Nc1ccc([N+](=O)[O-])cn1',
 'Nc1ccc2[nH]c(=O)[nH]c2c1',
 'Nc1ccc2[nH]ncc2c1',
 'Nc1ccc2c(c1)C(=O)CCC2',
 'Nc1ccc2c(c1)C(O)CCC2',
 'Nc1ccc2c(c1)CC(=O)N2',
 'Nc1ccc2c(c1)CCC2=O',
 'Nc1ccc2c(c1)CCCC2=O',
 'Nc1ccc2c(c1)CNC2=O',
 'Nc1ccc2c(c1)CNCC2',
 'Nc1ccc2c(c1)COC2=O',
 'Nc1ccc2c(c1)OCCCO2',
 'Nc1ccc2c(c1)OCO2',
 'Nc1ccc2c(c1)oc1ccccc12',
 'Nc1ccc2cc(Br)ccc2c1',
 'Nc1ccc2cn[nH]c2c1',
 'Nc1ccc2cnccc2c1',
 'Nc1ccc2ncccc2c1',
 'Nc1ccc2nccn2c1',
 'Nc1ccc2nccnc2c1',
 'Nc1ccc2nccnc2c1Br',
 'Nc1ccc2ncoc2c1',
 'Nc1ccc2nsnc2c1',
 'Nc1cccc(-n2cncn2)c1',
 'Nc1cccc(CN2CCCCC2)c1',
 'Nc1cccc(F)n1',
 'Nc1cccc(N2CCCC2)c1',
 'Nc1cccc(N2CCOCC2)c1',
 'Nc1cccc(OC(F)F)c1',
 'Nc1cccc(OCc2ccccc2)c1',
 'Nc1cccc2[nH]ccc12',
 'Nc1cccc2c(=O)[nH][nH]c(=O)c12',
 'Nc1cccc2c1C(=O)N(C1CCC(=O)NC1=O)C2=O',
 'Nc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O',
 'Nc1cccc2cnccc12',
 'Nc1cccc2ncccc12',
 'Nc1cccc2ocnc12',
 'Nc1ccccc1-c1nnn[nH]1',
 'Nc1cccnc1Cl',
 'Nc1cccnc1F',
 'Nc1cccnc1[N+](=O)[O-]',
 'Nc1ccnc(-c2ccccc2)c1',
 'Nc1ccnc(Cl)c1',
 'Nc1ccnc(Cl)n1',
 'Nc1ccncc1Cl',
 'Nc1ccncc1[N+](=O)[O-]',
 'Nc1ccncn1',
 'Nc1ccsc1.O=C(O)C(=O)O',
 'Nc1cn2nc(Cl)ccc2n1',
 'Nc1cn[nH]c1',
 'Nc1cnc(Br)cn1',
 'Nc1cnc(Cl)c(Cl)c1',
 'Nc1cnc(Cl)cc1Cl',
 'Nc1cnc(Cl)cn1',
 'Nc1cnc(Cl)nc1',
 'Nc1cncc(Cl)n1',
 'Nc1cncc(F)c1',
 'Nc1cnccn1',
 'Nc1cncnc1',
 'Nc1n[nH]c2c(Cl)cc([N+](=O)[O-])cc12',
 'Nc1n[nH]c2c(F)c(F)ccc12',
 'Nc1n[nH]c2cc(Cl)ccc12',
 'Nc1n[nH]c2cc(F)ccc12',
 'Nc1n[nH]c2ccc([N+](=O)[O-])cc12',
 'Nc1n[nH]c2ncccc12',
 'Nc1nc(-c2ccc(Cl)c(Cl)c2)cs1',
 'Nc1nc(-c2ccc(Cl)cc2)cs1',
 'Nc1nc(-c2ccc(F)cc2)cs1',
 'Nc1nc(-c2cccc([N+](=O)[O-])c2)cs1',
 'Nc1nc(-c2ccccc2Cl)cs1',
 'Nc1nc(-c2cccnc2)cs1',
 'Nc1nc(=O)[nH]cc1Br',
 'Nc1nc(Br)cn2ccnc12',
 'Nc1nc(C2CC2)cs1',
 'Nc1nc(Cl)c(C=O)c(Cl)n1',
 'Nc1nc(Cl)c2[nH]cnc2n1',
 'Nc1nc(Cl)c2cc[nH]c2n1',
 'Nc1nc(Cl)c2cn[nH]c2n1',
 'Nc1nc(Cl)cc(Cl)n1',
 'Nc1nc(Cl)ccc1Cl',
 'Nc1nc(Cl)ncc1Cl',
 'Nc1nc(F)cc(F)n1',
 'Nc1nc(F)nc2nc[nH]c12',
 'Nc1nc(NCc2ccc(F)cc2)ccc1[N+](=O)[O-]',
 'Nc1nc(OCc2ccccc2)c2nc[nH]c2n1',
 'Nc1nc2c(Br)cccc2s1',
 'Nc1nc2c(s1)CCCC2',
 'Nc1nc2c(s1)CN(C(=O)OCc1ccccc1)CC2',
 'Nc1nc2c(s1)COCC2',
 'Nc1nc2cc(Br)ccn2n1',
 'Nc1nc2cc([N+](=O)[O-])ccc2[nH]1',
 'Nc1nc2ccc(Cl)cc2o1',
 'Nc1nc2ccc(Cl)cc2s1',
 'Nc1nc2cccc(Br)n2n1',
 'Nc1nc2ccccc2[nH]1',
 'Nc1nc2ccccc2o1',
 'Nc1nc2ccccc2s1',
 'Nc1nc2nc[nH]c2c(=O)[nH]1',
 'Nc1nc[nH]n1',
 'Nc1ncc(-c2ccccc2)s1',
 'Nc1ncc(Cl)c(Cl)n1',
 'Nc1ncc(Cl)cc1Cl',
 'Nc1ncc(Cl)cc1F',
 'Nc1ncc(Cl)nc1Cl',
 'Nc1ncc(F)cc1[N+](=O)[O-]',
 'Nc1ncc(F)cn1',
 'Nc1ncc([N+](=O)[O-])cn1',
 'Nc1ncc([N+](=O)[O-])s1',
 'Nc1ncc[nH]c1=O',
 'Nc1nccc(-c2cccnc2)n1',
 'Nc1nccc(=O)[nH]1',
 'Nc1nccc(Cl)n1',
 'Nc1nccc2cccnc12',
 'Nc1ncccc1C=O',
 'Nc1ncccc1CO',
 'Nc1ncccc1F',
 'Nc1nccnc1Br',
 'Nc1nccnc1Cl',
 'Nc1ncco1',
 'Nc1nccs1',
 'Nc1ncnc(=O)[nH]1',
 'Nc1ncnc(Cl)c1Cl',
 'Nc1ncnc2[nH]cnc12',
 'Nc1ncnc2[nH]ncc12',
 'Nc1ncnc2c1ncn2C1CCCCO1',
 'Nc1ncncc1Br',
 'Nc1nnc(S)s1',
 'Nc1nncs1',
 'Nc1nnn[nH]1',
 'Nc1noc2ccc(F)cc12']

In [None]:
# Shared vocabulary for the building-block encoders: the distinct SMILES
# strings of the three building-block lists, de-duplicated with pandas.
all_training_bbs = pd.Series([*list_bb1, *list_bb2, *list_bb3]).unique()

## Split-Model-BB Topology

In [None]:
def get_category_encoding_layer(vocabulary=all_training_bbs, max_tokens=None):
    """Build a small Keras model that encodes one string feature as a vector.

    The returned model takes a string tensor of shape ``(batch, 1)``, maps each
    string to an integer index with a ``StringLookup`` layer built from
    ``vocabulary`` and feeds the index into a ``CategoryEncoding`` layer whose
    width equals the lookup layer's vocabulary size.

    Args:
        vocabulary: Strings known to the lookup layer. Defaults to the
            module-level ``all_training_bbs``.
        max_tokens: Currently unused; kept for interface compatibility.

    Returns:
        A ``tf.keras.Model`` mapping the string input to its encoded vector.
    """
    # String -> integer index.
    lookup = tf.keras.layers.StringLookup(vocabulary=vocabulary)
    # Integer index -> fixed-width indicator vector (multi-hot encoding).
    multi_hot = tf.keras.layers.CategoryEncoding(
        num_tokens=lookup.vocabulary_size()
    )

    # Wrap both layers in a functional model so the encoder can be called like
    # a single layer inside a larger Keras model.
    smiles_in = tf.keras.Input((1,), dtype=tf.string)
    encoded = multi_hot(lookup(smiles_in))
    return tf.keras.Model(inputs=smiles_in, outputs=encoded)

In [None]:
#def clone_model_with_unique_names(model, suffix):
#    config = model.get_config()
#    print(len(config['layers'][0]))
#    print(config['layers'][0]['name'])
#    for layer in config['layers']:
#        layer['config']['name'] = layer['config']['name'] + '_' + suffix
#    cloned_model = tf.keras.models.Model.from_config(config)
#    cloned_model.set_weights(model.get_weights())
#    return cloned_model


def splitted_bb_model(ds):
    """Build and compile the per-protein building-block model.

    One independent sub-model is created per target protein (named
    ``BRD4model``, ``HSAmodel`` and ``sEHmodel``). Each sub-model encodes the
    three building-block SMILES inputs with ``get_category_encoding_layer``,
    adds the three encodings, optionally passes them through the hidden layers
    configured by ``HIDDEN_NEURONS`` / ``WITH_DROPOUT`` / ``DROPOUT_RATE`` /
    ``ACTIVATION`` and ends in a single-unit ``Dense`` layer.

    The three sub-model outputs are concatenated and reduced with a dot
    product against the ``oneHot`` input, so that input selects (or weights)
    which protein's output reaches the final result.
    NOTE(review): this presumes the ``oneHot`` columns are ordered
    BRD4, HSA, sEH -- confirm against the data pipeline.

    Args:
        ds: Unused by this function; kept so existing callers keep working.

    Returns:
        A compiled ``tf.keras.Model`` with the inputs
        ``buildingblock1_smiles``, ``buildingblock2_smiles``,
        ``buildingblock3_smiles`` and ``oneHot``. The output is a logit: the
        loss is binary cross-entropy with ``from_logits=True``.
    """

    def make_bb_inputs():
        # Three string inputs, one per building block, in fixed order.
        return [
            tf.keras.Input((1,), name=f'buildingblock{i}_smiles', dtype=tf.string)
            for i in (1, 2, 3)
        ]

    def model_for_one_protein(name=None):
        # Stand-alone model for a single target protein.
        bb_inputs = make_bb_inputs()
        # Each building-block position gets its own encoder instance.
        encodings = [get_category_encoding_layer()(bb) for bb in bb_inputs]
        x = tf.keras.layers.Add()(encodings)
        for n_units in HIDDEN_NEURONS:
            if WITH_DROPOUT:
                x = tf.keras.layers.Dropout(DROPOUT_RATE)(x)
            x = tf.keras.layers.Dense(n_units, activation=ACTIVATION)(x)
        output = tf.keras.layers.Dense(1)(x)
        return tf.keras.Model(inputs=bb_inputs, outputs=output, name=name)

    bb_inputs = make_bb_inputs()
    protein_input = tf.keras.Input((3,), name='oneHot')

    # One separately-weighted sub-model per protein, all fed the same inputs.
    per_protein_outputs = [
        model_for_one_protein(name=model_name)(bb_inputs)
        for model_name in ("BRD4model", "HSAmodel", "sEHmodel")
    ]
    all_models = tf.keras.layers.Concatenate(axis=1)(per_protein_outputs)

    # Dot product with the protein selector collapses the three outputs to one.
    output = tf.keras.layers.Dot(axes=[1, 1])([protein_input, all_models])
    model = tf.keras.Model(inputs=[*bb_inputs, protein_input], outputs=output)

    # BinaryFocalCrossentropy with gamma=0 equals plain binary cross-entropy
    # but additionally supports class-balancing weights (alpha). It was tried
    # and found much less stable when computing gradients, so the plain loss
    # is used here.
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    # Pass the optimizer object that is constructed here instead of leaving it
    # unused next to a separate "adam" string.
    optimizer = tf.keras.optimizers.Adam()
    model.compile(
        optimizer=optimizer,
        loss=loss_fn,
        metrics=['accuracy'],
        # run_eagerly=True can be added here for debugging.
    )
    return model

In [None]:
# TPU alternative (disabled). If a TPU is available, replace the mirrored
# strategy below with:
#   tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
#   tf.tpu.experimental.initialize_tpu_system(tpu)
#   tpu_strategy = tf.distribute.TPUStrategy(tpu)
#   with tpu_strategy.scope(): ...

# Disabled: tf.keras.config.set_floatx('float64') -- a dtype policy should be
# used instead.

# Reset the global random seed right before the model is built.
tf.keras.utils.set_random_seed(42)

mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():
    # straight_model() is the alternative topology; the split model is used.
    model = splitted_bb_model(ds)

    # Repeat the training data once per epoch, then batch it; the number of
    # batches drawn per epoch is fixed by STEPS_PER_EPOCH.
    dataset = ds_merged.repeat(N_EPOCHS).batch(BATCH_SIZE)

    # Other callback sets tried earlier: [backup, terminate_on_nan] and
    # [reduce_lr_on_nan].
    model.fit(
        dataset,
        epochs=N_EPOCHS,
        steps_per_epoch=STEPS_PER_EPOCH,
        callbacks=[terminate_and_backup, stop_after_5_hours],
    )

## Predict on Test Set

In [None]:
# Load the unlabeled test data, batch it and run the trained model on it.
print("Start predicting....")
test_ds = get_ds_csv(test_path, n_samples=N_TEST, labeled=False).batch(512)
y = model.predict(test_ds, verbose=0)
print('Done predicting')

## Write Submission File

In [None]:
import pandas as pd
from scipy.special import expit, logit

# First column of the prediction matrix: one raw model output per test row.
yy = y[:, 0]

# Ids are generated as consecutive integers starting at 295246830.
# NOTE(review): presumably this is the first id of the competition test file,
# which only lines up when the full test set was predicted in file order --
# confirm before submitting a partial run.
# The model outputs logits, so expit (the logistic sigmoid) turns them into
# probabilities for the 'binds' column.
d = pd.DataFrame({
    'id': range(295246830, 295246830 + len(yy)),
    'binds': expit(yy),
})

print("Start writing...")
d.to_csv('submission.csv', index=False, header=True)

# Quick sanity check of the predicted probability distribution.
print(d['binds'].describe())

## Plot Weight Statistics

In [None]:
import matplotlib.pyplot as plt

# Layers of the BRD4 sub-model, looked up by the name it receives in
# splitted_bb_model() rather than by its position in model.layers.
BRD4_layers = model.get_layer(name="BRD4model").layers

# Hidden layers are all Dense layers of the sub-model except the last one,
# which is the single-unit output layer. Selecting by type instead of by a
# fixed slice keeps this working when WITH_DROPOUT inserts Dropout layers
# (which carry no weights) between the Dense layers.
BRD4_hidden_layers = [
    layer for layer in BRD4_layers if isinstance(layer, tf.keras.layers.Dense)
][:-1]
#fig, axs = plt.subplots(len(BRD4_hidden_layers), 2)

# Print summary statistics of the kernel weights of every hidden layer.
for i_layer, layer in enumerate(BRD4_hidden_layers):
    weights, biases = layer.get_weights()
    print("Layer ", i_layer, ":")
    print(pd.Series(weights.flatten()).describe(percentiles=[.25, .5, .75, .95]))
    print()
    # Optional histograms of weights and biases (disabled):
    #axs[i_layer, 0].hist(weights.flatten(), bins=100)
    #axs[i_layer, 1].hist(biases.flatten(), bins=100)

#plt.show()