In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


<h2><b> SMILES tokenization assesses the Tokenizer subsystem on unconditional generation of SMILES and assesses the validity and Tanimoto similarity values. </b></h2>

---

# Install Libraries

In [None]:
!pip install SmilesPE langchain ctransformers torch rdkit

Collecting SmilesPE
  Downloading SmilesPE-0.0.3-py3-none-any.whl (15 kB)
Collecting langchain
  Downloading langchain-0.1.16-py3-none-any.whl (817 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m817.7/817.7 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ctransformers
  Downloading ctransformers-0.2.27-py3-none-any.whl (9.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
Collecting rdkit
  Downloading rdkit-2023.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.4/34.4 MB[0m [31m36.9 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.32 (from lan

In [None]:
from transformers import BertTokenizerFast, BertLMHeadModel
checkpoint = 'unikei/bert-base-smiles'
bert_tokenizer = BertTokenizerFast.from_pretrained(checkpoint)
bert_model = BertLMHeadModel.from_pretrained(checkpoint)

# Initialize empty lists to store encoded inputs
input_ids_list = []
attention_mask_list = []

vocab_file_path = '/content/drive/MyDrive/LLama2HealthCareChatBot-master/SPE_ChEMBL.txt'

tokenizer_config.json:   0%|          | 0.00/315 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/306k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/777k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`


# Default SMILESPE Tokenizer

In [None]:
# SMILESPE Fragment Level Tokenizer
import codecs
from SmilesPE.tokenizer import *

def test_spe():
    vocab_file_path = '/content/drive/MyDrive/LLama2HealthCareChatBot-master/SPE_ChEMBL.txt'
    spe_vob= codecs.open(vocab_file_path)
    spe = SPE_Tokenizer(spe_vob)

    smi = 'CC(=O)Oc1ccccc1C(=O)O'
    smiles = spe.tokenize(smi)

    return smiles

# Update BERT Vocab List

In [None]:
def update_tokens(tokeniz, modell, vocab_file_path):
  # Update BERT tokens
  tokeniz.add_tokens(vocab_file_path)

  # Save the updated tokenizer
  tokeniz.save_pretrained(checkpoint)

  # If you modified the vocabulary file, make sure to load it in the model
  modell.resize_token_embeddings(len(tokeniz))

  # Save updated model
  modell.save_pretrained(checkpoint)

  return tokeniz, modell

# Chemical Validity Check Function (using RDKit)

In [None]:
from rdkit import Chem

def chemical_validity_check(smi, file_to_edit):
  m = Chem.MolFromSmiles(smi, sanitize=False)
  if m is None:
    print('invalid', file=file_to_edit)
    return 0
  else:
    print('valid', file=file_to_edit)
    return 1

# Perform Mask Tokenization



In [None]:
from transformers import BertLMHeadModel, BertTokenizerFast
import torch
from rdkit import Chem
from rdkit.Chem import AllChem
import random

def calculate_tanimoto_similarity(smiles1, smiles2):
    # Convert SMILES to RDKit molecules
    mol1 = Chem.MolFromSmiles(smiles1)
    mol2 = Chem.MolFromSmiles(smiles2)

    # Calculate Morgan fingerprints
    fp1 = AllChem.GetMorganFingerprint(mol1, 2)
    fp2 = AllChem.GetMorganFingerprint(mol2, 2)

    # Calculate Tanimoto similarity
    similarity = AllChem.DataStructs.TanimotoSimilarity(fp1, fp2)

    return similarity

def mask_tokenization_process_default(smiles, maximum_count, output_file):
    checkpoint = 'unikei/bert-base-smiles'
    tokenizer = BertTokenizerFast.from_pretrained(checkpoint)
    model = BertLMHeadModel.from_pretrained(checkpoint, is_decoder=True)

    vocab_file_path = '/content/drive/MyDrive/LLama2HealthCareChatBot-master/SPE_ChEMBL.txt'
    spe_vob= codecs.open(vocab_file_path)
    spe = SPE_Tokenizer(spe_vob)

    tokenizer, model = update_tokens(tokenizer, model, vocab_file_path)

    output = []

    print("SMILES Before:", smiles, file=output_file)

    # Tokenize the input
    input_ids = tokenizer.encode(smiles, return_tensors="pt")

    valid = False

    # Set max_count threshold of possible generations to avoid infinite loop
    actual_count = 0

    while (actual_count != maximum_count):  # Generate 1 sequence
        # Generate sequence
        output_sequences = model.generate(
            input_ids,
            max_new_tokens=1,
            do_sample=True,  # Enable sampling (to allow non-deterministic SMILES completion)
            pad_token_id=tokenizer.eos_token, # May have to use better fragment library with pairwise bracket
            output_hidden_states=True
        )

        # Decode the sequence
        decoded_sequence = tokenizer.decode(output_sequences[0], skip_special_tokens=True)

        # Extract the last token (to replace [MASK])
        token = output_sequences[0][-1].item()
        token_str = tokenizer.decode(token)

        # Tokenize input SMILES
        smi = spe.tokenize(smiles)

        tokenized_string = smi.split()
        token_index = random.randint(0, len(tokenized_string) - 1)
        token_to_replace = tokenized_string[token_index]
        print("Token To Replace:", token_to_replace, file=output_file)
        print("Tokenized String:", tokenized_string, file=output_file)

        # Replace the [MASK] token with the predicted token
        decoded_sequence = decoded_sequence.replace(token_to_replace, token_str)

        x = chemical_validity_check(decoded_sequence.replace(" ", ""), output_file)
        similarity = -1
        if x == 1:
            valid = True
            if (Chem.MolFromSmiles(decoded_sequence.replace(" ", "")) != None):
              similarity = calculate_tanimoto_similarity(smiles, decoded_sequence.replace(" ", ""))
        else:
            valid = False

        print("SMILES After: ", decoded_sequence, file=output_file)
        # Calculate the score for the generated sequence
        with torch.no_grad():
            logits = model.forward(input_ids=output_sequences, return_dict=True).logits
            softmax_scores = torch.softmax(logits[0, -1, :], dim=-1)  # Applying softmax to get probabilities
        score = softmax_scores.max().item()  # Taking the maximum probability as the score

        # Append sequence information to output
        output.append({
            "sequence": decoded_sequence,
            'validity': "valid" if valid else "invalid",
            'similarity': similarity,
            "score": score,
        })

        actual_count += 1

    return output

## Generate pipeline to benchmark performance

In [None]:
import pandas as pd
import statistics

def assess_model_validity_similarity(validity_list, similarity_list):
    validity = output['validity']
    similarity = output['similarity']

def run_model_visualize(num_samples, maximum_count, output_file):
    train_dataset = pd.read_csv('/content/drive/MyDrive/LLama2HealthCareChatBot-master/data/BindingAffinity_data/binding_affinity_train.csv')
    smiles_list = list(train_dataset['Canonical SMILE'])

    validity_count = 0

    validity_list = []
    similarity_list = []

    with open(output_file, "a") as output_file:
      for i in range(num_samples):
          smiles = random.choice(smiles_list)
          output_list = mask_tokenization_process_default(smiles, maximum_count, output_file)
          print(output_list)
          for val in output_list:
            validity = val['validity']
            similarity = val['similarity']

            if (validity == 'valid'):
              validity_count += 1

            if similarity != -1:
                similarity_list.append(similarity)


          validity_list.append(validity_count/maximum_count)

    avg_validity = 0
    avg_similarity = 0
    print(validity_list)
    print(similarity_list)
    if (validity_list):
      avg_validity = statistics.mean(validity_list)
    if (similarity_list):
      avg_similarity = statistics.mean(similarity_list)

    return avg_validity, avg_similarity

## Perform assessment of 100 SMILES generation from each of 20 random SMILES

In [None]:
validity, similarity = run_model_visualize(20, 100, 'output.txt')

[19:46:11] SMILES Parse Error: syntax error while parsing: ((1=((2((((3=(N((4=((=((2=(34)((()()N((1)((
[19:46:11] SMILES Parse Error: Failed parsing SMILES '((1=((2((((3=(N((4=((=((2=(34)((()()N((1)((' for input: '((1=((2((((3=(N((4=((=((2=(34)((()()N((1)(('
[19:46:12] SMILES Parse Error: syntax error while parsing: CC1=CC2C(CC3=CN(C4=CC=CC2=C34)C(C)C)N(C1)C(
[19:46:12] SMILES Parse Error: Failed parsing SMILES 'CC1=CC2C(CC3=CN(C4=CC=CC2=C34)C(C)C)N(C1)C(' for input: 'CC1=CC2C(CC3=CN(C4=CC=CC2=C34)C(C)C)N(C1)C('
[19:46:13] SMILES Parse Error: syntax error while parsing: CC1=CC2C(CC3=CN(C4=CC=CC2=C34)C(C)C)N(C1)C(
[19:46:13] SMILES Parse Error: Failed parsing SMILES 'CC1=CC2C(CC3=CN(C4=CC=CC2=C34)C(C)C)N(C1)C(' for input: 'CC1=CC2C(CC3=CN(C4=CC=CC2=C34)C(C)C)N(C1)C('
[19:46:13] SMILES Parse Error: syntax error while parsing: CC1=(C(CC3=CN(C4=CC=(=C34)C(C)C)N(C1)C(
[19:46:13] SMILES Parse Error: Failed parsing SMILES 'CC1=(C(CC3=CN(C4=CC=(=C34)C(C)C)N(C1)C(' for input: 'CC1=(C(CC3=CN(C4=

[{'sequence': '((1 = ((2( ( ((3 = (N ( (4 = (( = ((2 = (34 ) ( ( ( ) ( ) N ( (1 ) ( (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999562501907349}, {'sequence': 'CC1 = CC2C ( CC3 = CN ( C4 = CC = CC2 = C34 ) C ( C ) C ) N ( C1 ) C (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999562501907349}, {'sequence': 'CC1 = CC2C ( CC3 = CN ( C4 = CC = CC2 = C34 ) C ( C ) C ) N ( C1 ) C (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999562501907349}, {'sequence': 'CC1 = (C ( CC3 = CN ( C4 = CC = ( = C34 ) C ( C ) C ) N ( C1 ) C (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999562501907349}, {'sequence': 'CC1 = (C ( CC3 = CN ( C4 = CC = ( = C34 ) C ( C ) C ) N ( C1 ) C (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999562501907349}, {'sequence': 'CC1 = CC2C ( CC3 = CN ( C4 = CC = CC2 = C34 ) C ( C ) C ) N ( C1 ) C (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999562501907349}, {'sequence': 'CC1 = CC2C ( CC3 = CN ( C4 = CC = CC2 = C34 )

[19:47:00] SMILES Parse Error: syntax error while parsing: CCCS(=O)(=O)NC1=C(C(=C(C=C1)F)C(=O)C2=CNC3=NC=C(C=C23)Cl)F(
[19:47:00] SMILES Parse Error: Failed parsing SMILES 'CCCS(=O)(=O)NC1=C(C(=C(C=C1)F)C(=O)C2=CNC3=NC=C(C=C23)Cl)F(' for input: 'CCCS(=O)(=O)NC1=C(C(=C(C=C1)F)C(=O)C2=CNC3=NC=C(C=C23)Cl)F('
[19:47:00] SMILES Parse Error: syntax error while parsing: CCCS(=O)(=O)NC1=C(C(=C(C=C1)F)C(=O)C2=CNC3=NC=C(C=C23)Cl)F(
[19:47:00] SMILES Parse Error: Failed parsing SMILES 'CCCS(=O)(=O)NC1=C(C(=C(C=C1)F)C(=O)C2=CNC3=NC=C(C=C23)Cl)F(' for input: 'CCCS(=O)(=O)NC1=C(C(=C(C=C1)F)C(=O)C2=CNC3=NC=C(C=C23)Cl)F('
[19:47:01] SMILES Parse Error: syntax error while parsing: CCCS(=O)(=O)NC1=C(C(=C(C=C1)F)C(=O)C2=(C3=NC=C(C=C23)Cl)F(
[19:47:01] SMILES Parse Error: Failed parsing SMILES 'CCCS(=O)(=O)NC1=C(C(=C(C=C1)F)C(=O)C2=(C3=NC=C(C=C23)Cl)F(' for input: 'CCCS(=O)(=O)NC1=C(C(=C(C=C1)F)C(=O)C2=(C3=NC=C(C=C23)Cl)F('
[19:47:01] SMILES Parse Error: syntax error while parsing: CCCS(=O)(=O)NC1=C(C(=C(

[{'sequence': 'CCCS ( = O ) ( = O ) NC1 = C ( C ( = C ( C = C1 ) F ) C ( = O ) C2 = CNC3 = NC = C ( C = C23 ) Cl ) F (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9558146595954895}, {'sequence': 'CCCS ( = O ) ( = O ) NC1 = C ( C ( = C ( C = C1 ) F ) C ( = O ) C2 = CNC3 = NC = C ( C = C23 ) Cl ) F (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9558146595954895}, {'sequence': 'CCCS ( = O ) ( = O ) NC1 = C ( C ( = C ( C = C1 ) F ) C ( = O ) C2 = (C3 = NC = C ( C = C23 ) Cl ) F (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9558146595954895}, {'sequence': 'CCCS ( = O ) ( = O ) NC1 = C ( C ( = C ( C = C1 ) F ) C ( = O ) C2 = CNC3 = NC = C ( C = C23 ) Cl ) F (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9558146595954895}, {'sequence': 'CCCS ( = O ) ( = O ) NC1 = C ( C ( = C ( C = C1 ) F ) C ( = O ) C2 = (C3 = NC = C ( C = C23 ) Cl ) F (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9558146595954895}, {'sequence': 'CCCS ( = O ) ( = O ) NC1 = C ( C

[19:48:00] SMILES Parse Error: syntax error while parsing: C1CCC(C1)C(CC#N)N2C=C(C=N2)C3=C4C=CNC4=NC=N3(
[19:48:00] SMILES Parse Error: Failed parsing SMILES 'C1CCC(C1)C(CC#N)N2C=C(C=N2)C3=C4C=CNC4=NC=N3(' for input: 'C1CCC(C1)C(CC#N)N2C=C(C=N2)C3=C4C=CNC4=NC=N3('
[19:48:01] SMILES Parse Error: syntax error while parsing: C1CCC(C1)C(CC#N)N2C=C(C=N2)C3=C4C=(C4=NC=N3(
[19:48:01] SMILES Parse Error: Failed parsing SMILES 'C1CCC(C1)C(CC#N)N2C=C(C=N2)C3=C4C=(C4=NC=N3(' for input: 'C1CCC(C1)C(CC#N)N2C=C(C=N2)C3=C4C=(C4=NC=N3('
[19:48:01] SMILES Parse Error: syntax error while parsing: C1CCC(C1)C(CC#N)(C=C(C=()C3=C4C=CNC4=NC=N3(
[19:48:01] SMILES Parse Error: Failed parsing SMILES 'C1CCC(C1)C(CC#N)(C=C(C=()C3=C4C=CNC4=NC=N3(' for input: 'C1CCC(C1)C(CC#N)(C=C(C=()C3=C4C=CNC4=NC=N3('
[19:48:02] SMILES Parse Error: syntax error while parsing: C1CCC(C1)C(CC#N)(C=C(C=()C3=C4C=CNC4=NC=N3(
[19:48:02] SMILES Parse Error: Failed parsing SMILES 'C1CCC(C1)C(CC#N)(C=C(C=()C3=C4C=CNC4=NC=N3(' for input: '

[{'sequence': 'C1CCC ( C1 ) C ( CC # N ) N2C = C ( C = N2 ) C3 = C4C = CNC4 = NC = N3 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999489784240723}, {'sequence': 'C1CCC ( C1 ) C ( CC # N ) N2C = C ( C = N2 ) C3 = C4C = (C4 = NC = N3 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999489784240723}, {'sequence': 'C1CCC ( C1 ) C ( CC # N ) (C = C ( C = ( ) C3 = C4C = CNC4 = NC = N3 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999489784240723}, {'sequence': 'C1CCC ( C1 ) C ( CC # N ) (C = C ( C = ( ) C3 = C4C = CNC4 = NC = N3 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999489784240723}, {'sequence': 'C1CCC ( C1 ) C ( CC # N ) N2C = C ( C = N2 ) C3 = C4C = CNC4 = NC = N3 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999489784240723}, {'sequence': 'C1CCC ( C1 ) C ( CC # N ) N2C = C ( C = N2 ) C3 = C4C = CNC4 = NC = N3 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999489784240723}, {'sequence': 'C1CCC ( C1 ) C ( CC # N ) N2C 

[19:48:46] SMILES Parse Error: syntax error while parsing: CN1CCN(CC1)CCCOC2=C(C=C3C(=C2)N=CC(=C3NC4=CC(=C(C=C4Cl)Cl)OC)C#N)OC(
[19:48:46] SMILES Parse Error: Failed parsing SMILES 'CN1CCN(CC1)CCCOC2=C(C=C3C(=C2)N=CC(=C3NC4=CC(=C(C=C4Cl)Cl)OC)C#N)OC(' for input: 'CN1CCN(CC1)CCCOC2=C(C=C3C(=C2)N=CC(=C3NC4=CC(=C(C=C4Cl)Cl)OC)C#N)OC('
[19:48:47] SMILES Parse Error: syntax error while parsing: CN1CCN(CC1)CCCOC2=C(C=C3C(=C2)N=CC(=C3NC4=CC(=C(C=C4Cl)Cl)OC)C#N)OC(
[19:48:47] SMILES Parse Error: Failed parsing SMILES 'CN1CCN(CC1)CCCOC2=C(C=C3C(=C2)N=CC(=C3NC4=CC(=C(C=C4Cl)Cl)OC)C#N)OC(' for input: 'CN1CCN(CC1)CCCOC2=C(C=C3C(=C2)N=CC(=C3NC4=CC(=C(C=C4Cl)Cl)OC)C#N)OC('
[19:48:47] SMILES Parse Error: syntax error while parsing: CN1CCN(CC1)CCCOC2=C(C=C3C(=C2)N=CC(=C3NC4=CC(=C(C=C4Cl)Cl)OC)C#N)OC(
[19:48:47] SMILES Parse Error: Failed parsing SMILES 'CN1CCN(CC1)CCCOC2=C(C=C3C(=C2)N=CC(=C3NC4=CC(=C(C=C4Cl)Cl)OC)C#N)OC(' for input: 'CN1CCN(CC1)CCCOC2=C(C=C3C(=C2)N=CC(=C3NC4=CC(=C(C=C4Cl)Cl)OC)C#N)OC(

[{'sequence': 'CN1CCN ( CC1 ) CCCOC2 = C ( C = C3C ( = C2 ) N = CC ( = C3NC4 = CC ( = C ( C = C4Cl ) Cl ) OC ) C # N ) OC (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9998596906661987}, {'sequence': 'CN1CCN ( CC1 ) CCCOC2 = C ( C = C3C ( = C2 ) N = CC ( = C3NC4 = CC ( = C ( C = C4Cl ) Cl ) OC ) C # N ) OC (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9998596906661987}, {'sequence': 'CN1CCN ( CC1 ) CCCOC2 = C ( C = C3C ( = C2 ) N = CC ( = C3NC4 = CC ( = C ( C = C4Cl ) Cl ) OC ) C # N ) OC (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9998596906661987}, {'sequence': 'CN1CCN ( CC1 ) CCC(2 = C ( C = C3C ( = C2 ) N = CC ( = C3NC4 = CC ( = C ( C = C4Cl ) Cl ) ( ) C # N ) ( (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9998596906661987}, {'sequence': 'CN1CCN ( CC1 ) CCCOC2 = C ( C = C3C ( = C2 ) N = CC ( = C3NC4 = CC ( = C ( C = C4Cl ) Cl ) OC ) C # N ) OC (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9998596906661987}, {'sequence': 'CN1CCN (

[19:49:45] SMILES Parse Error: syntax error while parsing: C1CCC(CC1)CCCCNC(=O)C2=COC(=N2)C3C4CCC(C3CC5=CC=CC=C5CC(=O)O)O4=
[19:49:45] SMILES Parse Error: Failed parsing SMILES 'C1CCC(CC1)CCCCNC(=O)C2=COC(=N2)C3C4CCC(C3CC5=CC=CC=C5CC(=O)O)O4=' for input: 'C1CCC(CC1)CCCCNC(=O)C2=COC(=N2)C3C4CCC(C3CC5=CC=CC=C5CC(=O)O)O4='
[19:49:45] SMILES Parse Error: unclosed ring for input: 'C1CCC(CC1)CCCCNC(=O)C2=COC(=N2)C3C4CCC(C3CC5=CC=CC=C5CC(=O)O)O4C1'
[19:49:46] Explicit valence for atom # 35 O, 3, is greater than permitted
[19:49:47] SMILES Parse Error: syntax error while parsing: C1CCC(CC1)CCCCNC(=O)C2=COC(=N2)(C4CCC((CC5=CC=CC=C5CC(=O)O)O4(
[19:49:47] SMILES Parse Error: Failed parsing SMILES 'C1CCC(CC1)CCCCNC(=O)C2=COC(=N2)(C4CCC((CC5=CC=CC=C5CC(=O)O)O4(' for input: 'C1CCC(CC1)CCCCNC(=O)C2=COC(=N2)(C4CCC((CC5=CC=CC=C5CC(=O)O)O4('
[19:49:48] SMILES Parse Error: syntax error while parsing: C1CCC(CC1)CCCCNC(=O)C2=COC(=N2)C3C4CCC(C3CC5=CC=CC=C5CC(=O)O)O4(
[19:49:48] SMILES Parse Error: Failed pa

[{'sequence': 'C1CCC ( CC1 ) CCCCNC ( = O ) C2 = COC ( = N2 ) C3C4CCC ( C3CC5 = CC = CC = C5CC ( = O ) O ) O4 =', 'validity': 'invalid', 'similarity': -1, 'score': 0.9918720126152039}, {'sequence': 'C1CCC ( CC1 ) CCCCNC ( = O ) C2 = COC ( = N2 ) C3C4CCC ( C3CC5 = CC = CC = C5CC ( = O ) O ) O4C1', 'validity': 'invalid', 'similarity': -1, 'score': 0.061020683497190475}, {'sequence': 'C1CCC ( CC1 ) CCCCNC ( = O ) C2 = COC ( = N2 ) C3C4CCC ( C3CC5 = CC = CC = C5CC ( = O ) O ) O4CN', 'validity': 'valid', 'similarity': -1, 'score': 0.07606103271245956}, {'sequence': 'C1CCC ( CC1 ) CCCCNC ( = O ) C2 = COC ( = N2 ) (C4CCC ( (CC5 = CC = CC = C5CC ( = O ) O ) O4 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999529123306274}, {'sequence': 'C1CCC ( CC1 ) CCCCNC ( = O ) C2 = COC ( = N2 ) C3C4CCC ( C3CC5 = CC = CC = C5CC ( = O ) O ) O4 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999529123306274}, {'sequence': 'C1CCC ( CC1 ) CCCCNC ( = O ) C2 = COC ( = N2 ) C3C4CCC ( C3CC5 = C

[19:50:38] SMILES Parse Error: syntax error while parsing: CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=CC=CC=C5(
[19:50:38] SMILES Parse Error: Failed parsing SMILES 'CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=CC=CC=C5(' for input: 'CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=CC=CC=C5('
[19:50:40] SMILES Parse Error: syntax error while parsing: CN1##NN(##N1)##N(=O)N(C)C2=##N=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=##N=##N=C5N
[19:50:40] SMILES Parse Error: Failed parsing SMILES 'CN1##NN(##N1)##N(=O)N(C)C2=##N=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=##N=##N=C5N' for input: 'CN1##NN(##N1)##N(=O)N(C)C2=##N=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=##N=##N=C5N'
[19:50:41] SMILES Parse Error: syntax error while parsing: CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3==C(C=C(C==)C(=O)OC)NC3=O)C5=CC=CC=C5=
[19:50:41] SMILES Parse Error: Failed parsing SMILES 'CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3==C(C=C(C=

[{'sequence': 'CN1CCN ( CC1 ) CC ( = O ) N ( C ) C2 = CC = C ( C = C2 ) NC ( = C3C4 = C ( C = C ( C = C4 ) C ( = O ) OC ) NC3 = O ) C5 = CC = CC = C5 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999556541442871}, {'sequence': 'CN1CCN ( CC1 ) CC ( = O ) N ( C ) C2 = CC = C ( C = C2 ) NC ( = C3C4 = C ( C = C ( C = C4 ) C ( = O ) OC ) NC3 = O ) C5 = CC = CC = C5C', 'validity': 'valid', 'similarity': 0.8548387096774194, 'score': 0.2563951313495636}, {'sequence': 'CN1##NN ( ##N1 ) ##N ( = O ) N ( C ) C2 = ##N = C ( C = C2 ) NC ( = C3C4 = C ( C = C ( C = C4 ) C ( = O ) OC ) NC3 = O ) C5 = ##N = ##N = C5N', 'validity': 'invalid', 'similarity': -1, 'score': 0.47312411665916443}, {'sequence': 'CN1CCN ( CC1 ) CC ( = O ) N ( C ) C2 = CC = C ( C = C2 ) NC ( = C3= = C ( C = C ( C = = ) C ( = O ) OC ) NC3 = O ) C5 = CC = CC = C5 =', 'validity': 'invalid', 'similarity': -1, 'score': 0.9884144067764282}, {'sequence': 'CN1CCN ( CC1 ) CC ( = O ) N ( C ) C2 = CC = C ( C = C2 ) NC ( = C3C4 = C

[19:51:53] SMILES Parse Error: syntax error while parsing: C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O(
[19:51:53] SMILES Parse Error: Failed parsing SMILES 'C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O(' for input: 'C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O('
[19:51:54] SMILES Parse Error: syntax error while parsing: C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O(
[19:51:54] SMILES Parse Error: Failed parsing SMILES 'C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O(' for input: 'C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O('
[19:51:54] SMILES Parse Error: syntax error while parsing: C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O(
[19:51:54] SMILES Parse Error: Failed parsing SMILES 'C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O(' for input: 'C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O('
[19:51:55] SMILES Parse Error: syntax error while parsing: C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O(
[19:51:55] SMILES Parse Error: Failed parsing SMILES 'C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O(' for input: 'C1=CC(=CC=C1C2=CC=C(C=C2)N)C(=O)O('
[19:51:55] SMILES Parse Error: syntax error while parsing: C1=((=(=C1C2=

[{'sequence': 'C1 = CC ( = CC = C1C2 = CC = C ( C = C2 ) N ) C ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999567270278931}, {'sequence': 'C1 = CC ( = CC = C1C2 = CC = C ( C = C2 ) N ) C ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999567270278931}, {'sequence': 'C1 = CC ( = CC = C1C2 = CC = C ( C = C2 ) N ) C ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999567270278931}, {'sequence': 'C1 = CC ( = CC = C1C2 = CC = C ( C = C2 ) N ) C ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999567270278931}, {'sequence': 'C1 = ( ( = ( = C1C2 = ( = C ( C = C2 ) N ) C ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999567270278931}, {'sequence': 'C1 = ( ( = ( = C1C2 = ( = C ( C = C2 ) N ) C ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999567270278931}, {'sequence': 'C1 = CC ( = CC = C1C2 = CC = C ( C = C2 ) N ) C ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score

[19:52:38] SMILES Parse Error: syntax error while parsing: COC1=C(C(=CC=C1)OC)(CC2COC3=C(O2)C(=CC=C3)Cl(
[19:52:38] SMILES Parse Error: Failed parsing SMILES 'COC1=C(C(=CC=C1)OC)(CC2COC3=C(O2)C(=CC=C3)Cl(' for input: 'COC1=C(C(=CC=C1)OC)(CC2COC3=C(O2)C(=CC=C3)Cl('
[19:52:39] SMILES Parse Error: syntax error while parsing: COC1=C(C(=CC=C1)OC)OCCNCC2COC3=C(O2)C(=CC=C3)Cl(
[19:52:39] SMILES Parse Error: Failed parsing SMILES 'COC1=C(C(=CC=C1)OC)OCCNCC2COC3=C(O2)C(=CC=C3)Cl(' for input: 'COC1=C(C(=CC=C1)OC)OCCNCC2COC3=C(O2)C(=CC=C3)Cl('
[19:52:39] SMILES Parse Error: syntax error while parsing: COC1=C(C(=CC=C1)OC)OCCNCC2COC3=C(O2)C(=CC=C3)Cl(
[19:52:39] SMILES Parse Error: Failed parsing SMILES 'COC1=C(C(=CC=C1)OC)OCCNCC2COC3=C(O2)C(=CC=C3)Cl(' for input: 'COC1=C(C(=CC=C1)OC)OCCNCC2COC3=C(O2)C(=CC=C3)Cl('
[19:52:40] SMILES Parse Error: syntax error while parsing: COC1=C(C(=CC=C1)OC)OCCNCC2COC3=C(O2)C(=CC=C3)((
[19:52:40] SMILES Parse Error: Failed parsing SMILES 'COC1=C(C(=CC=C1)OC)OCCNCC2

[{'sequence': 'COC1 = C ( C ( = CC = C1 ) OC ) (CC2COC3 = C ( O2 ) C ( = CC = C3 ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999618530273438}, {'sequence': 'COC1 = C ( C ( = CC = C1 ) OC ) OCCNCC2COC3 = C ( O2 ) C ( = CC = C3 ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999618530273438}, {'sequence': 'COC1 = C ( C ( = CC = C1 ) OC ) OCCNCC2COC3 = C ( O2 ) C ( = CC = C3 ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999618530273438}, {'sequence': 'COC1 = C ( C ( = CC = C1 ) OC ) OCCNCC2COC3 = C ( O2 ) C ( = CC = C3 ) ( (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999618530273438}, {'sequence': 'COC1 = C ( C ( = CC = C1 ) OC ) OCCNCC2COC3 = C ( O2 ) C ( = CC = C3 ) ( (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999618530273438}, {'sequence': 'COC1 = C ( C ( = CC = C1 ) OC ) OCCNCC2COC3 = C ( O2 ) C ( = CC = C3 ) ( (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999618530273438}, {'sequence': '(C1 = C ( C (

[19:53:26] SMILES Parse Error: syntax error while parsing: C1=CC(=CC(=C1)O)C2=NC3=C(N=C2C4=CC(=CC=C4)O)N=C(N=C3N)N(
[19:53:26] SMILES Parse Error: Failed parsing SMILES 'C1=CC(=CC(=C1)O)C2=NC3=C(N=C2C4=CC(=CC=C4)O)N=C(N=C3N)N(' for input: 'C1=CC(=CC(=C1)O)C2=NC3=C(N=C2C4=CC(=CC=C4)O)N=C(N=C3N)N('
[19:53:26] SMILES Parse Error: syntax error while parsing: C1(CC((CC((C1)O)C2(NC3(C(N(C2C4(CC((CC(C4)O)N(C(N(C3N)N(
[19:53:26] SMILES Parse Error: Failed parsing SMILES 'C1(CC((CC((C1)O)C2(NC3(C(N(C2C4(CC((CC(C4)O)N(C(N(C3N)N(' for input: 'C1(CC((CC((C1)O)C2(NC3(C(N(C2C4(CC((CC(C4)O)N(C(N(C3N)N('
[19:53:27] SMILES Parse Error: syntax error while parsing: C1=CC(=CC(=C1)O)C2=NC3=C(N=C2C4=CC(=CC=C4)O)N=C(N=C3N)N(
[19:53:27] SMILES Parse Error: Failed parsing SMILES 'C1=CC(=CC(=C1)O)C2=NC3=C(N=C2C4=CC(=CC=C4)O)N=C(N=C3N)N(' for input: 'C1=CC(=CC(=C1)O)C2=NC3=C(N=C2C4=CC(=CC=C4)O)N=C(N=C3N)N('
[19:53:27] SMILES Parse Error: syntax error while parsing: C1=CC(=CC(=C1)O)C2=NC3=C(N=C2C4=CC(=CC=C4)O)N=C

[{'sequence': 'C1 = CC ( = CC ( = C1 ) O ) C2 = NC3 = C ( N = C2C4 = CC ( = CC = C4 ) O ) N = C ( N = C3N ) N (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999710321426392}, {'sequence': 'C1 ( CC ( ( CC ( ( C1 ) O ) C2 ( NC3 ( C ( N ( C2C4 ( CC ( ( CC ( C4 ) O ) N ( C ( N ( C3N ) N (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999710321426392}, {'sequence': 'C1 = CC ( = CC ( = C1 ) O ) C2 = NC3 = C ( N = C2C4 = CC ( = CC = C4 ) O ) N = C ( N = C3N ) N (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999710321426392}, {'sequence': 'C1 = CC ( = CC ( = C1 ) O ) C2 = NC3 = C ( N = C2C4 = CC ( = CC = C4 ) O ) N = C ( N = C3N ) N (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999710321426392}, {'sequence': 'C1 = CC ( = CC ( = C1 ) O ) C2 = NC3 = C ( N = C2C4 = CC ( = CC = C4 ) O ) N = C ( N = C3N ) N (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999710321426392}, {'sequence': 'C1 = CC ( = CC ( = C1 ) O ) C2 = NC3 = C ( N = C2C4 = CC ( = CC =

[19:54:23] SMILES Parse Error: syntax error while parsing: C1CCCC(CCC1)NC2=C(C(=C(C(=C2S(=O)(=O)N)F)F)SCCO)F(
[19:54:23] SMILES Parse Error: Failed parsing SMILES 'C1CCCC(CCC1)NC2=C(C(=C(C(=C2S(=O)(=O)N)F)F)SCCO)F(' for input: 'C1CCCC(CCC1)NC2=C(C(=C(C(=C2S(=O)(=O)N)F)F)SCCO)F('
[19:54:24] SMILES Parse Error: syntax error while parsing: C1CCCC(CCC1)NC2=C(C(=C(C(=C2S(=O)(=O)N)F)F)SCCO)F(
[19:54:24] SMILES Parse Error: Failed parsing SMILES 'C1CCCC(CCC1)NC2=C(C(=C(C(=C2S(=O)(=O)N)F)F)SCCO)F(' for input: 'C1CCCC(CCC1)NC2=C(C(=C(C(=C2S(=O)(=O)N)F)F)SCCO)F('
[19:54:24] SMILES Parse Error: syntax error while parsing: C1CCCC(CCC1)N(=C(C(=C(C(=(S(=O)(=O)N)F)F)SCCO)F(
[19:54:24] SMILES Parse Error: Failed parsing SMILES 'C1CCCC(CCC1)N(=C(C(=C(C(=(S(=O)(=O)N)F)F)SCCO)F(' for input: 'C1CCCC(CCC1)N(=C(C(=C(C(=(S(=O)(=O)N)F)F)SCCO)F('
[19:54:25] SMILES Parse Error: syntax error while parsing: C1CCCC(CCC1)NC2=C(C(=C(C(=C2S(=O)(=O)N)F)F)SCCO)F(
[19:54:25] SMILES Parse Error: Failed parsing SMILES 'C1

[{'sequence': 'C1CCCC ( CCC1 ) NC2 = C ( C ( = C ( C ( = C2S ( = O ) ( = O ) N ) F ) F ) SCCO ) F (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9950889945030212}, {'sequence': 'C1CCCC ( CCC1 ) NC2 = C ( C ( = C ( C ( = C2S ( = O ) ( = O ) N ) F ) F ) SCCO ) F (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9950889945030212}, {'sequence': 'C1CCCC ( CCC1 ) N( = C ( C ( = C ( C ( = (S ( = O ) ( = O ) N ) F ) F ) SCCO ) F (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9950889945030212}, {'sequence': 'C1CCCC ( CCC1 ) NC2 = C ( C ( = C ( C ( = C2S ( = O ) ( = O ) N ) F ) F ) SCCO ) F (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9950889945030212}, {'sequence': 'C1CCCC ( CCC1 ) NC2 = C ( C ( = C ( C ( = C2S ( = O ) ( = O ) N ) F ) F ) SCCO ) F (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9950889945030212}, {'sequence': 'C1CCCC ( CCC1 ) N( = C ( C ( = C ( C ( = (S ( = O ) ( = O ) N ) F ) F ) SCCO ) F (', 'validity': 'invalid', 'similarity': -1, '

[19:55:17] SMILES Parse Error: syntax error while parsing: C1CCNC(C1)C(CO(=CC=CC=C2)O(
[19:55:17] SMILES Parse Error: Failed parsing SMILES 'C1CCNC(C1)C(CO(=CC=CC=C2)O(' for input: 'C1CCNC(C1)C(CO(=CC=CC=C2)O('
[19:55:17] SMILES Parse Error: syntax error while parsing: C1CCNC(C1)C(CO(=CC=CC=C2)O(
[19:55:17] SMILES Parse Error: Failed parsing SMILES 'C1CCNC(C1)C(CO(=CC=CC=C2)O(' for input: 'C1CCNC(C1)C(CO(=CC=CC=C2)O('
[19:55:17] SMILES Parse Error: syntax error while parsing: C1CCNC(C1)C(COCCCC2=CC=CC=C2)O(
[19:55:17] SMILES Parse Error: Failed parsing SMILES 'C1CCNC(C1)C(COCCCC2=CC=CC=C2)O(' for input: 'C1CCNC(C1)C(COCCCC2=CC=CC=C2)O('
[19:55:17] SMILES Parse Error: syntax error while parsing: (CCNC(()C(COCCCC2=CC=CC=C2)O(
[19:55:17] SMILES Parse Error: Failed parsing SMILES '(CCNC(()C(COCCCC2=CC=CC=C2)O(' for input: '(CCNC(()C(COCCCC2=CC=CC=C2)O('
[19:55:18] SMILES Parse Error: syntax error while parsing: C1CCNC(C1)C(COCCCC2=CC=CC=C2)O(
[19:55:18] SMILES Parse Error: Failed parsing S

[{'sequence': 'C1CCNC ( C1 ) C ( CO( = CC = CC = C2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999159574508667}, {'sequence': 'C1CCNC ( C1 ) C ( CO( = CC = CC = C2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999159574508667}, {'sequence': 'C1CCNC ( C1 ) C ( COCCCC2 = CC = CC = C2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999159574508667}, {'sequence': '(CCNC ( ( ) C ( COCCCC2 = CC = CC = C2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999159574508667}, {'sequence': 'C1CCNC ( C1 ) C ( COCCCC2 = CC = CC = C2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999159574508667}, {'sequence': 'C1CCNC ( C1 ) C ( COCCCC2 = CC = CC = C2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999159574508667}, {'sequence': '(CCNC ( ( ) C ( COCCCC2 = CC = CC = C2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999159574508667}, {'sequence': 'C1CCNC ( C1 ) C ( COCCCC2 = CC = CC = C2 ) O (', 'validity'

[19:55:54] SMILES Parse Error: syntax error while parsing: C1=CC=C(C=C1)C2=C(C(=O)C3=CC=CC=C3O2)O(
[19:55:54] SMILES Parse Error: Failed parsing SMILES 'C1=CC=C(C=C1)C2=C(C(=O)C3=CC=CC=C3O2)O(' for input: 'C1=CC=C(C=C1)C2=C(C(=O)C3=CC=CC=C3O2)O('
[19:55:55] SMILES Parse Error: syntax error while parsing: C1=(=C(C=C1)C2=C(C(=O)C3=(=(=C3O2)O(
[19:55:55] SMILES Parse Error: Failed parsing SMILES 'C1=(=C(C=C1)C2=C(C(=O)C3=(=(=C3O2)O(' for input: 'C1=(=C(C=C1)C2=C(C(=O)C3=(=(=C3O2)O('
[19:55:56] SMILES Parse Error: syntax error while parsing: C1=CC=C(C=C1)C2=C(C(=O)C3=CC=CC=C3O2)O(
[19:55:56] SMILES Parse Error: Failed parsing SMILES 'C1=CC=C(C=C1)C2=C(C(=O)C3=CC=CC=C3O2)O(' for input: 'C1=CC=C(C=C1)C2=C(C(=O)C3=CC=CC=C3O2)O('
[19:55:56] SMILES Parse Error: syntax error while parsing: C1=CC=C(C=C1)C2=C(C(=O)C3=CC=CC=C3O2)O(
[19:55:56] SMILES Parse Error: Failed parsing SMILES 'C1=CC=C(C=C1)C2=C(C(=O)C3=CC=CC=C3O2)O(' for input: 'C1=CC=C(C=C1)C2=C(C(=O)C3=CC=CC=C3O2)O('
[19:55:57] SMILES Par

[{'sequence': 'C1 = CC = C ( C = C1 ) C2 = C ( C ( = O ) C3 = CC = CC = C3O2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999691247940063}, {'sequence': 'C1 = ( = C ( C = C1 ) C2 = C ( C ( = O ) C3 = ( = ( = C3O2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999691247940063}, {'sequence': 'C1 = CC = C ( C = C1 ) C2 = C ( C ( = O ) C3 = CC = CC = C3O2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999691247940063}, {'sequence': 'C1 = CC = C ( C = C1 ) C2 = C ( C ( = O ) C3 = CC = CC = C3O2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999691247940063}, {'sequence': 'C1 = CC = C ( C = C1 ) C2 = C ( C ( = O ) C3 = CC = CC = C3O2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999691247940063}, {'sequence': 'C1 = ( = C ( C = C1 ) C2 = C ( C ( = O ) C3 = ( = ( = C3O2 ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999691247940063}, {'sequence': 'C1 = CC = C ( C = C1 ) C2 = C ( C ( = O ) C3 = CC = CC = C3O2

[19:56:46] SMILES Parse Error: syntax error while parsing: C1=CC(=CC=C1SCC2C(C(C(N2)C3=CNC4=C3N=CN=C4N)O)O)Cl(
[19:56:46] SMILES Parse Error: Failed parsing SMILES 'C1=CC(=CC=C1SCC2C(C(C(N2)C3=CNC4=C3N=CN=C4N)O)O)Cl(' for input: 'C1=CC(=CC=C1SCC2C(C(C(N2)C3=CNC4=C3N=CN=C4N)O)O)Cl('
[19:56:46] SMILES Parse Error: syntax error while parsing: C1=CC(=CC=C1SCC2C(C(C(N2)C3=CNC4=C3N=CN=C4N)O)O)Cl(
[19:56:46] SMILES Parse Error: Failed parsing SMILES 'C1=CC(=CC=C1SCC2C(C(C(N2)C3=CNC4=C3N=CN=C4N)O)O)Cl(' for input: 'C1=CC(=CC=C1SCC2C(C(C(N2)C3=CNC4=C3N=CN=C4N)O)O)Cl('
[19:56:47] SMILES Parse Error: syntax error while parsing: C1=CC(=CC=C1S(C(C(C(N2)C3=CNC4=C3N=CN=C4N)O)O)Cl(
[19:56:47] SMILES Parse Error: Failed parsing SMILES 'C1=CC(=CC=C1S(C(C(C(N2)C3=CNC4=C3N=CN=C4N)O)O)Cl(' for input: 'C1=CC(=CC=C1S(C(C(C(N2)C3=CNC4=C3N=CN=C4N)O)O)Cl('
[19:56:47] SMILES Parse Error: syntax error while parsing: C1=CC(=CC=C1SCC2C(C(C(N2)C3=CNC4=C3N=CN=C4N)O)O)Cl(
[19:56:47] SMILES Parse Error: Failed parsing 

[{'sequence': 'C1 = CC ( = CC = C1SCC2C ( C ( C ( N2 ) C3 = CNC4 = C3N = CN = C4N ) O ) O ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999455213546753}, {'sequence': 'C1 = CC ( = CC = C1SCC2C ( C ( C ( N2 ) C3 = CNC4 = C3N = CN = C4N ) O ) O ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999455213546753}, {'sequence': 'C1 = CC ( = CC = C1S(C ( C ( C ( N2 ) C3 = CNC4 = C3N = CN = C4N ) O ) O ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999455213546753}, {'sequence': 'C1 = CC ( = CC = C1SCC2C ( C ( C ( N2 ) C3 = CNC4 = C3N = CN = C4N ) O ) O ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999455213546753}, {'sequence': 'C1 = CC ( = CC = C1SCC2C ( C ( C ( N2 ) C3 = CNC4 = C3N = CN = C4N ) O ) O ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999455213546753}, {'sequence': 'C1 = CC ( = CC = C1SCC2C ( C ( C ( N2 ) C3 = CNC4 = C3N = CN = C4N ) O ) O ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.999945

[19:57:39] SMILES Parse Error: syntax error while parsing: CC1=CC=C(C=C1)N2C(=CC(=N2)C(C)(C)C)NC(=O)NC3=CC=C(C=C3)N4C=NC(=C4Cl)Cl(
[19:57:39] SMILES Parse Error: Failed parsing SMILES 'CC1=CC=C(C=C1)N2C(=CC(=N2)C(C)(C)C)NC(=O)NC3=CC=C(C=C3)N4C=NC(=C4Cl)Cl(' for input: 'CC1=CC=C(C=C1)N2C(=CC(=N2)C(C)(C)C)NC(=O)NC3=CC=C(C=C3)N4C=NC(=C4Cl)Cl('
[19:57:39] SMILES Parse Error: syntax error while parsing: CC1=CC=C(C=C1)N2C(=CC(=N2)C(C)(C)C)NC(=O)NC3=CC=C(C=C3)N4C=NC(=C4Cl)Cl(
[19:57:39] SMILES Parse Error: Failed parsing SMILES 'CC1=CC=C(C=C1)N2C(=CC(=N2)C(C)(C)C)NC(=O)NC3=CC=C(C=C3)N4C=NC(=C4Cl)Cl(' for input: 'CC1=CC=C(C=C1)N2C(=CC(=N2)C(C)(C)C)NC(=O)NC3=CC=C(C=C3)N4C=NC(=C4Cl)Cl('
[19:57:40] SMILES Parse Error: syntax error while parsing: CC1=CC=C(C=C1)N2C(=CC(=N2)C(C)(C)C)NC(=O)NC3=CC=C(C=C3)N4C=NC(=C4Cl)Cl(
[19:57:40] SMILES Parse Error: Failed parsing SMILES 'CC1=CC=C(C=C1)N2C(=CC(=N2)C(C)(C)C)NC(=O)NC3=CC=C(C=C3)N4C=NC(=C4Cl)Cl(' for input: 'CC1=CC=C(C=C1)N2C(=CC(=N2)C(C)(C)C)NC(=O)NC3

[{'sequence': 'CC1 = CC = C ( C = C1 ) N2C ( = CC ( = N2 ) C ( C ) ( C ) C ) NC ( = O ) NC3 = CC = C ( C = C3 ) N4C = NC ( = C4Cl ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999704360961914}, {'sequence': 'CC1 = CC = C ( C = C1 ) N2C ( = CC ( = N2 ) C ( C ) ( C ) C ) NC ( = O ) NC3 = CC = C ( C = C3 ) N4C = NC ( = C4Cl ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999704360961914}, {'sequence': 'CC1 = CC = C ( C = C1 ) N2C ( = CC ( = N2 ) C ( C ) ( C ) C ) NC ( = O ) NC3 = CC = C ( C = C3 ) N4C = NC ( = C4Cl ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999704360961914}, {'sequence': '(1 = ( = C ( C = C1 ) N2C ( = ( ( = N2 ) C ( C ) ( C ) C ) NC ( = O ) NC3 = ( = C ( C = C3 ) N4C = NC ( = C4Cl ) Cl (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999704360961914}, {'sequence': 'CC1 = CC = C ( C = C1 ) N2C ( = CC ( = N2 ) C ( C ) ( C ) C ) NC ( = O ) NC3 = CC = C ( C = C3 ) N4C = NC ( = C4Cl ) Cl (', 'validity': 'invalid', 'simil

[19:58:49] SMILES Parse Error: syntax error while parsing: C(C1C(C(C(C(O1)SC2C(C(C(C(O2)CO)O)O)O)O)O)O)O(
[19:58:49] SMILES Parse Error: Failed parsing SMILES 'C(C1C(C(C(C(O1)SC2C(C(C(C(O2)CO)O)O)O)O)O)O)O(' for input: 'C(C1C(C(C(C(O1)SC2C(C(C(C(O2)CO)O)O)O)O)O)O)O('
[19:58:49] SMILES Parse Error: syntax error while parsing: C(C1C(C(C(C((1)SC2C(C(C(C((2)C()()()()()()()((
[19:58:49] SMILES Parse Error: Failed parsing SMILES 'C(C1C(C(C(C((1)SC2C(C(C(C((2)C()()()()()()()((' for input: 'C(C1C(C(C(C((1)SC2C(C(C(C((2)C()()()()()()()(('
[19:58:50] SMILES Parse Error: syntax error while parsing: C(C1C(C(C(C(O1)SC2C(C(C(C(O2)CO)O)O)O)O)O)O)O(
[19:58:50] SMILES Parse Error: Failed parsing SMILES 'C(C1C(C(C(C(O1)SC2C(C(C(C(O2)CO)O)O)O)O)O)O)O(' for input: 'C(C1C(C(C(C(O1)SC2C(C(C(C(O2)CO)O)O)O)O)O)O)O('
[19:58:50] SMILES Parse Error: syntax error while parsing: C(C1C(C(C(C(O1)SC2C(C(C(C(O2)CO)O)O)O)O)O)O)O(
[19:58:50] SMILES Parse Error: Failed parsing SMILES 'C(C1C(C(C(C(O1)SC2C(C(C(C(O2)CO)O)O)

[{'sequence': 'C ( C1C ( C ( C ( C ( O1 ) SC2C ( C ( C ( C ( O2 ) CO ) O ) O ) O ) O ) O ) O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999078512191772}, {'sequence': 'C ( C1C ( C ( C ( C ( (1 ) SC2C ( C ( C ( C ( (2 ) C( ) ( ) ( ) ( ) ( ) ( ) ( ) ( (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999078512191772}, {'sequence': 'C ( C1C ( C ( C ( C ( O1 ) SC2C ( C ( C ( C ( O2 ) CO ) O ) O ) O ) O ) O ) O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999078512191772}, {'sequence': 'C ( C1C ( C ( C ( C ( O1 ) SC2C ( C ( C ( C ( O2 ) CO ) O ) O ) O ) O ) O ) O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999078512191772}, {'sequence': 'C ( C1C ( C ( C ( C ( O1 ) SC2C ( C ( C ( C ( O2 ) CO ) O ) O ) O ) O ) O ) O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999078512191772}, {'sequence': 'C ( C1C ( C ( C ( C ( O1 ) SC2C ( C ( C ( C ( O2 ) CO ) O ) O ) O ) O ) O ) O ) O (', 'validity': 'invalid', 'similarity': -1, 'sc

[19:59:47] SMILES Parse Error: syntax error while parsing: (1=C2C=C(C=(2=NN1)C3=((=CN=C3)O(((4=(=(=C4)N(
[19:59:47] SMILES Parse Error: Failed parsing SMILES '(1=C2C=C(C=(2=NN1)C3=((=CN=C3)O(((4=(=(=C4)N(' for input: '(1=C2C=C(C=(2=NN1)C3=((=CN=C3)O(((4=(=(=C4)N('
[19:59:48] SMILES Parse Error: unclosed ring for input: 'CC1=C2C=C(C=CC2=NN1)C3=CC(=CN=C3)OCC(CC4=CC=CC=C4)NC1'
[19:59:48] SMILES Parse Error: syntax error while parsing: CC1(C2C(C(C(CC2(NN1)C3(CC((CN(C3)OCC(CC4(CC(CC(C4)N(
[19:59:48] SMILES Parse Error: Failed parsing SMILES 'CC1(C2C(C(C(CC2(NN1)C3(CC((CN(C3)OCC(CC4(CC(CC(C4)N(' for input: 'CC1(C2C(C(C(CC2(NN1)C3(CC((CN(C3)OCC(CC4(CC(CC(C4)N('
[19:59:49] SMILES Parse Error: syntax error while parsing: CC1=C2C=C(C=CC2=NN1)C3=CC(=CN=C3)OCC(==CC=CC=C4)N=
[19:59:49] SMILES Parse Error: Failed parsing SMILES 'CC1=C2C=C(C=CC2=NN1)C3=CC(=CN=C3)OCC(==CC=CC=C4)N=' for input: 'CC1=C2C=C(C=CC2=NN1)C3=CC(=CN=C3)OCC(==CC=CC=C4)N='
[19:59:49] SMILES Parse Error: syntax error while parsing

[{'sequence': 'CC1 = C2C = C ( C = CC2 = NN1 ) C3 = CC ( = CN = C3 ) OCC ( CC4 = CC = CC = C4 ) Nc1ccccc1', 'validity': 'valid', 'similarity': 0.7019230769230769, 'score': 0.7420322299003601}, {'sequence': '(1 = C2C = C ( C = (2 = NN1 ) C3 = ( ( = CN = C3 ) O( ( (4 = ( = ( = C4 ) N (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999618530273438}, {'sequence': 'CC1 = C2C = C ( C = CC2 = NN1 ) C3 = CC ( = CN = C3 ) OCC ( CC4 = CC = CC = C4 ) NC1', 'validity': 'invalid', 'similarity': -1, 'score': 0.07371588051319122}, {'sequence': 'CC1 ( C2C ( C ( C ( CC2 ( NN1 ) C3 ( CC ( ( CN ( C3 ) OCC ( CC4 ( CC ( CC ( C4 ) N (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999618530273438}, {'sequence': 'CC1 = C2C = C ( C = CC2 = NN1 ) C3 = CC ( = CN = C3 ) OCC ( = = CC = CC = C4 ) N =', 'validity': 'invalid', 'similarity': -1, 'score': 0.996298611164093}, {'sequence': 'CC1 = C2C = C ( C = CC2 = NN1 ) C3 = CC ( = CN = C3 ) OCC ( CC4 = CC = CC = C4 ) N (', 'validity': 'invalid', 'sim

[20:00:46] SMILES Parse Error: syntax error while parsing: CN1C2=NC(=NC=C2C=C(C1=O)C3=C(C=CC=C3Cl)Cl)NC4=CC(=CC=C4)SC(
[20:00:46] SMILES Parse Error: Failed parsing SMILES 'CN1C2=NC(=NC=C2C=C(C1=O)C3=C(C=CC=C3Cl)Cl)NC4=CC(=CC=C4)SC(' for input: 'CN1C2=NC(=NC=C2C=C(C1=O)C3=C(C=CC=C3Cl)Cl)NC4=CC(=CC=C4)SC('
[20:00:46] SMILES Parse Error: syntax error while parsing: CN1C2=NC(=NC=C2C=C(C1=O)C3=C(C=CC=C3Cl)Cl)NC4=CC(=CC=C4)SC(
[20:00:46] SMILES Parse Error: Failed parsing SMILES 'CN1C2=NC(=NC=C2C=C(C1=O)C3=C(C=CC=C3Cl)Cl)NC4=CC(=CC=C4)SC(' for input: 'CN1C2=NC(=NC=C2C=C(C1=O)C3=C(C=CC=C3Cl)Cl)NC4=CC(=CC=C4)SC('
[20:00:47] SMILES Parse Error: syntax error while parsing: CN1C2=NC(=NC=C2C=C(C1=O)C3=C(C=CC=C3Cl)Cl)NC4=CC(=CC=C4)SC(
[20:00:47] SMILES Parse Error: Failed parsing SMILES 'CN1C2=NC(=NC=C2C=C(C1=O)C3=C(C=CC=C3Cl)Cl)NC4=CC(=CC=C4)SC(' for input: 'CN1C2=NC(=NC=C2C=C(C1=O)C3=C(C=CC=C3Cl)Cl)NC4=CC(=CC=C4)SC('
[20:00:48] SMILES Parse Error: syntax error while parsing: CN1C2=NC(=NC=C2C=C(C

[{'sequence': 'CN1C2 = NC ( = NC = C2C = C ( C1 = O ) C3 = C ( C = CC = C3Cl ) Cl ) NC4 = CC ( = CC = C4 ) SC (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999641180038452}, {'sequence': 'CN1C2 = NC ( = NC = C2C = C ( C1 = O ) C3 = C ( C = CC = C3Cl ) Cl ) NC4 = CC ( = CC = C4 ) SC (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999641180038452}, {'sequence': 'CN1C2 = NC ( = NC = C2C = C ( C1 = O ) C3 = C ( C = CC = C3Cl ) Cl ) NC4 = CC ( = CC = C4 ) SC (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999641180038452}, {'sequence': 'CN1C2 = NC ( = NC = C2C = C ( C1 = O ) C3 = C ( C = CC = C3Cl ) Cl ) NC4 = CC ( = CC = C4 ) SC (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999641180038452}, {'sequence': 'CN1C2 = NC ( = NC = C2C = C ( C1 = O ) C3 = C ( C = CC = C3Cl ) Cl ) NC4 = CC ( = CC = C4 ) SC (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999641180038452}, {'sequence': 'CN1C2 = NC ( = NC = C2C = C ( C1 = O ) C3 = C ( C = CC = C3Cl ) C

[20:01:49] SMILES Parse Error: syntax error while parsing: CCC((CC1=CC=C(C=C1)C2=CC=CC=C2C3=(((=(3)C4=(C=(C5=C4(=C(5CC(=O)O(
[20:01:49] SMILES Parse Error: Failed parsing SMILES 'CCC((CC1=CC=C(C=C1)C2=CC=CC=C2C3=(((=(3)C4=(C=(C5=C4(=C(5CC(=O)O(' for input: 'CCC((CC1=CC=C(C=C1)C2=CC=CC=C2C3=(((=(3)C4=(C=(C5=C4(=C(5CC(=O)O('
[20:01:49] SMILES Parse Error: syntax error while parsing: CCCN(CC1=CC=C(C=C1)C2=CC=CC=C2C3=NNN=N3)C4=NC=NC5=C4N=CN5CC(=O)O(
[20:01:49] SMILES Parse Error: Failed parsing SMILES 'CCCN(CC1=CC=C(C=C1)C2=CC=CC=C2C3=NNN=N3)C4=NC=NC5=C4N=CN5CC(=O)O(' for input: 'CCCN(CC1=CC=C(C=C1)C2=CC=CC=C2C3=NNN=N3)C4=NC=NC5=C4N=CN5CC(=O)O('
[20:01:50] SMILES Parse Error: syntax error while parsing: CC((CC1=CC=C(C=C1)C2=CC=CC=C2C3=NNN=N3)C4=NC=NC5=C4N=(5CC(=O)O(
[20:01:50] SMILES Parse Error: Failed parsing SMILES 'CC((CC1=CC=C(C=C1)C2=CC=CC=C2C3=NNN=N3)C4=NC=NC5=C4N=(5CC(=O)O(' for input: 'CC((CC1=CC=C(C=C1)C2=CC=CC=C2C3=NNN=N3)C4=NC=NC5=C4N=(5CC(=O)O('
[20:01:51] SMILES Parse Error: 

[{'sequence': 'CCC( ( CC1 = CC = C ( C = C1 ) C2 = CC = CC = C2C3 = ((( = (3 ) C4 = (C = (C5 = C4( = C(5CC ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.999954104423523}, {'sequence': 'CCCN ( CC1 = CC = C ( C = C1 ) C2 = CC = CC = C2C3 = NNN = N3 ) C4 = NC = NC5 = C4N = CN5CC ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.999954104423523}, {'sequence': 'CC( ( CC1 = CC = C ( C = C1 ) C2 = CC = CC = C2C3 = NNN = N3 ) C4 = NC = NC5 = C4N = (5CC ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.999954104423523}, {'sequence': 'CCCN ( CC1 = CC = C ( C = C1 ) C2 = CC = CC = C2C3 = NNN = N3 ) C4 = NC = NC5 = C4N = CN5CC ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.999954104423523}, {'sequence': 'CCCN ( CC1 = CC = C ( C = C1 ) C2 = CC = CC = C2C3 = NNN = N3 ) C4 = NC = NC( = C4N = CN(CC ( = O ) O (', 'validity': 'invalid', 'similarity': -1, 'score': 0.999954104423523}, {'sequence': 'CCCN ( CC1 = CC = C ( C = C1 ) C2 = C

[20:02:54] SMILES Parse Error: unclosed ring for input: 'CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=CC=CC=C51'
[20:02:55] SMILES Parse Error: extra close parentheses while parsing: CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=CC=CC=C5)
[20:02:55] SMILES Parse Error: Failed parsing SMILES 'CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=CC=CC=C5)' for input: 'CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=CC=CC=C5)'
[20:02:56] SMILES Parse Error: syntax error while parsing: CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=CC=CC=C5=
[20:02:56] SMILES Parse Error: Failed parsing SMILES 'CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=CC=CC=C5=' for input: 'CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3C4=C(C=C(C=C4)C(=O)OC)NC3=O)C5=CC=CC=C5='
[20:02:56] SMILES Parse Error: syntax error while parsing: CN1CCN(CC1)CC(=O)N(C)C2=CC=C(C=C2)NC(=C3C4=C(C=C(C=

[{'sequence': 'CN1CCN ( CC1 ) CC ( = O ) N ( C ) C2 = CC = C ( C = C2 ) NC ( = C3C4 = C ( C = C ( C = C4 ) C ( = O ) OC ) NC3 = O ) C5 = CC = CC = C51', 'validity': 'invalid', 'similarity': -1, 'score': 0.14231126010417938}, {'sequence': 'CN1CCN ( CC1 ) CC ( = O ) N ( C ) C2 = CC = C ( C = C2 ) NC ( = C3C4 = C ( C = C ( C = C4 ) C ( = O ) OC ) NC3 = O ) C5 = CC = CC = C5 )', 'validity': 'invalid', 'similarity': -1, 'score': 0.9672983884811401}, {'sequence': 'CN1CCN ( CC1 ) CC ( = O ) N ( C ) C2 = CC = C ( C = C2 ) NC ( = C3C4 = C ( C = C ( C = C4 ) C ( = O ) OC ) NC3 = O ) C5 = CC = CC = C5 =', 'validity': 'invalid', 'similarity': -1, 'score': 0.9884144067764282}, {'sequence': 'CN1CCN ( CC1 ) CC ( = O ) N ( C ) C2 = CC = C ( C = C2 ) NC ( = C3C4 = C ( C = C ( C = C4 ) C ( = O ) OC ) NC3 = O ) = = CC = CC = = =', 'validity': 'invalid', 'similarity': -1, 'score': 0.9884144067764282}, {'sequence': 'CN1CCN ( CC1 ) CC ( = O ) N ( C ) C2 = CC = C ( C = C2 ) NC ( = C3C4 = C ( C = C ( C = C4 )

[20:04:23] SMILES Parse Error: syntax error while parsing: C1=CC=C(C=C1)C2=NC(=C3C=CC(=O)C=C3)C(=C4C=CNC=C4)N2(
[20:04:23] SMILES Parse Error: Failed parsing SMILES 'C1=CC=C(C=C1)C2=NC(=C3C=CC(=O)C=C3)C(=C4C=CNC=C4)N2(' for input: 'C1=CC=C(C=C1)C2=NC(=C3C=CC(=O)C=C3)C(=C4C=CNC=C4)N2('
[20:04:24] SMILES Parse Error: syntax error while parsing: C1=CC=C(C=C1)C2=NC(=C3C=CC(=O)C=C3)C(=C4C=CNC=C4)N2(
[20:04:24] SMILES Parse Error: Failed parsing SMILES 'C1=CC=C(C=C1)C2=NC(=C3C=CC(=O)C=C3)C(=C4C=CNC=C4)N2(' for input: 'C1=CC=C(C=C1)C2=NC(=C3C=CC(=O)C=C3)C(=C4C=CNC=C4)N2('
[20:04:25] SMILES Parse Error: syntax error while parsing: C1=CC=C(C=C1)C2=NC(=C3C=CC(=O)C=C3)C(=C4C=CNC=C4)N2(
[20:04:25] SMILES Parse Error: Failed parsing SMILES 'C1=CC=C(C=C1)C2=NC(=C3C=CC(=O)C=C3)C(=C4C=CNC=C4)N2(' for input: 'C1=CC=C(C=C1)C2=NC(=C3C=CC(=O)C=C3)C(=C4C=CNC=C4)N2('
[20:04:25] SMILES Parse Error: syntax error while parsing: C1=CC=C(C=C1)C2=NC(=C3C=CC(=O)C=C3)C(=C4C=CNC=C4)N2(
[20:04:25] SMILES Parse Error:

[{'sequence': 'C1 = CC = C ( C = C1 ) C2 = NC ( = C3C = CC ( = O ) C = C3 ) C ( = C4C = CNC = C4 ) N2 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999635219573975}, {'sequence': 'C1 = CC = C ( C = C1 ) C2 = NC ( = C3C = CC ( = O ) C = C3 ) C ( = C4C = CNC = C4 ) N2 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999635219573975}, {'sequence': 'C1 = CC = C ( C = C1 ) C2 = NC ( = C3C = CC ( = O ) C = C3 ) C ( = C4C = CNC = C4 ) N2 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999635219573975}, {'sequence': 'C1 = CC = C ( C = C1 ) C2 = NC ( = C3C = CC ( = O ) C = C3 ) C ( = C4C = CNC = C4 ) N2 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999635219573975}, {'sequence': 'C1 = CC = C ( C = C1 ) C2 = NC ( = C3C = CC ( = O ) C = C3 ) C ( = C4C = CNC = C4 ) N2 (', 'validity': 'invalid', 'similarity': -1, 'score': 0.9999635219573975}, {'sequence': 'C1 = CC = C ( C = C1 ) C2 = NC ( = C3C = CC ( = O ) C = C3 ) C ( = C4C = CNC = C4 ) ( (', 'validity': 'in

## Observe Results

In [None]:
print("Validity Percentage:", validity*100)
print("Similarity Percentage: ", similarity*100)

# For 20 SMILES, each with 100 generations, we obtain validity of 0.3905 and similarity of 0.82891.

Validity Percentage: 27.200000000000003
Similarity Percentage:  78.49713607308159


In [None]:
with open('output.txt', "a") as output_file:
    print("Average Validity: ", validity, file=output_file)
    print("Average Similarity: ", similarity, file=output_file)

# Additional Notes and Comments

> Could Explore SEFLIES tokenization as Future Development

In [1]:
# EXTRA NOTES ON IMPLEMENTATION OF PROJECT
"""
                       Inpainting              |            Generation
                       (edit)

Unconditional          [Validity, Similarity]  |        [Validity, Similarity]

Conditional            [Success]               |            {Success}

We can move anticlockwise from the top right quadrant in shaping the decision-making process / direction for the thesis
At each step, you highlight challenges (representing proteins, making Q&A prompts, limitations on token size, finding educational assessments that fit college-level chemistry questions).

Thesis Format
- Results and discussion of results could be a section of each chapter
- Each subsection breaks down the results and the storyline flows (like the research discovery process, the challenges faced)

Example: orphaned parentheses does not satisfy my style of inpainting. This is a challenge. To solve this partially, we explored SELFIES. There are benefits with SELFIES ...
         If SELFIES does not work, you don't have to find a solution depending on time constraints. You can say there needs to be more work done to improve small molecule representation by ... and leave to other.
"""

IndentationError: unexpected indent (<ipython-input-1-d553d348becf>, line 3)