In [2]:
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import Draw

In [3]:
df_LLM1 = pd.read_csv(r'/Users/matthiasgalka/git/ppchem_project/data/LLM_processed/LLM_processed(1).csv')
df_LLM2 = pd.read_csv(r'/Users/matthiasgalka/git/ppchem_project/data/LLM_processed/LLM_processed(2).csv')
df_LLM31 = pd.read_csv(r'/Users/matthiasgalka/git/ppchem_project/data/LLM_processed/LLM_processed(3.1).csv')
df_LLM32 = pd.read_csv(r'/Users/matthiasgalka/git/ppchem_project/data/LLM_processed/LLM_processed(3.2).csv')
df_LLM4 = pd.read_csv(r'/Users/matthiasgalka/git/ppchem_project/data/LLM_processed/LLM_processed(4).csv')

In [4]:
df_LLM1.head() 
df_LLM2.head() 
df_LLM31.shape 
df_LLM32.head() #dataframe is too small just 4116 instead of 4572!!
df_LLM4.shape 

(9147, 11)

In [127]:
def clean_up(Dataframe: pd.DataFrame, tolerance: float = 1):
    """1. Delets all row whitout Rf value or Rf value over 1.0 and without solvent information.
       2. Converts percentage of solvents in a Dataframe from str to float and check if they add up to 100, else drop them.
       3. Strips productSMILES str to a usable SMILES str.

    Args: 
        Dataframe (_type_): Dataframe containing the processed data from the get_value function.
        Dataframe needs to have following columns: 'productSmiles,' 'Rf', 'Percent_A', 'Percent_B'
        tolerance (_type_): float, default = 1, tolerance for the sum of the percentages of the solvents (default is 100% +- 1%)

    """
    size_pre_cleaning = Dataframe.shape[0]  # get the size of the dataframe
    # Drop rows without Rf values
    Dataframe.dropna(subset=['Rf'], inplace = True) 
    
    # Convert Rf value to float
    Dataframe['Rf'] = Dataframe['Rf'].astype(float)
    
    #finds indicies with Rf values over 1.0
    indices_false_Rf = Dataframe[Dataframe["Rf"] > 1].index 
    Dataframe.drop(indices_false_Rf,inplace = True) #drops rows with false Rf values
    
    #check if at least one solvent (either solvent A or solvent B) is given (so check that solvent A and solvent B are not None)
    Dataframe = Dataframe[Dataframe['Solvent_A'].notnull() & Dataframe['Solvent_B'].notnull()].copy()
    
    #convert 'None' entry to 0
    Dataframe.loc[:, 'Percent_A'] = Dataframe['Percent_A'].apply(lambda x: 0 if x is None else x)
    
    #convert 'None' entry to 0
    Dataframe.loc[:, 'Percent_B'] = Dataframe['Percent_B'].apply(lambda x: 0 if x is None else x)
    
    # convert Percentage to float
    Dataframe.loc[:, 'Percent_A'] = Dataframe['Percent_A'].apply(lambda x: float(x)) 
    
    #convert Percantage to float
    Dataframe.loc[:, 'Percent_B'] = Dataframe['Percent_B'].apply(lambda x: float(x)) 
    
    # Drop rows where 'additive_C' is not None
    Dataframe = Dataframe[Dataframe['Additive_C'].isnull()].copy()
    
    # remove [' and '] from the productSimles
    Dataframe.loc[:, 'productSmiles'] = Dataframe['productSmiles'].apply(lambda x: x[2:-2]) 
    
    # check if the sum of the percentages is 100, this at the same time kicks out entries with additives C (+ consider limitations of floating-point arithmetic)
    Dataframe.loc[:, 'sum'] = Dataframe['Percent_A'] + Dataframe['Percent_B']
    Dataframe = Dataframe[(Dataframe['sum'] >= 100 - tolerance) & (Dataframe['sum'] <= 100 + tolerance)].copy()
    
    Dataframe.reset_index(drop=True, inplace=True) # sets the index new from 1 to end

    size_post_cleaning = Dataframe.shape[0]  # get the size of the dataframe after cleaning
    print(f"Size of the dataframe before cleaning: {size_pre_cleaning}")
    print(f"Size of the dataframe after cleaning: {size_post_cleaning}")
    print(f"Number of rows dropped: {size_pre_cleaning - size_post_cleaning}")
    print(f"Percentage of rows dropped: {(size_pre_cleaning - size_post_cleaning) / size_pre_cleaning * 100}%")
    return Dataframe


In [128]:
# connecting all the Datarfames to one and make a csv file

df_LLM = pd.concat([df_LLM1, df_LLM2, df_LLM31, df_LLM32, df_LLM4], axis=0, ignore_index=True)

#needs to be repeated with df_LLM32 with original size

In [105]:
df_LLM.to_csv(r'/Users/matthiasgalka/git/ppchem_project/data/After_LLM.csv')

In [129]:
df_LLM_clean = clean_up(df_LLM)

Size of the dataframe before cleaning: 36123
Size of the dataframe after cleaning: 16617
Number of rows dropped: 19506
Percentage of rows dropped: 53.998837305871604%


In [99]:

def remove_salts(Dataframe: pd.DataFrame):

    indices_false_Rf = Dataframe[Dataframe["productSmiles"].str.contains('\.')].index 
    Dataframe.drop(indices_false_Rf,inplace = True)

    return Dataframe

In [130]:
df_no_salt = remove_salts(df_LLM_clean)

Now I will try some things to solve the Smiles problem (two product Smiles, Salts, Enatiomers)

In [None]:

def find_rows_with_dot(Dataframe: pd.DataFrame):
    
    ''' Finds all entries with product Smiles which are salts, complexes or Molecuels sperated by a '.' and puts them into a
        new Dataframe.

    Args: 
        Datafarme which contains a column called 'productSmiles' with Smiles.
    '''
    df_salts = pd.DataFrame(columns=df.columns)
    
    # Iteriere über jede Zeile im DataFrame
    for index, row in Dataframe.iterrows():
        # Überprüfe, ob der Wert in der Spalte "productSmiles" einen Punkt enthält
        if '.' in row['productSmiles']:
            # Füge die Zeile zum DataFrame df_salts hinzu
            df_salts = pd.concat([df_salts,pd.DataFrame([row])], ignore_index =False)
    
    return df_salts

In [None]:
df_salt = find_rows_with_dot(df_LLM_clean)

  df_salts = pd.concat([df_salts,pd.DataFrame([row])], ignore_index =False)


In [None]:

def remove_salts(Dataframe: pd.DataFrame):
    '''Removes all rows which contain a '.' in the productSmiles. 

    Args: 
       Dataframes with a column called 'productSmiles' containing Smiles.
    
    '''

    indices_false_Rf = Dataframe[Dataframe["productSmiles"].str.contains('\.')].index 
    Dataframe.drop(indices_false_Rf,inplace = True)

    return Dataframe

In [67]:
def canonicalize_smiles(Dataframe: pd.DataFrame, column_name: str):
    """Function that canonicalizes the SMILES strings in the dataframe.

    Args:
        Dataframe (pd.DataFrame): Dataframe containing the extracted data from the US patents, 
                                preprocessed with the get_values, clean_up, and convert_solvents function.
                                
        column_name (str): name of the column that needs to be canonicalized e.g. productSmiles or Solvent_A_Smiles etc.
    """
    for index, row in Dataframe.iterrows():
        try: 
            smiles_to_canon = row[column_name]
            if smiles_to_canon is not None:
                p_mol = Chem.MolFromSmiles(smiles_to_canon)
                if p_mol is not None:
                    smiles_to_canon = Chem.MolToSmiles(p_mol)
                    Dataframe.at[index, 'productSmiles'] = smiles_to_canon
                else:
                    print(f"Could not canonicalize SMILES for product at index {index}, value is {smiles_to_canon}")
        except Exception as e:
            print(e)
            print(f"Error at index {index}, smiles value is {smiles_to_canon}")
        
    return Dataframe

In [54]:
# Example Strigs (Smiles)

S_a_k = "COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',"
a_S_k_a = "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',"
a_S_a_k_a = "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC','"
a_S_a_k_w_a = "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC', '"
S_k_a_w_a_S = "C(=O)(C(F)(F)F)O', 'O=C(CN1N=C(C2=CC=CC=C12)C(=O)N)N1[C@@H]2C[C@@H]2C[C@H]1C(NC1=NN(C=C1)CC(F)(F)F)=O"
En_ = "[Si](C)(C)(C(C)(C)C)O[C@H]1C[C@H](C[C@H]([C@@H]1O[Si](C)(C)C(C)(C)C)C)C1=C(C=NC=C1)N', '[Si](C)(C)(C(C)(C)C)O[C@@H]1C[C@@H](C[C@@H]([C@H]1O[Si](C)(C)C(C)(C)C)C)C1=C(C=NC=C1)N"

test_1 = [S_a_k, a_S_k_a, a_S_a_k_a, a_S_a_k_w_a]

test_2 = [S_a_k, a_S_k_a, a_S_a_k_a, a_S_a_k_w_a, S_k_a_w_a_S, En_]

dic_test_2 = {'productSmiles': test_2}

df_test = pd.DataFrame(dic_test_2)


Lets find a function which removes all komma, apostroph or whitespace from a Smiles. If there is a second Smiles it should seperate them and put it in another column

In [55]:
df_test.head(6)

Unnamed: 0,productSmiles
0,"COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',"
1,"'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',"
2,"'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC','"
3,"'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC', '"
4,"C(=O)(C(F)(F)F)O', 'O=C(CN1N=C(C2=CC=CC=C12)C(..."
5,[Si](C)(C)(C(C)(C)C)O[C@H]1C[C@H](C[C@H]([C@@H...


In [40]:

special_characters = [',', ' ', "'"]

for Smiles in test:

    for char in special_characters:
        Smiles = Smiles.replace(char, '')
    print(Smiles)
    

COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC
["COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC','", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC', '"]
COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC
["COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC','", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC', '"]
COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC
["COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC','", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC', '"]
COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC
["COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC','", "'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC', '"]


In [1]:
def clean_Smiles(productSmiles: str, Dataframe: pd.DataFrame):

    '''


    '''

    Smiles_list = productSmiles.split("', '")

    special_characters = [',', ' ', "'"]

    for Smiles in Smiles_list:

        for char in special_characters:
            Smiles = Smiles.replace(char, '')
    
    if len(Smiles_list) == 2:
        Dataframe.loc[df['productSmiles'] == productSmiles, 'productSmiles'] = Smiles_list[0]
        Dataframe['productSmiles_2'] = Smiles_list[1]
    
    else:
        Dataframe.loc[df['productSmiles'] == productSmiles, 'productSmiles'] = Smiles_list[0]
        Dataframe['productSmiles_2'] = np.NaN


    return Dataframe


NameError: name 'pd' is not defined

In [57]:
df_test

TypeError: clean_Smiles() missing 1 required positional argument: 'Dataframe'

In [132]:
df_can = canonicalize_smiles(df_no_salt, 'productSmiles')

[20:09:45] SMILES Parse Error: syntax error while parsing: COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',
[20:09:45] SMILES Parse Error: Failed parsing SMILES 'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC',' for input: 'COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC','
[20:09:45] SMILES Parse Error: syntax error while parsing: N1=CC(=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1',
[20:09:45] SMILES Parse Error: Failed parsing SMILES 'N1=CC(=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1',' for input: 'N1=CC(=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1','
[20:09:45] SMILES Parse Error: syntax error while parsing: C(C1=CC=CC=C1)N1C(=NC=C1C1=C(N=C(S1)C1=CC=CC=C1)C)C=1C=NC=CC1',
[20:09:45] SMILES Parse Error: Failed parsing SMILES 'C(C1=CC=CC=C1)N1C(=NC=C1C1=C(N=C(S1)C1=CC=CC=C1)C)C=1C=NC=CC1',' for input: 'C(C1=CC=CC=C1)N1C(=NC=C1C1=C(N=C(S1)C1=CC=CC=C1)C)C=1C=NC=CC1','
[20:09:45] SMILES Parse Error: syntax error while parsing: CN1N=C(N=N1)C=1NC2=CC=CC=C2C1C1=CC=C(C=O)C=C1',
[20:09:45] SMILES Parse Error: Failed par

Could not canonicalize SMILES for product at index 272, value is COC1=CC=C(C=C1)[C@@H]1C[C@H](C1)C(=O)OC', 'COC1=CC=C(C=C1)[C@H]1C[C@H](C1)C(=O)OC
Could not canonicalize SMILES for product at index 393, value is N1=CC(=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1', 'C(C1=CC=CC=C1)N1C(=NC=C1)C=1C=NC=CC1
Could not canonicalize SMILES for product at index 406, value is C(C1=CC=CC=C1)N1C(=NC=C1C1=C(N=C(S1)C1=CC=CC=C1)C)C=1C=NC=CC1', 'C(C1=CC=CC=C1)N1C(=NC=C1)C=1C=NC=CC1
Could not canonicalize SMILES for product at index 745, value is CN1N=C(N=N1)C=1NC2=CC=CC=C2C1C1=CC=C(C=O)C=C1', 'CN1NNN=C1C=1NC2=CC=CC=C2C1C1=CC=C(C=O)C=C1
Could not canonicalize SMILES for product at index 1257, value is [Si](C)(C)(C(C)(C)C)O[C@H]1C[C@H](C[C@H]([C@@H]1O[Si](C)(C)C(C)(C)C)C)C1=C(C=NC=C1)N', '[Si](C)(C)(C(C)(C)C)O[C@@H]1C[C@@H](C[C@@H]([C@H]1O[Si](C)(C)C(C)(C)C)C)C1=C(C=NC=C1)N
Could not canonicalize SMILES for product at index 1260, value is C(#CCCCCCCC)C=1C(=NNC1)C=1C=NC=CC1', 'C1(=CC=CC=C1)S(=O)(=O)N1N=C(C(

[20:09:45] SMILES Parse Error: syntax error while parsing: C(C)(=O)O[C@@H](C=O)[C@@H](OC(C)=O)[C@H](OC(C)=O)[C@H](OC(C)=O)COC(C)=O',
[20:09:45] SMILES Parse Error: Failed parsing SMILES 'C(C)(=O)O[C@@H](C=O)[C@@H](OC(C)=O)[C@H](OC(C)=O)[C@H](OC(C)=O)COC(C)=O',' for input: 'C(C)(=O)O[C@@H](C=O)[C@@H](OC(C)=O)[C@H](OC(C)=O)[C@H](OC(C)=O)COC(C)=O','
[20:09:45] SMILES Parse Error: syntax error while parsing: C[C@]12CC[C@H]3[C@H]([C@@H]1[C@H]([C@H]([C@@H]2O)O)O)CCC4=C3C=CC(=C4)O',
[20:09:45] SMILES Parse Error: Failed parsing SMILES 'C[C@]12CC[C@H]3[C@H]([C@@H]1[C@H]([C@H]([C@@H]2O)O)O)CCC4=C3C=CC(=C4)O',' for input: 'C[C@]12CC[C@H]3[C@H]([C@@H]1[C@H]([C@H]([C@@H]2O)O)O)CCC4=C3C=CC(=C4)O','
[20:09:45] SMILES Parse Error: syntax error while parsing: BrC1=CC=C(C(=N1)C1(NC1)C)F',
[20:09:45] SMILES Parse Error: Failed parsing SMILES 'BrC1=CC=C(C(=N1)C1(NC1)C)F',' for input: 'BrC1=CC=C(C(=N1)C1(NC1)C)F','
[20:09:45] SMILES Parse Error: syntax error while parsing: ClC1=NC=CC(=N1)N1CCN(CC1)C=1C(=C

Could not canonicalize SMILES for product at index 1728, value is C(C)(=O)O[C@@H](C=O)[C@@H](OC(C)=O)[C@H](OC(C)=O)[C@H](OC(C)=O)COC(C)=O', '[Br-]
Could not canonicalize SMILES for product at index 1805, value is C[C@]12CC[C@H]3[C@H]([C@@H]1[C@H]([C@H]([C@@H]2O)O)O)CCC4=C3C=CC(=C4)O', 'C[C@]12CC[C@@H]3C=4C=CC(=CC4CC[C@H]3[C@@H]1CCC2=O)O
Could not canonicalize SMILES for product at index 2288, value is BrC1=CC=C(C(=N1)C1(NC1)C)F', 'C=1C=CC(=CC1)P(=O)(C=2C=CC=CC2)C=3C=CC=CC3
Could not canonicalize SMILES for product at index 2714, value is ClC1=NC=CC(=N1)N1CCN(CC1)C=1C(=C(C2=C(CC(O2)(C)C)C1C)C)C', 'ClC1=NC(=NC=C1)N1CCN(CC1)C=1C(=C(C2=C(CC(O2)(C)C)C1C)C)C
Could not canonicalize SMILES for product at index 2976, value is C(C=CCC)(=O)O', 'COC=1C(=C(C=CC1)C=CCCC(=O)O)[N+](=O)[O-]
Could not canonicalize SMILES for product at index 3004, value is [Si](C)(C)(C(C)(C)C)O[C@H]1C[C@H](C[C@H]([C@@H]1O[Si](C)(C)C(C)(C)C)C)C1=C(C=NC=C1)N', '[Si](C)(C)(C(C)(C)C)O[C@@H]1C[C@@H](C[C@@H]([C@H]1O[Si](C)(C)

[20:09:45] SMILES Parse Error: syntax error while parsing: C(C)(=O)C1=C(C=C(C=C1)Br)OS(=O)(=O)C',
[20:09:45] SMILES Parse Error: Failed parsing SMILES 'C(C)(=O)C1=C(C=C(C=C1)Br)OS(=O)(=O)C',' for input: 'C(C)(=O)C1=C(C=C(C=C1)Br)OS(=O)(=O)C','
[20:09:46] SMILES Parse Error: syntax error while parsing: C(C)(=O)C1=C(C=C(C=C1)Br)OS(=O)(=O)C',
[20:09:46] SMILES Parse Error: Failed parsing SMILES 'C(C)(=O)C1=C(C=C(C=C1)Br)OS(=O)(=O)C',' for input: 'C(C)(=O)C1=C(C=C(C=C1)Br)OS(=O)(=O)C','
[20:09:46] SMILES Parse Error: syntax error while parsing: C12C(C3CC(CC(C1)C3)C2)N2C(N(C(C2)C(C)C)CC(C)C)=O',
[20:09:46] SMILES Parse Error: Failed parsing SMILES 'C12C(C3CC(CC(C1)C3)C2)N2C(N(C(C2)C(C)C)CC(C)C)=O',' for input: 'C12C(C3CC(CC(C1)C3)C2)N2C(N(C(C2)C(C)C)CC(C)C)=O','


Could not canonicalize SMILES for product at index 3645, value is C(C)(=O)C1=C(C=C(C=C1)Br)OS(=O)(=O)C', 'C(C)(C)OC(C)C
Could not canonicalize SMILES for product at index 4406, value is C(C)(=O)C1=C(C=C(C=C1)Br)OS(=O)(=O)C', 'C(C)(C)OC(C)C
Could not canonicalize SMILES for product at index 4640, value is C12C(C3CC(CC(C1)C3)C2)N2C(N(C(C2)C(C)C)CC(C)C)=O', 'C12C(C3CC(CC(C1)C3)C2)N2C(N(C(C2)C(C)C)CC2CC2)=O


[20:09:46] SMILES Parse Error: syntax error while parsing: NO',
[20:09:46] SMILES Parse Error: Failed parsing SMILES 'NO',' for input: 'NO','
[20:09:46] SMILES Parse Error: syntax error while parsing: ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl',
[20:09:46] SMILES Parse Error: Failed parsing SMILES 'ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl',' for input: 'ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl','
[20:09:46] SMILES Parse Error: syntax error while parsing: BrC=1C=C(C(=NC1)I)OCOCCOC',
[20:09:46] SMILES Parse Error: Failed parsing SMILES 'BrC=1C=C(C(=NC1)I)OCOCCOC',' for input: 'BrC=1C=C(C(=NC1)I)OCOCCOC','
[20:09:46] SMILES Parse Error: syntax error while parsing: BrC1=CC=C(C[C@@H]2CS(C[C@@H]3N(C(O[C@H]23)=O)C2(CC2)C2=CC(=CC=C2)C(C)(C)C)(=O)=O)C=C1',
[20:09:46] SMILES Parse Error: Failed parsing SMILES 'BrC1=CC=C(C[C@@H]2CS(C[C@@H]3N(C(O[C@H]23)=O)C2(CC2)C2=CC(=CC=C2)C(C)(C)C)(=O)=O)C=C1',' for input: 'BrC1=CC=

Could not canonicalize SMILES for product at index 6974, value is NO', 'ClC=1C=C2C(=C(N(C2=CC1Cl)[C@H]1[C@H](O)[C@H](O)[C@H](O1)CO)Br)C(N)=NO
Could not canonicalize SMILES for product at index 7120, value is ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl', 'ClC=1C(=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C1)C1=C(C=CC(=C1)OC)Cl
Could not canonicalize SMILES for product at index 7128, value is BrC=1C=C(C(=NC1)I)OCOCCOC', 'BrC=1C=C(C(=NC1)Cl)OCOCCOC
Could not canonicalize SMILES for product at index 7498, value is BrC1=CC=C(C[C@@H]2CS(C[C@@H]3N(C(O[C@H]23)=O)C2(CC2)C2=CC(=CC=C2)C(C)(C)C)(=O)=O)C=C1', 'N
Could not canonicalize SMILES for product at index 7540, value is COCCCN1N=C(C2=CC=C(C=C12)\\C=C(\\CO)/C(C)C)C', 'N
Could not canonicalize SMILES for product at index 7763, value is C(#CCCCCCCC)C=1C(=NNC1)C=1C=NC=CC1', 'C1(=CC=CC=C1)S(=O)(=O)N1N=C(C(=C1)C#CCCCC1=CC=CC=C1)C=1C=NC=CC1
Could not canonicalize SMILES for product at index 7965, value is C(C1=CC=CC=C1)=O', 'C

[20:09:46] SMILES Parse Error: syntax error while parsing: C1(=CC=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1',
[20:09:46] SMILES Parse Error: Failed parsing SMILES 'C1(=CC=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1',' for input: 'C1(=CC=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1','
[20:09:46] SMILES Parse Error: syntax error while parsing: N1=CC(=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1',
[20:09:46] SMILES Parse Error: Failed parsing SMILES 'N1=CC(=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1',' for input: 'N1=CC(=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1','
[20:09:46] SMILES Parse Error: syntax error while parsing: C(C1=CC=CC=C1)N1C(=NC=C1C1=C(N=C(S1)C1=CC=CC=C1)C)C=1C=NC=CC1',
[20:09:46] SMILES Parse Error: Failed parsing SMILES 'C(C1=CC=CC=C1)N1C(=NC=C1C1=C(N=C(S1)C1=CC=CC=C1)C)C=1C=NC=CC1',' for input: 'C(C1=CC=CC=C1)N1C(=NC=C1C1=C(N=C(S1)C1=CC=CC=C1)C)C=1C=NC=CC1','
[20:09:46] SMILES Parse Error: syntax error while parsing: BrC1=NC(=CC=C1)CN1N=NC=C1',
[20:09:46] SMILES Parse Error: Fai

Could not canonicalize SMILES for product at index 8538, value is C1(=CC=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1', 'C(C1=CC=CC=C1)N1C(=NC=C1)C=1C=NC=CC1
Could not canonicalize SMILES for product at index 8540, value is N1=CC(=CC=C1)C1=NC=C(C=N1)C1=CN=C(N1)C=1C=NC=CC1', 'C(C1=CC=CC=C1)N1C(=NC=C1)C=1C=NC=CC1
Could not canonicalize SMILES for product at index 8557, value is C(C1=CC=CC=C1)N1C(=NC=C1C1=C(N=C(S1)C1=CC=CC=C1)C)C=1C=NC=CC1', 'C(C1=CC=CC=C1)N1C(=NC=C1)C=1C=NC=CC1
Could not canonicalize SMILES for product at index 8603, value is BrC1=NC(=CC=C1)CN1N=NC=C1', 'BrC1=NC(=CC=C1)CN1N=CC=N1
Could not canonicalize SMILES for product at index 8612, value is COC1=CC=C(CN2N=C(C=3C2=NC=CC3OC3=C(C=C(C=C3)N(C(=O)C3(CC3)C(=O)N)C3=CC=C(C=C3)F)F)C=3OC(=CC3Br)CN3CCN(CC3)C)C=C1', 'COC1=CC=C(CN2N=C(C=3C2=NC=CC3OC3=C(C=C(C=C3)N(C(=O)C3(CC3)C(=O)N)C3=CC=C(C=C3)F)F)C3=COC(=C3)CN3CCN(CC3)C)C=C1
Could not canonicalize SMILES for product at index 8644, value is NC(C#N)(CN1N=C2C(N=C(C=C2)Br)=C1)C', 'BrC=

[20:09:46] SMILES Parse Error: syntax error while parsing: ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl',
[20:09:46] SMILES Parse Error: Failed parsing SMILES 'ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl',' for input: 'ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl','
[20:09:47] SMILES Parse Error: syntax error while parsing: CC=1NC=C(C1CC1=C(C=CC=C1)S(=O)(=O)N1CCCC1)C',
[20:09:47] SMILES Parse Error: Failed parsing SMILES 'CC=1NC=C(C1CC1=C(C=CC=C1)S(=O)(=O)N1CCCC1)C',' for input: 'CC=1NC=C(C1CC1=C(C=CC=C1)S(=O)(=O)N1CCCC1)C','
[20:09:47] SMILES Parse Error: syntax error while parsing: ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl',
[20:09:47] SMILES Parse Error: Failed parsing SMILES 'ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl',' for input: 'ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl','
[20:09:47] SMILES Parse Error: syntax error while parsing: C(

Could not canonicalize SMILES for product at index 10181, value is ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl', 'ClC=1C(=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C1)C1=C(C=CC(=C1)OC)Cl
Could not canonicalize SMILES for product at index 10343, value is CC=1NC=C(C1CC1=C(C=CC=C1)S(=O)(=O)N1CCCC1)C', 'CC1=C(NC(=C1)C)CC1=C(C=CC=C1)S(=O)(=O)N1CCCC1
Could not canonicalize SMILES for product at index 10641, value is ClC1=C(C=C(C=C1)O)C1=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C=C1Cl', 'ClC=1C(=CC2=C(N=C(N=N2)NC2=CC=C(C=C2)OCCN2CCCC2)C1)C1=C(C=CC(=C1)OC)Cl
Could not canonicalize SMILES for product at index 10770, value is C(C1=CC=CC=C1)C=1C(=NC=C(N1)C1=CC=C(C=C1)O)NS(=O)(=O)C1=CC=C(C=C1)C', 'CC1=CC=C(C=C1)S(=O)(=O)N
Could not canonicalize SMILES for product at index 11035, value is C(C1=CC=CC=C1)OC=1C(=NC(=NC1C)C[C@@H]1OC[C@H](CO1)C1=CC=C(C=C1)F)C(=O)NCC(=O)OCC', 'C(C1=CC=CC=C1)OC=1C(=NC(=NC1C)C[C@@H]1OC[C@@H](CO1)C1=CC=C(C=C1)F)C(=O)NCC(=O)OCC
Could not canonicalize SMI

[20:09:47] SMILES Parse Error: syntax error while parsing: C1C=CC2=CC=CC=C12',
[20:09:47] SMILES Parse Error: Failed parsing SMILES 'C1C=CC2=CC=CC=C12',' for input: 'C1C=CC2=CC=CC=C12','
[20:09:47] SMILES Parse Error: syntax error while parsing: FC(S(=O)(=O)N=[N+]=[N-])(F)F',
[20:09:47] SMILES Parse Error: Failed parsing SMILES 'FC(S(=O)(=O)N=[N+]=[N-])(F)F',' for input: 'FC(S(=O)(=O)N=[N+]=[N-])(F)F','
[20:09:47] SMILES Parse Error: syntax error while parsing: [Si](C)(C)(C(C)(C)C)O[C@H]([C@H](C)NC(OCC1=CC=CC=C1)=O)C#N',
[20:09:47] SMILES Parse Error: Failed parsing SMILES '[Si](C)(C)(C(C)(C)C)O[C@H]([C@H](C)NC(OCC1=CC=CC=C1)=O)C#N',' for input: '[Si](C)(C)(C(C)(C)C)O[C@H]([C@H](C)NC(OCC1=CC=CC=C1)=O)C#N','
[20:09:47] SMILES Parse Error: syntax error while parsing: C(C)(C)(C)OC(=O)NCC=1C=C(C=CC1)C1=CC(=CC=C1)COC1=C(C=CC(=C1)CC1CC1)CC(=O)OC',
[20:09:47] SMILES Parse Error: Failed parsing SMILES 'C(C)(C)(C)OC(=O)NCC=1C=C(C=CC1)C1=CC(=CC=C1)COC1=C(C=CC(=C1)CC1CC1)CC(=O)OC',' for input: 'C

Could not canonicalize SMILES for product at index 12329, value is C1C=CC2=CC=CC=C12', 'CC1(OB(OC1(C)C)C1=CC=C(C=C1)NC(=O)NC1=CC(=CC=C1)C(F)(F)F)C
Could not canonicalize SMILES for product at index 12376, value is FC(S(=O)(=O)N=[N+]=[N-])(F)F', 'N(=[N+]=[N-])CCC1=C(NC2=CC=C(C=C12)Cl)C(=O)NCCC1=CC=C(C=C1)N1CCCCC1
Could not canonicalize SMILES for product at index 12494, value is [Si](C)(C)(C(C)(C)C)O[C@H]([C@H](C)NC(OCC1=CC=CC=C1)=O)C#N', '[Si](C)(C)(C(C)(C)C)O[C@@H]([C@H](C)NC(OCC1=CC=CC=C1)=O)C#N
Could not canonicalize SMILES for product at index 12915, value is C(C)(C)(C)OC(=O)NCC=1C=C(C=CC1)C1=CC(=CC=C1)COC1=C(C=CC(=C1)CC1CC1)CC(=O)OC', 'C(CC=C)C1=CC(=C(C=C1)CC(=O)OC)OCC=1C=C(C=CC1)C1=CC(=CC=C1)CNC(=O)OC(C)(C)C
Could not canonicalize SMILES for product at index 12947, value is CC1=NC2=CC=C(C=C2C1(C)C)S(=O)(=O)O', 'CCOCC
Could not canonicalize SMILES for product at index 13050, value is C(CC(O)(C(=O)[O-])CC(=O)[O-])(=O)[O-]', '[Zn]
Could not canonicalize SMILES for product at index 1

[20:09:47] SMILES Parse Error: syntax error while parsing: FC(C=1C=C(C=C(C1)C(F)(F)F)C(=O)N1C[C@@H]2N(C[C@H]1CC1=CNC3=CC=CC=C13)C[C@@H](CC2)N2CCOCC2)(F)F',
[20:09:47] SMILES Parse Error: Failed parsing SMILES 'FC(C=1C=C(C=C(C1)C(F)(F)F)C(=O)N1C[C@@H]2N(C[C@H]1CC1=CNC3=CC=CC=C13)C[C@@H](CC2)N2CCOCC2)(F)F',' for input: 'FC(C=1C=C(C=C(C1)C(F)(F)F)C(=O)N1C[C@@H]2N(C[C@H]1CC1=CNC3=CC=CC=C13)C[C@@H](CC2)N2CCOCC2)(F)F','
[20:09:47] SMILES Parse Error: syntax error while parsing: FC(C=1C=C(C=C(C1)C(F)(F)F)C(=O)N1C[C@@H]2N(C[C@H]1CC1=CC=CC=C1)C[C@@H](CC2)N2CCOCC2)(F)F',
[20:09:47] SMILES Parse Error: Failed parsing SMILES 'FC(C=1C=C(C=C(C1)C(F)(F)F)C(=O)N1C[C@@H]2N(C[C@H]1CC1=CC=CC=C1)C[C@@H](CC2)N2CCOCC2)(F)F',' for input: 'FC(C=1C=C(C=C(C1)C(F)(F)F)C(=O)N1C[C@@H]2N(C[C@H]1CC1=CC=CC=C1)C[C@@H](CC2)N2CCOCC2)(F)F','
[20:09:47] SMILES Parse Error: syntax error while parsing: C(C)(C)OC1=NC(=C(C(=N1)S(=O)(=O)C)C1=CC=C(C=C1)Cl)C1=C(C=C(C=C1)Cl)Cl',
[20:09:47] SMILES Parse Error: Failed parsing SMILE

Could not canonicalize SMILES for product at index 13863, value is FC(C=1C=C(C=C(C1)C(F)(F)F)C(=O)N1C[C@@H]2N(C[C@H]1CC1=CNC3=CC=CC=C13)C[C@@H](CC2)N2CCOCC2)(F)F', 'FC(C=1C=C(C=C(C1)C(F)(F)F)C(=O)N1C[C@@H]2N(C[C@H]1CC1=CNC3=CC=CC=C13)C[C@H](CC2)N2CCOCC2)(F)F
Could not canonicalize SMILES for product at index 13865, value is FC(C=1C=C(C=C(C1)C(F)(F)F)C(=O)N1C[C@@H]2N(C[C@H]1CC1=CC=CC=C1)C[C@@H](CC2)N2CCOCC2)(F)F', 'FC(C=1C=C(C=C(C1)C(F)(F)F)C(=O)N1C[C@@H]2N(C[C@H]1CC1=CC=CC=C1)C[C@H](CC2)N2CCOCC2)(F)F
Could not canonicalize SMILES for product at index 14148, value is C(C)(C)OC1=NC(=C(C(=N1)S(=O)(=O)C)C1=CC=C(C=C1)Cl)C1=C(C=C(C=C1)Cl)Cl', 'C(C)(C)OC1=NC=NC(=C1C1=CC=C(C=C1)Cl)C1=C(C=C(C=C1)Cl)Cl
Could not canonicalize SMILES for product at index 14186, value is COC1=CC2=C(C3=CC4=CC=C(C=C4C(=C3CC2)O)OC)C=C1', 'ClC=1C2=CC=C(C=C2C(=C2CCC3=C(C12)C=CC(=C3)OC)O)OC
Could not canonicalize SMILES for product at index 14728, value is C(C)OC(=O)C1=CC=NN1C1=CC=C(C=C1)OC1=CC=CC=C1', 'C(C)OC(=O)C1=NN(C

[20:09:47] SMILES Parse Error: syntax error while parsing: CN1C(=C(C2=CC=CC=C12)CC(C)C)C(=O)N[C@@H](C(C)C)C(=O)NC(CC(=O)OC(C)(C)C)C(COC1=C(C(=CC(=C1F)F)F)F)=O',
[20:09:47] SMILES Parse Error: Failed parsing SMILES 'CN1C(=C(C2=CC=CC=C12)CC(C)C)C(=O)N[C@@H](C(C)C)C(=O)NC(CC(=O)OC(C)(C)C)C(COC1=C(C(=CC(=C1F)F)F)F)=O',' for input: 'CN1C(=C(C2=CC=CC=C12)CC(C)C)C(=O)N[C@@H](C(C)C)C(=O)NC(CC(=O)OC(C)(C)C)C(COC1=C(C(=CC(=C1F)F)F)F)=O','
[20:09:47] SMILES Parse Error: syntax error while parsing: C(C1=CC=2OCOC2C=C1)N',
[20:09:47] SMILES Parse Error: Failed parsing SMILES 'C(C1=CC=2OCOC2C=C1)N',' for input: 'C(C1=CC=2OCOC2C=C1)N','
[20:09:47] SMILES Parse Error: syntax error while parsing: COC1=CC2=C(C3=CC4=CC=C(C=C4C(=C3CC2)O)OC)C=C1',
[20:09:47] SMILES Parse Error: Failed parsing SMILES 'COC1=CC2=C(C3=CC4=CC=C(C=C4C(=C3CC2)O)OC)C=C1',' for input: 'COC1=CC2=C(C3=CC4=CC=C(C=C4C(=C3CC2)O)OC)C=C1','
[20:09:47] SMILES Parse Error: syntax error while parsing: N1(N=NN=C1)C=1C(=NC=CC1)C#N',
[20:09:47] 