In [1]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Descriptors, rdMolDescriptors
from rdkit.Chem import Draw

from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem.rdchem import Mol

import matplotlib.pyplot as plt
from PIL import Image, ImageDraw

import pandas as pd

In [2]:
def predict_products(substrate_smiles, smirks_pattern):
    try:
        # Convert the substrate SMILES to a molecule object
        substrate_mol = Chem.MolFromSmiles(substrate_smiles)
        
        # Check if conversion was successful; if not, return an empty list
        if substrate_mol is None:
            return []
        
        # Create a reaction object from the SMIRKS pattern
        reaction = AllChem.ReactionFromSmarts(smirks_pattern)
        
        # Apply the reaction to the substrate molecule
        products_sets = reaction.RunReactants((substrate_mol,))
        
        # If no products are generated, return an empty list
        if not products_sets:
            return []
        
        # Initialize a set to store unique product SMILES
        unique_products_smiles = set()
        
        # Iterate through the product sets and convert each product to a SMILES string
        for product_set in products_sets:
            for product in product_set:
                # Canonicalize the SMILES and add to the set to ensure uniqueness
                product_smiles = Chem.MolToSmiles(product, isomericSmiles=True)
                unique_products_smiles.add(product_smiles)
        
        # If no unique products were added, return an empty list
        if not unique_products_smiles:
            return []
        
        # Convert the set to a list and return
        return list(unique_products_smiles)
    
    except Exception as e:
        # If any error occurs, return an empty list
        return []

In [3]:
def filter_by_mass_difference(product_smiles, substrate_smiles, mass_difference):
    """
    Filters a list of product molecules based on their monoisotopic mass difference relative to a substrate molecule, 
    both represented as SMILES strings. 
    Products are filtered to either those with the minimum positive mass difference (mass_difference = '+') 
    or those with the maximum negative mass difference (mass_difference = '-') from the substrate. 
    Invalid SMILES are ignored.

    Args:
    product_smiles (list of str): List of SMILES strings for product molecules.
    substrate_smiles (str): SMILES string for the substrate molecule.
    mass_difference (str): Specifies the direction of mass difference to filter by ('+' for positive, '-' for negative).

    Returns:
    list of str: SMILES strings of products matching the specified mass difference criteria.

    Raises:
    ValueError: If substrate SMILES is invalid or mass_difference is not '+' or '-'"
    """
    try:
        # Convert substrate SMILES to molecule and calculate its monoisotopic mass
        substrate_mol = Chem.MolFromSmiles(substrate_smiles)
        if substrate_mol is None:
            raise ValueError("Invalid substrate SMILES.")
        
        substrate_mass = Descriptors.ExactMolWt(substrate_mol)
        
        # Initialize a list to hold products and their masses
        product_masses = []
        
        # Iterate through the product SMILES
        for smi in product_smiles:
            # Convert product SMILES to molecule and calculate its monoisotopic mass
            product_mol = Chem.MolFromSmiles(smi)
            if product_mol is None:
                continue  # Skip invalid product SMILES
                
            product_mass = Descriptors.ExactMolWt(product_mol)
            mass_diff = product_mass - substrate_mass
            
            # Store product masses and SMILES
            if (mass_difference == '+' and mass_diff > 0) or (mass_difference == '-' and mass_diff < 0):
                product_masses.append((mass_diff, smi))
        
        # Filter based on mass_difference criteria
        if mass_difference == '+':
            # Find the minimum positive mass difference
            min_mass_diff = min([x[0] for x in product_masses] or [0])
            # Filter products with the minimum mass difference
            matching_products = [smi for diff, smi in product_masses if diff == min_mass_diff]
        elif mass_difference == '-':
            # Find the maximum negative mass difference
            max_mass_diff = max([x[0] for x in product_masses] or [0])
            # Filter products with the maximum mass difference
            matching_products = [smi for diff, smi in product_masses if diff == max_mass_diff]
        else:
            # If mass_difference is not '+' or '-', return an empty list (or handle as needed)
            raise ValueError("Invalid mass_difference value. Use '+' or '-'.")
        
        return matching_products
    
    except Exception as e:
        # Handle exceptions, possibly logging them or returning an error message
        print(f"An error occurred: {e}")  # Optionally log the error message
        return []

In [4]:
def find_sub_structure(smiles, smarts):
    """
    This function finds and highlights the substructure in a molecule defined by a SMARTS pattern.

    Parameters:
    - smiles (str): The SMILES representation of the molecule.
    - smarts (str): The SMARTS pattern to search for within the molecule.

    Returns:
    - An image of the molecule with the matching substructure highlighted, if any matches are found.
    """
    # Convert the SMILES string to an RDKit molecule object
    mol = Chem.MolFromSmiles(smiles)
    if not mol:
        return "Invalid SMILES string."

    # Convert the SMARTS string to an RDKit molecule object
    query = Chem.MolFromSmarts(smarts)
    if not query:
        return "Invalid SMARTS pattern."

    # Find the atoms in the molecule that match the SMARTS pattern
    matches = mol.GetSubstructMatches(query, uniquify=False)

    print(matches)
    img = Draw.MolToImage(mol, highlightAtoms=sum(matches, ()), subImgSize=(500, 500))
    return img

In [5]:
def smarts_to_formula(smarts):
    # Convert the SMARTS string to a molecule object
    molecule = Chem.MolFromSmarts(smarts)
    if molecule is None:
        return "Invalid SMARTS string"
    try:
        # Attempt to sanitize the molecule to ensure properties can be calculated
        Chem.SanitizeMol(molecule)
        # Calculate the molecular formula
        formula = rdMolDescriptors.CalcMolFormula(molecule)
        return formula + ' (ignore H)'
    except Exception as e:
        return f"Error processing molecule: {str(e)}"

In [6]:
# Plot a pipe: substract -> reaction group -> product
# usage: plot_reaction_scheme(substrate, product, smarts)

# def find_sub_structure(substrate, smarts):
#     try:
#         substructure = substrate.GetSubstructMatch(Chem.MolFromSmarts(smarts))
#         if substructure:
#             img = Draw.MolToImage(substrate, highlightAtoms=substructure, size=(300, 300))
#             return img
#         else:
#             raise ValueError("Substructure matching failed.")
#     except Exception as e:
#         # If anything goes wrong, return a placeholder image with "Missing"
#         img = Image.new('RGB', (300, 300), color = (255, 255, 255))
#         d = ImageDraw.Draw(img)
#         d.text((10,10), "Missing", fill=(0,0,0))
#         return img

def plot_reaction_scheme(substrate_smiles, product_smiles, smarts_pattern):
    try:
        # Create RDKit molecule objects
        mol_substrate = Chem.MolFromSmiles(substrate_smiles)
        mol_product = Chem.MolFromSmiles(product_smiles)

        # Generate images for the substrate and product
        substrate_img = Draw.MolToImage(mol_substrate, size=(300, 300))
        product_img = Draw.MolToImage(mol_product, size=(300, 300))

        # Find and generate the sub-structure image
        sub_structure_img = find_sub_structure(mol_substrate, smarts_pattern)
    except Exception as e:
        print(f"Error processing molecules: {e}")
        return

    # Create a figure and axes with 1 row and 5 columns
    fig, ax = plt.subplots(1, 5, figsize=(15, 3))

    # Plotting substrate
    try:
        ax[0].imshow(substrate_img)
    except Exception:
        ax[0].text(0.5, 0.5, 'Missing', horizontalalignment='center', verticalalignment='center')
    ax[0].axis("off")

    # First arrow
    ax[1].text(0.5, 0.5, '→', horizontalalignment='center', verticalalignment='center', fontsize=30, color="darkgrey")
    ax[1].axis("off")

    # Plotting sub-structure
    try:
        ax[2].imshow(sub_structure_img)
    except Exception:
        ax[2].text(0.5, 0.5, 'Missing', horizontalalignment='center', verticalalignment='center')
    ax[2].axis("off")

    # Second arrow
    ax[3].text(0.5, 0.5, '→', horizontalalignment='center', verticalalignment='center', fontsize=30, color="darkgrey")
    ax[3].axis("off")

    # Plotting product
    try:
        ax[4].imshow(product_img)
    except Exception:
        ax[4].text(0.5, 0.5, 'Missing', horizontalalignment='center', verticalalignment='center')
    ax[4].axis("off")

    plt.tight_layout()
    plt.show()

In [7]:
import matplotlib.pyplot as plt
from rdkit import Chem
from rdkit.Chem import Draw
from PIL import Image, ImageDraw
import io

def find_sub_structure(smiles, smarts):
    """
    Finds and highlights the substructure in a molecule defined by a SMARTS pattern.
    """
    mol = Chem.MolFromSmiles(smiles)
    if not mol:
        return "Invalid SMILES string."

    query = Chem.MolFromSmarts(smarts)
    if not query:
        return "Invalid SMARTS pattern."

    matches = mol.GetSubstructMatches(query, uniquify=True)
    if not matches:
        return None  # Adjusted to return None for no matches

    img = Draw.MolToImage(mol, highlightAtoms=sum(matches, ()), size=(300, 300))
    return img

def plot_reaction_scheme(substrate_smiles, product_smiles, smarts_pattern):
    """
    Plots a reaction scheme showing the substrate, sub-structure, and product.
    """
    try:
        mol_substrate = Chem.MolFromSmiles(substrate_smiles)
        substrate_img = Draw.MolToImage(mol_substrate, size=(300, 300))
    except Exception as e:
        substrate_img = "Error"

    try:
        mol_product = Chem.MolFromSmiles(product_smiles)
        product_img = Draw.MolToImage(mol_product, size=(300, 300))
    except Exception as e:
        product_img = "Error"

    # Use the provided find_sub_structure function
    sub_structure_img = find_sub_structure(substrate_smiles, smarts_pattern)
    if isinstance(sub_structure_img, str):  # Check if the function returned an error message
        sub_structure_img = "Error"

    # Create a figure and axes with 1 row and 5 columns
    fig, ax = plt.subplots(1, 5, figsize=(15, 3))

    # Helper function to plot images or placeholders
    def plot_image_or_placeholder(ax, img, placeholder_text="Missing"):
        if img == "Error":
            ax.text(0.5, 0.5, placeholder_text, horizontalalignment='center', verticalalignment='center', fontsize=12)
            ax.axis("off")
        else:
            ax.imshow(img)
            ax.axis("off")

    # Plotting substrate
    plot_image_or_placeholder(ax[0], substrate_img)

    # First arrow
    ax[1].text(0.5, 0.5, '→', horizontalalignment='center', verticalalignment='center', fontsize=30, color="royalblue")
    ax[1].axis("off")

    # Plotting sub-structure
    plot_image_or_placeholder(ax[2], sub_structure_img, "No Match")

    # Second arrow
    ax[3].text(0.5, 0.5, '→', horizontalalignment='center', verticalalignment='center', fontsize=30, color="royalblue")
    ax[3].axis("off")

    # Plotting product
    plot_image_or_placeholder(ax[4], product_img)

    plt.tight_layout()
    plt.show()

In [8]:
def predict_products_group(substrate, subgroup, smirks):
    mol = Chem.MolFromSmiles(substrate)
    pattern = Chem.MolFromSmarts(subgroup)
    matches = mol.GetSubstructMatches(pattern)
    modified_molecules = []

    reaction = AllChem.ReactionFromSmarts(smirks)

    for match in matches:
        # Apply the reaction to the molecule. The reaction expects a list of reactants, but we have only one.
        products = reaction.RunReactants((mol,))

        # Assuming the reaction produces at least one product, take the first product.
        if products:
            product_mol = products[0][0]  # Take the first product
            modified_smiles = Chem.MolToSmiles(product_mol, isomericSmiles=True)
            modified_molecules.append(modified_smiles)

    return modified_molecules

In [9]:
def drop_group(substrate, subgroup, replacement_group=None):
    substrate_mol = Chem.MolFromSmiles(substrate)
    subgroup_mol = Chem.MolFromSmiles(subgroup)
    replacement_mol = Chem.MolFromSmiles(replacement_group) if replacement_group else None
    
    matches = substrate_mol.GetSubstructMatches(subgroup_mol, uniquify=True)
    modified_molecules = set()

    for match in matches:
        editable_mol = Chem.RWMol(Chem.Mol(substrate_mol))
        
        # Collect atoms to remove and check ring status
        atoms_to_remove = []
        ring_atoms_to_modify = []  # List to keep track of ring atoms
        for atom_idx in match:
            atom = editable_mol.GetAtomWithIdx(atom_idx)
            for neighbor in atom.GetNeighbors():
                if neighbor.GetIdx() not in match:
                    if neighbor.IsInRing():
                        ring_atoms_to_modify.append(neighbor.GetIdx())
                    break
            else:
                atoms_to_remove.append(atom_idx)
        
        # Remove specified atoms
        for atom_idx in sorted(atoms_to_remove, reverse=True):
            editable_mol.RemoveAtom(atom_idx)
    
        modified_molecule = editable_mol.GetMol()  # Finalize modifications
        
        modified_smiles = Chem.MolToSmiles(modified_molecule, isomericSmiles=True)
        modified_molecules.add(modified_smiles)
    
    if not modified_molecules:
        return [substrate]  # Return the original if no modifications were made
    
    return list(modified_molecules)

# test

In [10]:
rxn_rules_table = pd.read_csv('/Users/bowen/Desktop/MCID2.0/02_code/02_products_pred/rxs_info.csv', encoding='latin1')

In [11]:
def expand_reaction_rules(df):
    # Using str.split() to split the 'ReactionRule' entries into lists of rules
    df['ReactionRule'] = df['ReactionRule'].str.split(', ')
    # Exploding the lists into separate rows
    df_expanded = df.explode('ReactionRule')
    df_expanded['mass_difference'] = df_expanded['Reaction'].astype(str).str[1]
    df_expanded = df_expanded[df_expanded['ReactionRule'].str.len() >= 1]

    return df_expanded

# Applying the function to the dataframe
rxn_rules = expand_reaction_rules(rxn_rules_table)

In [12]:
substrate = 'CCCCN' 
smirks = '[*:1]-[NH2]>>[*:1](NC(=O)CCC(N)C(=O)O)'
mass_difference = '+'

In [13]:
all_products = predict_products(substrate, smirks)
products = filter_by_mass_difference(all_products, substrate, mass_difference)
print(products)

['CCCCNC(=O)CCC(N)C(=O)O']


# Function

In [14]:
def summarize_reaction_data(substrate, rxn_rules_df):
    """
    Generates a summary table of reactions based on given substrate and a DataFrame of reaction rules.

    :param substrate: The substrate molecule for the reactions.
    :param rxn_rules_df: A DataFrame containing the reaction rules and associated metadata.
    :return: A list of dictionaries, each representing the reaction data for reactions that produce valid products.
    """
    summary_table = []

    # Iterate over each row in the DataFrame
    for index, rule in rxn_rules_df.iterrows():
        smirks = rule['ReactionRule']
        mass_difference = rule['mass_difference']
        description = rule['Description']
        reaction_id = rule['ID']

        # Predict products using the SMIRKS pattern
        all_products = predict_products(substrate, smirks)

        # Filter products by the specified mass difference
        products = filter_by_mass_difference(all_products, substrate, mass_difference)

        if products:
            for product in products:
                summary_table.append({
                    'Substrate': substrate,
                    'Product': product,
                    'Reaction': description,
                    'Reaction rules': smirks,
                    'ReactionID': reaction_id
                })

    result_df = pd.DataFrame(summary_table)
    return result_df

In [15]:
def predict_products_group(substrate):
    subgroup='[#6]1[#6][#6][#6][#6][#6]1[OH]'
    smirks='[OH:1]>>[O:1]-S(=O)(=O)O'
    mol = Chem.MolFromSmiles(substrate)
    pattern = Chem.MolFromSmarts(subgroup)
    matches = mol.GetSubstructMatches(pattern)
    data = []

    reaction = AllChem.ReactionFromSmarts(smirks)

    for match in matches:
        products = reaction.RunReactants((mol,))
        if products:
            product_mol = products[0][0]
            modified_smiles = Chem.MolToSmiles(product_mol, isomericSmiles=True)
            data.append({
                'Substrate': substrate,
                'Product': modified_smiles,
                'Reaction': 'sulfate conjugation',
                'Reaction rules': smirks,
                'ReactionID': 'R26'
            })

    if not data:  # In case no reaction takes place
        data.append({
            'Substrate': substrate,
            'Product': substrate,
            'Reaction': 'sulfate conjugation',
            'Reaction rules': smirks,
            'ReactionID': 'R26'
        })

    return pd.DataFrame(data)

def drop_group(substrate, replacement_group=None):
    subgroup = 'C1C(C(OC1N2C=NC3=C(N=CN=C32)N)CO)O'
    substrate_mol = Chem.MolFromSmiles(substrate)
    subgroup_mol = Chem.MolFromSmiles(subgroup)
    replacement_mol = Chem.MolFromSmiles(replacement_group) if replacement_group else None
    
    matches = substrate_mol.GetSubstructMatches(subgroup_mol, uniquify=True)
    data = []

    for match in matches:
        editable_mol = Chem.RWMol(Chem.Mol(substrate_mol))
        atoms_to_remove = []

        for atom_idx in match:
            atom = editable_mol.GetAtomWithIdx(atom_idx)
            for neighbor in atom.GetNeighbors():
                if neighbor.GetIdx() not in match:
                    break
            else:
                atoms_to_remove.append(atom_idx)
        
        for atom_idx in sorted(atoms_to_remove, reverse=True):
            editable_mol.RemoveAtom(atom_idx)
    
        modified_molecule = editable_mol.GetMol()
        modified_smiles = Chem.MolToSmiles(modified_molecule, isomericSmiles=True)
        data.append({
            'Substrate': substrate,
            'Product': modified_smiles,
            'Reaction': 'loss of deoxyadenosine',
            'Reaction rules': subgroup,
            'ReactionID': 'R59'
        })

    if not data:
        data.append({
            'Substrate': substrate,
            'Product': substrate,
            'Reaction': 'loss of deoxyadenosine',
            'Reaction rules': subgroup,
            'ReactionID': 'R59'
        })

    return pd.DataFrame(data)

In [16]:
def pipeline_function(substrate, rxn_rules_df):
    # Step 1: Execute each function
    try:
        df1 = summarize_reaction_data(substrate, rxn_rules_df)
        df2 = predict_products_group(substrate)
        df3 = drop_group(substrate)
    except Exception as e:
        # Handle potential errors in function execution
        print(f"Error during processing: {e}")
        return None
    
    # Step 2: Concatenate all DataFrames
    result_df = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
    
    return result_df

# run test 

In [17]:
pipeline_function(substrate='C(C1C(C(C(C(O1)O)O)O)O)', rxn_rules_df=rxn_rules)

[18:53:06] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[18:53:06] SMARTS Parse Error: syntax error while parsing: *:1]C(=O)-O
[18:53:06] SMARTS Parse Error: Failed parsing SMARTS '*:1]C(=O)-O' for input: '*:1]C(=O)-O'
[18:53:06] product atom-mapping number 5 not found in reactants.
[18:53:06] product atom-mapping number 6 not found in reactants.
[18:53:06] product atom-mapping number 7 not found in reactants.
[18:53:06] product atom-mapping number 3 not found in reactants.
[18:53:06] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[18:53:06] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 4 
[18:53:06] SMARTS Parse Error: syntax error while parsing: [*:1][CH2]C(=[*:3])NC(C-S)C(O)=O'
[18:53:06] SMARTS Parse Error: Failed parsing SMARTS '[*:1][CH2]C(=[*:3])NC(C-S)C(O)=O'' for input: '[*:1][CH2]C(=[*:3])NC(C-S)C(O)=O''
[18:53:06] product atom-mapping number 5 not 

Unnamed: 0,Substrate,Product,Reaction,Reaction rules,ReactionID
0,C(C1C(C(C(C(O1)O)O)O)O),CC1OC(O)C([O-])C(O)C1O,dehydrogenation,[*:1][O]>>[*:1][O-],R01
1,C(C1C(C(C(C(O1)O)O)O)O),CC1OC([O-])C(O)C(O)C1O,dehydrogenation,[*:1][O]>>[*:1][O-],R01
2,C(C1C(C(C(C(O1)O)O)O)O),CC1OC(O)C(O)C([O-])C1O,dehydrogenation,[*:1][O]>>[*:1][O-],R01
3,C(C1C(C(C(C(O1)O)O)O)O),CC1OC(O)C(O)C(O)C1[O-],dehydrogenation,[*:1][O]>>[*:1][O-],R01
4,C(C1C(C(C(C(O1)O)O)O)O),OC=COC(O)C(O)CO,dehydrogenation,[*:1]-C-C-[*:2]>>[*:1]-C=C-[*:2],R01
5,C(C1C(C(C(C(O1)O)O)O)O),C=C1OC(O)C(O)C(O)C1O,dehydrogenation,[C:1]-C>>[C:1]=[CH2],R01
6,C(C1C(C(C(C(O1)O)O)O)O),CC1OC(O)C(O)C(C(=O)O)C1O,addition of CO,[*:1]-O>>[*:1]-C(=O)O,R14
7,C(C1C(C(C(C(O1)O)O)O)O),CC1OC(O)C(C(=O)O)C(O)C1O,addition of CO,[*:1]-O>>[*:1]-C(=O)O,R14
8,C(C1C(C(C(C(O1)O)O)O)O),CC1OC(C(=O)O)C(O)C(O)C1O,addition of CO,[*:1]-O>>[*:1]-C(=O)O,R14
9,C(C1C(C(C(C(O1)O)O)O)O),CC1OC(O)C(O)C(O)C1C(=O)O,addition of CO,[*:1]-O>>[*:1]-C(=O)O,R14


In [19]:
import pandas as pd

def process_substrate_df(substrate_df, rxn_rules_df):
    # Container for the resulting DataFrames
    results = []

    # Loop through each row in the substrate DataFrame
    for index, row in substrate_df.iterrows():
        substrate = row['SMILES']
        substrate_name = row['Name']

        # Call the pipeline function
        result_df = pipeline_function(substrate, rxn_rules_df)

        if result_df is not None:
            # Add the Substrate_Name column
            result_df['Substrate_Name'] = substrate_name
            results.append(result_df)

    # Concatenate all results into one DataFrame if there are any results
    if results:
        final_df = pd.concat(results, ignore_index=True)
        return final_df
    else:
        return pd.DataFrame()  # Return an empty DataFrame if no results

test = pd.read_csv('/Users/bowen/Desktop/MCID2.0/02_code/02_products_pred/pipeline/output.csv')
process_substrate_df(test, rxn_rules)

[18:53:14] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[18:53:14] SMARTS Parse Error: syntax error while parsing: *:1]C(=O)-O
[18:53:14] SMARTS Parse Error: Failed parsing SMARTS '*:1]C(=O)-O' for input: '*:1]C(=O)-O'
[18:53:14] product atom-mapping number 5 not found in reactants.
[18:53:14] product atom-mapping number 6 not found in reactants.
[18:53:14] product atom-mapping number 7 not found in reactants.
[18:53:14] product atom-mapping number 3 not found in reactants.
[18:53:14] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[18:53:14] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 4 
[18:53:14] SMARTS Parse Error: syntax error while parsing: [*:1][CH2]C(=[*:3])NC(C-S)C(O)=O'
[18:53:14] SMARTS Parse Error: Failed parsing SMARTS '[*:1][CH2]C(=[*:3])NC(C-S)C(O)=O'' for input: '[*:1][CH2]C(=[*:3])NC(C-S)C(O)=O''
[18:53:14] product atom-mapping number 5 not 

[18:53:14] SMARTS Parse Error: syntax error while parsing: *:1]C(=O)-O
[18:53:14] SMARTS Parse Error: Failed parsing SMARTS '*:1]C(=O)-O' for input: '*:1]C(=O)-O'
[18:53:14] product atom-mapping number 5 not found in reactants.
[18:53:14] product atom-mapping number 6 not found in reactants.
[18:53:14] product atom-mapping number 7 not found in reactants.
[18:53:14] product atom-mapping number 3 not found in reactants.
[18:53:14] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[18:53:14] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 4 
[18:53:14] SMARTS Parse Error: syntax error while parsing: [*:1][CH2]C(=[*:3])NC(C-S)C(O)=O'
[18:53:14] SMARTS Parse Error: Failed parsing SMARTS '[*:1][CH2]C(=[*:3])NC(C-S)C(O)=O'' for input: '[*:1][CH2]C(=[*:3])NC(C-S)C(O)=O''
[18:53:14] product atom-mapping number 5 not found in reactants.
[18:53:14] mapped atoms in the reactants were not mapped in the products.
  unmap

[18:53:15] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[18:53:15] SMARTS Parse Error: syntax error while parsing: *:1]C(=O)-O
[18:53:15] SMARTS Parse Error: Failed parsing SMARTS '*:1]C(=O)-O' for input: '*:1]C(=O)-O'
[18:53:15] product atom-mapping number 5 not found in reactants.
[18:53:15] product atom-mapping number 6 not found in reactants.
[18:53:15] product atom-mapping number 7 not found in reactants.
[18:53:15] product atom-mapping number 3 not found in reactants.
[18:53:15] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[18:53:15] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 4 
[18:53:15] SMARTS Parse Error: syntax error while parsing: [*:1][CH2]C(=[*:3])NC(C-S)C(O)=O'
[18:53:15] SMARTS Parse Error: Failed parsing SMARTS '[*:1][CH2]C(=[*:3])NC(C-S)C(O)=O'' for input: '[*:1][CH2]C(=[*:3])NC(C-S)C(O)=O''
[18:53:15] product atom-mapping number 5 not 

[18:53:16] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[18:53:16] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 9 10 16 17
[18:53:16] Can't kekulize mol.  Unkekulized atoms: 36 37 40 42 43 44 45
[18:53:16] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 11 12 49 50
[18:53:16] Explicit valence for atom # 6 C, 6, is greater than permitted
[18:53:16] Can't kekulize mol.  Unkekulized atoms: 36 37 39 40 41 42 43
[18:53:16] Can't kekulize mol.  Unkekulized atoms: 36 37 38 39 41 42 43
[18:53:16] SMARTS Parse Error: syntax error while parsing: *:1]C(=O)-O
[18:53:16] SMARTS Parse Error: Failed parsing SMARTS '*:1]C(=O)-O' for input: '*:1]C(=O)-O'
[18:53:16] product atom-mapping number 5 not found in reactants.
[18:53:16] product atom-mapping number 6 not found in reactants.
[18:53:16] product atom-mapping number 7 not found in reactants.
[18:53:16] product atom-mapping number 3 not found in reactants.
[18:53:16] mapped atoms in the reactants w

[18:53:16] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[18:53:16] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 23 24
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 18 C, 5, is greater than permitted
[18:53:16] Can't kekulize mol.  Unkekulized atoms: 10 11 13 14 15 16 17
[18:53:16] Can't kekulize mol.  Unkekulized atoms: 10 11 12 13 15 16 17
[18:53:16] SMARTS Parse Error: syntax error while parsing: *:1]C(=O)-O
[18:53:16] SMARTS Parse Error: Failed parsing SMARTS '*:1]C(=O)-O' for input: '*:1]C(=O)-O'
[18:53:16] product atom-mapping number 5 not found in reactants.
[18:53:16] product atom-mapping number 6 not found in reactants.
[18:53:16] product atom-mapping number 7 not found in reactants.
[18:53:16] product atom-mapping number 3 not found in reactants.
[18:53:16] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[18:53:16] mapped

[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is greater than permitted
[18:53:16] Explicit valence for atom # 1 C, 5, is g

[18:53:17] product atom-mapping number 5 not found in reactants.
[18:53:17] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[18:53:17] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 4 3 2 5 
[18:53:17] SMARTS Parse Error: unclosed ring for input: '[n+:1]1(c2c(O)c(O)c(CO)o2)ccccc'
[18:53:17] product atom-mapping number 2 not found in reactants.
[18:53:17] product atom-mapping number 4 not found in reactants.
[18:53:17] product atom-mapping number 2 not found in reactants.
[18:53:17] product atom-mapping number 3 not found in reactants.
[18:53:17] product atom-mapping number 2 not found in reactants.
[18:53:17] product atom-mapping number 3 not found in reactants.


Unnamed: 0,Substrate,Product,Reaction,Reaction rules,ReactionID,Substrate_Name
0,O,O,sulfate conjugation,[OH:1]>>[O:1]-S(=O)(=O)O,R26,H2O
1,O,O,loss of deoxyadenosine,C1C(C(OC1N2C=NC3=C(N=CN=C32)N)CO)O,R59,H2O
2,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O...,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O...,dehydrogenation,[*:1][O]>>[*:1][O-],R01,ATP
3,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O...,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O...,dehydrogenation,[*:1][O]>>[*:1][O-],R01,ATP
4,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O...,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O...,dehydrogenation,[*:1][O]>>[*:1][O-],R01,ATP
...,...,...,...,...,...,...
1355,Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O...,NC1=C2N=CN(C3OC(COP(=O)(O)OP(=O)(O)OC[C@H]4O[C...,addition of AMP,[*:1]-P(=O)([OH1])[OH1]>>C1=NC(=C2C(=N1)N(C=N2...,R72,GDP
1356,Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O...,NC(CCC(=O)Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)...,addition of glutamate,[*:1]-[NH2]>>[*:1](NC(=O)CCC(N)C(=O)O),R74,GDP
1357,Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O...,Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OC(=O)CCC(...,addition of glutamate,[*:1]-OP(=O)(O)O>>[*:1]-OP(=O)(O)OC(=O)CCC(N)C...,R74,GDP
1358,Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O...,Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O...,sulfate conjugation,[OH:1]>>[O:1]-S(=O)(=O)O,R26,GDP
