In [None]:
from SynTemp.SynUtils.utils import load_database, load_from_pickle
data = load_from_pickle('./Data/uspto/uspto_its_graph_rules_cluster.pkl.gz')

In [None]:
from SynTemp.SynMØD.naive_cluster import NaiveCluster
node_label_names = ["element", "charge"]
naive_cluster = NaiveCluster(node_label_names=node_label_names, node_label_default=["*", 0], edge_attribute="order")
its_graph_rules_cluster = naive_cluster.process_rules_clustering(data, rule_column='GraphRules')

In [None]:
its_graph_rules_cluster[0]

In [None]:
from SynTemp.SynUtils.utils import stratified_random_sample
import pandas as pd
sampled_data = stratified_random_sample(its_graph_rules_cluster, property_key='naive_cluster', samples_per_class=1, seed=23)
pd.DataFrame(sampled_data)['Reaction Type'].value_counts()

In [None]:
single = [x['GraphRules'][2] for x in sampled_data if x['Reaction Type'] == 'Single Cyclic']
complex = [x['GraphRules'][2] for x in sampled_data if x['Reaction Type'] == 'Complex Cyclic']
neither = [x['GraphRules'][2] for x in sampled_data if x['Reaction Type'] == 'None']
acyclic = [x['GraphRules'][2] for x in sampled_data if x['Reaction Type'] == 'Acyclic']

In [None]:
from SynTemp.SynVis.chemical_graph_vis import ChemicalGraphVisualizer
vis = ChemicalGraphVisualizer()
vis.graph_vis(complex[4], show_node_labels=True)

In [None]:
from SynTemp.SynMØD.graph_rules_decompose import GraphRuleDecompose

In [None]:
from copy import deepcopy
complex_graph = complex[11]
# Add nodes and edges to complex_graph with the required attributes

single_cyclic_graphs = deepcopy(single)
# Define your single cyclic graphs by adding nodes and edges with the required attributes

# Call the function
explained_graphs = GraphRuleDecompose.bfs_remove_isomorphic_subgraphs(complex_graph, single_cyclic_graphs)

if explained_graphs is not None:
    print("List of single cyclic graphs that explain the complex graph:", explained_graphs)
    GraphRuleDecompose.visualize_with_common_subgraphs(complex_graph, explained_graphs)
else:
    print("Some parts of the complex graph could not be explained by any of the single cyclic graphs.")

In [None]:
import pandas as pd 

df = pd.read_csv('./Data/golden/golden_dataset.csv')
df.head(2)

from SynTemp.SynUtils.utils import load_database
data = load_database('./Data/golden/golden_aam_reactions.json.gz')
for key, value in enumerate(data):
    data[key]['ground_truth'] = df.iloc[key,0]

In [None]:
from SynTemp.SynUtils.utils import load_database
data = load_database('./Data/golden/golden_aam_reactions.json.gz')
for key, value in enumerate(data):
    data[key]['ground_truth'] = df.iloc[key,0]

In [None]:
pd.DataFrame(data).info()

In [None]:
from SynTemp.SynAAM.aam_validator import AMMValidator 
# Report the index of every entry whose RDT mapping cannot be checked against
# the ground truth, together with the reason, so the failure can be inspected.
for key, value in enumerate(data):
    try:
        AMMValidator.smiles_check(value['ground_truth'], value['rdt'])
    except Exception as exc:  # not a bare except: Ctrl-C / SystemExit must still stop the scan
        print(key, f"{type(exc).__name__}: {exc}")

In [None]:
data[366]['rdt']

In [None]:
from SynTemp.SynAAM.aam_validator import AMMValidator  
results = AMMValidator.validate_smiles(data=data, ground_truth_col='ground_truth', 
                                       mapped_cols=['rxn_mapper', 'graphormer', 'local_mapper', 'rdt'], 
                                       check_method='RC', 
                                       ignore_aromaticity=False, n_jobs=4, verbose=0)

In [None]:
pd.DataFrame(results)[['mapper', 'accuracy']]

In [None]:
from SynTemp.SynAAM.aam_validator import AMMValidator  
results = AMMValidator.validate_smiles(data=data, ground_truth_col='Ground turth', 
                                       mapped_cols=['RXNMapper', 'GraphMapper', 'LocalMapper'], 
                                       check_method='RC', 
                                       ignore_aromaticity=False, n_jobs=4, verbose=0)

In [None]:
pd.DataFrame(results)[['mapper', 'accuracy']]

In [None]:
pd.DataFrame(results)[['mapper', 'accuracy']]

In [None]:
from SynTemp.SynUtils.utils import load_database
recon = load_database('./Data/Recon3D/Recon3D_aam_reactions.json.gz')

from SynTemp.SynAAM.aam_validator import AMMValidator  
results, _ = AMMValidator.validate_smiles(data=recon, ground_truth_col='ground_truth', 
                                       mapped_cols=['rxn_mapper', 'graphormer', 'local_mapper', 'rdt'], 
                                       check_method='RC', 
                                       ignore_aromaticity=False, n_jobs=4, verbose=0, ensemble=True)

In [None]:
pd.DataFrame(recon).to_csv('./Data/Recon3D/Recon3D_aam_reactions.csv')

In [None]:
import pandas as pd
pd.DataFrame(results)[['mapper', 'accuracy', 'success_rate']]

In [None]:
import pandas as pd

In [None]:
ecoli = pd.read_csv('./Data/ecoli/ecoli.smiles', header=None)
ecoli.rename({0:'ground_truth'}, axis=1, inplace=True)
ecoli['R-id'] = range(1, len(ecoli) + 1)

In [None]:
from rdkit import Chem
from rdkit.Chem import rdChemReactions
# Split the ecoli reactions by whether RDKit can parse them as a reaction.
# `ok` / `bug` hold positional row numbers (enumerate index), used later with .iloc.
ok = []
bug = []
for key, value in enumerate(ecoli['ground_truth']):
    try:
        rdChemReactions.ReactionFromSmarts(value)
        ok.append(key)
    except Exception:  # parse failures only; a bare except would also swallow Ctrl-C
        bug.append(key)

In [None]:
a,b,c =ecoli['ground_truth'][bug[0]].split('>>')

In [None]:
from rdkit import Chem
Chem.MolFromSmiles(a)

In [None]:
Chem.MolFromSmiles(b)

In [None]:
Chem.MolFromSmiles(c)

In [None]:
print(ecoli.iloc[bug,:]['ground_truth'])

In [None]:
ecoli = ecoli.iloc[ok, :]
ecoli.reset_index(drop=True, inplace=True)
#ecoli = ecoli.to_dict('records')

In [None]:
from rdkit import Chem
from typing import List, Optional
import re

def remove_atom_mapping(smiles: str) -> str:
    """
    Strip atom-map numbers from a SMILES string and unwrap simple bracket atoms.

    Two regular-expression passes are applied in order:
    1. Every ':' followed by one or more digits is deleted.
    2. A bracket atom made of one or two symbols from B, C, N, O, P, S, F, Cl, Br, I,
       optionally followed by 'H' and at most one digit, is replaced by the bare
       symbol(s); the explicit hydrogen count is dropped.

    Bracket atoms that carry anything else (charge, isotope, lowercase aromatic
    symbols, ...) do not match the second pattern and are left untouched.

    Parameters:
    - smiles (str): The SMILES (or reaction SMILES) string to be processed.

    Returns:
    - str: The string with mapping numbers removed and simple bracket atoms unwrapped.
    """
    # Pass 1: drop the ":<digits>" atom-map suffixes
    without_maps = re.sub(r":\d+", "", smiles)
    # Pass 2: "[CH3]" -> "C", "[Cl]" -> "Cl", while "[N+]" stays as it is
    return re.sub(
        r"\[(?P<atom>(B|C|N|O|P|S|F|Cl|Br|I){1,2})(?:H\d?)?\]",
        r"\g<atom>",
        without_maps,
    )

def mol_from_smiles(smiles: str) -> Optional[Chem.Mol]:
    """
    Parse a SMILES string into an RDKit Mol object, failing loudly on bad input.

    Parameters:
    - smiles (str): The SMILES string to be converted.

    Returns:
    - Chem.Mol: The molecule produced by `Chem.MolFromSmiles`. This function never
      returns None itself; a failed conversion is reported through the exception below.

    Raises:
    - ValueError: If `Chem.MolFromSmiles` returns None for the given string.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        return parsed
    raise ValueError(f"Invalid SMILES string: {smiles}")

def filter_valid_molecules(smiles_list: List[str]) -> List[Chem.Mol]:
    """
    Convert SMILES strings to RDKit Mol objects, keeping only usable molecules.

    A string is skipped when `mol_from_smiles` raises ValueError for it, or when
    the parsed molecule contains no atoms. Input order is preserved.

    Parameters:
    - smiles_list (List[str]): The SMILES strings to be processed.

    Returns:
    - List[Chem.Mol]: Mol objects for the valid, non-empty entries of the input list.
    """
    kept = []
    for candidate in smiles_list:
        try:
            parsed = mol_from_smiles(candidate)
            if parsed.GetNumAtoms() == 0:
                # parsed fine but describes nothing (e.g. an empty fragment)
                continue
            kept.append(parsed)
        except ValueError:
            # unparsable SMILES: silently dropped by design
            pass
    return kept

def standardize_rsmi(rsmi: str) -> str:
    """
    Rebuild a reaction SMILES from only its valid, non-empty molecules.

    The string is split on '>>' into a reactant side and a product side; each side
    is split on '.', passed through `filter_valid_molecules`, and every surviving
    molecule is written back with `Chem.MolToSmiles`.

    Parameters:
    - rsmi (str): The reaction SMILES string; it must contain exactly one '>>'
      (otherwise the two-way unpacking raises ValueError).

    Returns:
    - str: '<reactants>>><products>' where each side is the '.'-joined SMILES of
      the molecules that were kept.
    """
    lhs, rhs = rsmi.split('>>')
    # Filter both sides first, then serialise them, reactants before products
    mol_groups = [filter_valid_molecules(side.split('.')) for side in (lhs, rhs)]
    return '>>'.join(
        '.'.join(Chem.MolToSmiles(mol) for mol in group) for group in mol_groups
    )


ecoli['ground_truth'] = ecoli['ground_truth'].apply(standardize_rsmi)


In [None]:
import re
def remove_atom_mapping(smiles: str) -> str:
    """
    Remove ':<digits>' atom-map numbers, then unwrap simple bracket atoms.

    Only brackets holding one or two of B, C, N, O, P, S, F, Cl, Br, I (optionally
    followed by 'H' and at most one digit) are unwrapped; the hydrogen count is
    dropped. All other bracket atoms are returned unchanged.
    """
    stripped = re.sub(r":\d+", "", smiles)
    bracket_atom = r"\[(?P<atom>(B|C|N|O|P|S|F|Cl|Br|I){1,2})(?:H\d?)?\]"
    return re.sub(bracket_atom, r"\g<atom>", stripped)
ecoli['reactions'] = ecoli['ground_truth'].apply(remove_atom_mapping)
ecoli = ecoli.to_dict('records')

In [None]:
from SynTemp.SynUtils.utils import save_database
save_database(ecoli, './Data/ecoli/ecoli_reactions.json.gz')

In [None]:
# `ecoli` is a list of record dicts at this point (see `to_dict('records')`),
# so it has no `.loc`; index the list by position, then the dict by key.
Chem.MolFromSmiles(ecoli[189]['reactions'])

In [None]:
bug

In [None]:

rxn = rdChemReactions.ReactionFromSmarts('[C:1](=[O:2])O.[N:3]>>[C:1](=[O:2])[N:3]')
reacts = (Chem.MolFromSmiles('C(=O)O'),Chem.MolFromSmiles('CNC'))
products = rxn.RunReactants(reacts)

In [None]:
from SynTemp.SynUtils.utils import load_database
recon = load_database('./Data/Recon3D/Recon3D_aam_reactions.json.gz')

from SynTemp.SynAAM.aam_validator import AMMValidator  
results, _ = AMMValidator.validate_smiles(data=recon, ground_truth_col='ground_truth', 
                                       mapped_cols=['rxn_mapper', 'graphormer', 'local_mapper'], 
                                       check_method='RC', 
                                       ignore_aromaticity=False, n_jobs=4, verbose=0, ensemble=True)

import pandas as pd
pd.DataFrame(results)[['mapper', 'accuracy', 'success_rate']]

In [None]:
recon[0]

In [None]:
from SynTemp.SynUtils.utils import load_database
recon = load_database('./Data/ecoli/ecoli_aam_reactions.json.gz')

from SynTemp.SynAAM.aam_validator import AMMValidator  
results, _ = AMMValidator.validate_smiles(data=recon, ground_truth_col='ground_truth', 
                                       mapped_cols=['rxn_mapper', 'graphormer', 'local_mapper', 'rdt', 'ground_truth'], 
                                       check_method='RC', 
                                       ignore_aromaticity=False, n_jobs=4, verbose=0, ensemble=False)

import pandas as pd
pd.DataFrame(results)[['mapper', 'accuracy', 'success_rate']]

In [None]:
# Per-mapper outcome lists, indexed by row number of the validation summary.
# NOTE(review): rows 0-3 presumably follow the `mapped_cols` order passed to
# validate_smiles (rxn_mapper, graphormer, local_mapper, rdt) — confirm.
mapper_outcomes = pd.DataFrame(results)['results']
test = (
    pd.DataFrame(recon)
    .drop(columns=['reactions'])
    .assign(
        local_mapper_result=mapper_outcomes.loc[2],
        rxn_mapper_result=mapper_outcomes.loc[0],
        graphormer_result=mapper_outcomes.loc[1],
        rdt_result=mapper_outcomes.loc[3],
    )
)

In [None]:
test.to_csv('./Data/ecoli/ecoli_aam_reactions.csv')

In [None]:
pd.DataFrame(results)

In [None]:
pd.DataFrame(results).loc[2, 'results']

In [None]:
test_2 = test[['local_mapper_result', 'rxn_mapper_result', 'graphormer_result', 'rdt_result']]

In [None]:
def ensemble_results(df, threshold):
    """
    Majority-style vote across the columns of a boolean result table.

    Parameters:
    - df: DataFrame whose columns are per-mapper outcomes; values are summed per row,
      so True counts as 1 and False as 0.
    - threshold: Minimum row sum required for the row to count as a success.

    Returns:
    - Boolean Series (same index as `df`): True where the row sum is >= threshold.
    """
    return df.sum(axis=1).ge(threshold)

test_3 = ensemble_results(test_2, 2)

In [None]:
# Fraction of reactions accepted by the ensemble vote; the denominator is the
# actual number of rows in `test_3` rather than a hard-coded 273.
test_3.mean()

In [None]:
uspto_sample = pd.read_csv('./Data/aam_benchmark/USPTO_sampled.csv')

In [None]:
uspto_sample['LocalMapper_correct'].sum()

In [None]:
uspto_sample

# Bug

In [None]:
import pandas as pd 

# Choose ONE dataset to debug by leaving exactly one line uncommented. Previously
# every file was read and then overwritten, so only the last read had any effect.
#df = pd.read_csv('./Data/aam_benchmark/Golden_mappings.csv')
#df = pd.read_csv('./Data/aam_benchmark/benchmark.csv')
#df = pd.read_csv('./Data/aam_benchmark/NatComm_mappings.csv')
#df = pd.read_csv('./Data/aam_benchmark/USPTO_sampled.csv')
df = pd.read_csv('./Data/ecoli/ecoli_aam_reactions.csv')
#df = pd.read_csv('./Data/Recon3D/Recon3D_aam_reactions.csv')
df.head(1)

In [None]:
from SynTemp.SynAAM.aam_validator import AMMValidator  
results, _ = AMMValidator.validate_smiles(data=df, ground_truth_col='ground_truth', 
                                       mapped_cols=['ground_truth'], 
                                       check_method='RC', 
                                       ignore_aromaticity=False, n_jobs=4, verbose=0, ensemble=False)

import pandas as pd
pd.DataFrame(results)[['mapper', 'accuracy', 'success_rate']]

In [None]:
# Take an explicit copy of the two columns: adding a column to a slice of `df`
# triggers pandas' SettingWithCopyWarning and is not guaranteed to behave as intended.
test_0 = df[['ground_truth', 'R-id']].copy()
test_0['results'] = results[0]['results']

In [None]:
test_0_bug = test_0.loc[test_0['results']==False, :]

In [None]:
rsmi = test_0_bug['ground_truth'][74]

In [None]:
from SynTemp.SynITS.its_construction import ITSConstruction
from SynTemp.SynITS.its_extraction import ITSExtraction
reactants, products = rsmi.split('>>')
G, H = ITSExtraction.graph_from_smiles(reactants), ITSExtraction.graph_from_smiles(products)

In [None]:
rsmi

In [None]:
reactants

In [None]:
from rdkit import Chem
Chem.MolFromSmiles(reactants)

In [None]:
from SynTemp.SynVis.reaction_visualizer import ReactionVisualizer
vis = ReactionVisualizer()

In [None]:
vis.visualize_reaction(test_0_bug.iloc[0]['ground_truth'])

In [None]:
test_0_bug.iloc[0]['ground_truth']

In [None]:
import pandas as pd


df = pd.read_csv('./Data/USPTO_50K/USPTO_50K.csv')
df.drop_duplicates(subset=['reactions'], inplace=True)
df['R-id'] = ['USPTO-' + str(i) for i in range(len(df))]
df.head(2)

# MOD

In [None]:
from SynTemp.SynUtils.utils import load_database, save_database
from SynTemp.SynMØD.MØD_modeling import MØDModeling
database = load_database('./test_database.json.gz')
fw, bw = MØDModeling.reproduce_reactions(database=database, id_col='R-id', rule_file_path='./Data/uspto/Rule',
                                         original_rsmi_col='reactions', repeat_times=1)

In [None]:
import pandas as pd
pd.DataFrame(fw).info()

In [None]:
import pandas as pd
pd.DataFrame(bw).info()

Forward prediction

In [None]:
database = load_database('./test_database.json.gz')
test = MØDModeling.forward_prediction(database=database,  rule_file_path='./Data/uspto/Rule',
                                         original_rsmi_col='reactions', repeat_times=1)

In [None]:
pd.DataFrame(test)

In [None]:
database = load_database('./test_database.json.gz')
test_2 = MØDModeling.backward_prediction(database=database,  rule_file_path='./Data/uspto/Rule',
                                         original_rsmi_col='reactions', repeat_times=1)

In [None]:
pd.DataFrame(test_2)

In [None]:
from SynTemp.SynVis.reaction_visualizer import ReactionVisualizer
vis = ReactionVisualizer()
# Show at most the first 14 backward predictions of the first entry; slicing
# (instead of indexing 0..13) avoids an IndexError when fewer were generated.
for predicted_rsmi in test_2[0]['backward_predictions'][:14]:
    display(vis.visualize_reaction(predicted_rsmi))

In [None]:
vis.visualize_reaction(test[0]['forward_predictions'][1])

In [None]:
test[0]

In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from scipy.constants import kilo, calorie
import numpy as np

from rdkit import Chem
from rdkit.Chem import AllChem
from scipy.constants import kilo, calorie
import numpy as np

def calculate_delta_g(reaction_smiles: str, temperature: float = 298.15) -> float:
    """
    Roughly estimate the Gibbs free energy change (ΔG) of a reaction from its reaction SMILES.

    Each reactant and product molecule is parsed from SMILES, given explicit
    hydrogens, embedded in 3D with ETKDG (fixed random seed), and scored with a
    single-point UFF energy. The estimate is
    ΔG ≈ ΔE = sum(E_products) - sum(E_reactants), i.e. the entropic term TΔS is neglected.

    Earlier versions summed force-field *objects* (not energies) of conformer-less
    template molecules and divided by the temperature; that could not yield an energy.

    Parameters:
        reaction_smiles (str): Reaction SMILES of the form 'reactants>>products'
            (or 'reactants>agents>products'; agents are ignored).
        temperature (float, optional): Temperature in Kelvin. Kept for interface
            compatibility; it is unused because ΔS is neglected. Defaults to 298.15 K.

    Returns:
        float: The estimated ΔG in kcal/mol (UFF energy units).

    Raises:
        ValueError: If the reaction SMILES is malformed, a molecule cannot be
            parsed or embedded, or UFF cannot be set up for it.
    """
    def _side_energy(side: str) -> float:
        """Sum of single-point UFF energies over the '.'-separated molecules of one side."""
        total = 0.0
        for fragment in side.split('.'):
            if not fragment:
                continue  # tolerate empty fragments such as a trailing '.'
            mol = Chem.MolFromSmiles(fragment)
            if mol is None:
                raise ValueError(f"Invalid SMILES string: {fragment}")
            mol = Chem.AddHs(mol)  # force fields need explicit hydrogens
            params = AllChem.ETKDG()
            params.randomSeed = 42  # reproducible conformer -> reproducible energy
            if AllChem.EmbedMolecule(mol, params) < 0:  # -1 signals embedding failure
                raise ValueError(f"Could not generate 3D coordinates for: {fragment}")
            force_field = AllChem.UFFGetMoleculeForceField(mol, vdwThresh=10.0)
            if force_field is None:
                raise ValueError(f"UFF could not be set up for: {fragment}")
            total += force_field.CalcEnergy()
        return total

    parts = reaction_smiles.split('>')
    if len(parts) != 3:
        raise ValueError(f"Invalid reaction SMILES: {reaction_smiles}")
    reactants, _agents, products = parts

    # ΔG = ΔE - TΔS with ΔS assumed negligible, so ΔG ≈ ΔE
    return _side_energy(products) - _side_energy(reactants)

# Example usage:


# Example usage:
reaction_smiles = 'C=C1C(=C)C2OC1C(=C)C2=C.C=CC(C)=O>>C=C1C(=C)C2OC1C1=C2CC(C(C)=O)CC1'
delta_g = calculate_delta_g(reaction_smiles)
print(f"ΔG for the reaction {reaction_smiles}: {delta_g:.2f} kcal/mol")


In [None]:
rxn = AllChem.ReactionFromSmarts(reaction_smiles, useSmiles= True)

# Calculate the molecular energies of reactants and products
reactant_mols = [mol for mol in rxn.GetReactants()]
product_mols = [mol for mol in rxn.GetProducts()]


In [None]:
reactant_mols[0]

In [None]:

reactant_energies = [AllChem.UFFGetMoleculeForceField(mol, vdwThresh=10.0) for mol in reactant_mols]
product_energies = [AllChem.UFFGetMoleculeForceField(mol, vdwThresh=10.0) for mol in product_mols]


In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem

# Load molecule from SMILES
mol = Chem.MolFromSmiles("CCO")  # Example SMILES, replace with your molecule

# Generate 3D coordinates
mol = Chem.AddHs(mol)  # Add hydrogens
AllChem.EmbedMolecule(mol, AllChem.ETKDG())  # Generate 3D coordinates

# Calculate energy
ff = AllChem.UFFGetMoleculeForceField(mol)
energy = ff.CalcEnergy()

print("Energy:", energy)


In [None]:
import glob
import copy
from typing import List, Any, Dict, Tuple
from SynTemp.SynMØD.MØD_postprocess import MØDPostprocess
from mod import DG, ruleGMLString, smiles, graphGMLString, addUniverse, addSubset, repeat, rightPredicate
from SynTemp.SynUtils.graph_utils import load_gml_as_text



class MØDModeling:
    """
    The MØDModeling class encapsulates functionalities for reaction modeling using the MØD toolkit.
    It provides methods for reproducing known reactions with their own rule and for forward and
    backward prediction based on a templates library (a directory of GML rule files).
    """

    @staticmethod
    def smilesFromProduct(product: Any) -> str:
        """
        Converts a product object from a reaction into a SMILES string representation.

        The product's vertices and edges are written out as a GML graph string, which is
        loaded with `graphGMLString`; the SMILES is read from the resulting graph.

        Parameters:
        - product (Any): Object exposing `vertices`; each vertex must provide `id`,
                         `stringLabel` and `incidentEdges`, and each edge `target` and `stringLabel`.

        Returns:
        - str: The SMILES string representation of the product.
        """
        gml_lines = ["graph ["]
        for v in product.vertices:
            # Use v.id for the node as well as for the edge endpoints so that both
            # always refer to the same numbering.
            gml_lines.append(f'  node [ id {v.id} label "{v.stringLabel}" ]')
            for e in v.incidentEdges:
                # Each undirected edge is seen from both endpoints; emit it once.
                if v.id < e.target.id:
                    gml_lines.append(
                        f'  edge [ source {v.id} target {e.target.id} label "{e.stringLabel}" ]'
                    )
        gml_lines.append("]")
        graph = graphGMLString("\n".join(gml_lines), name="Nan")
        return graph.smiles

    @staticmethod
    def categorize_reactions(reactions: List[str], target_reaction: str) -> Tuple[List[str], List[str]]:
        """
        Categorizes the reactions into matches and not matches based on the target reaction.

        Both the candidates and the target are passed through `MØDPostprocess.standardize_rsmi`
        before being compared as strings.

        Args:
            reactions (List[str]): A list of reaction SMILES strings to categorize.
            target_reaction (str): The target reaction SMILES string to compare against.

        Returns:
            Tuple[List[str], List[str]]: Matched reactions (duplicates kept) and non-matched
            reactions (duplicates removed, first-occurrence order preserved), both standardized.
        """
        match, not_match = [], []
        target_reaction = MØDPostprocess.standardize_rsmi(target_reaction)
        for reaction_smiles in reactions:
            reaction_smiles = MØDPostprocess.standardize_rsmi(reaction_smiles)
            if reaction_smiles == target_reaction:
                match.append(reaction_smiles)
            else:
                not_match.append(reaction_smiles)
        # dict.fromkeys de-duplicates like set() but keeps a deterministic order
        return match, list(dict.fromkeys(not_match))

    @staticmethod
    def _reproduce_one_direction(database: List[Dict], id_col: str, rule_file_path: str,
                                 original_rsmi_col: str, repeat_times: int,
                                 max_solutions: int, backward: bool) -> List[Dict]:
        """
        Apply each entry's own rule in one direction and label the outcomes on a deep copy.

        With `backward=False` the rule is applied to the reactant side; with `backward=True`
        the inverted rule is applied to the product side.
        """
        side = 1 if backward else 0
        processed = copy.deepcopy(database)
        for entry in processed:
            # The rule file is named after the entry's identifier.
            rule_file = f'{rule_file_path}/{entry[id_col]}.gml'
            initial_smiles_list = entry[original_rsmi_col].split('>>')[side].split('.')

            reactions = MØDModeling.perform_reaction(
                rule_file_path=rule_file, invert_rule=backward,
                initial_smiles=initial_smiles_list, repeat_times=repeat_times,
                type='bw' if backward else 'fw', max_solutions=max_solutions)

            match, not_match = MØDModeling.categorize_reactions(reactions, entry[original_rsmi_col])

            entry['positive_reactions'] = match[0] if match else None
            entry['negative_reactions'] = not_match
        return processed

    @staticmethod
    def reproduce_reactions(database: List[Dict], id_col: str, rule_file_path: str,
                            original_rsmi_col: str = 'reactions', repeat_times: int = 1,
                            max_solutions: int = 10) -> Tuple[List[Dict], List[Dict]]:
        """
        Re-applies each entry's own rule forward and backward and labels the generated reactions.

        For every entry the rule file `<rule_file_path>/<entry[id_col]>.gml` is applied to the
        reactants (forward) and, inverted, to the products (backward). Generated reactions equal
        to the original (after standardization) are stored under 'positive_reactions' (first
        match, or None); the others under 'negative_reactions'. The input is not modified.

        Args:
            database (List[Dict]): The database to process, represented as a list of dictionaries.
            id_col (str): Key holding the entry identifier, which is also the rule file name.
            rule_file_path (str): Directory containing the `.gml` rule files.
            original_rsmi_col (str, optional): Key holding the original reaction SMILES. Defaults to 'reactions'.
            repeat_times (int, optional): Number of times the rule is repeated. Defaults to 1.
            max_solutions (int, optional): Passed to `perform_reaction`. Defaults to 10.

        Returns:
            Tuple[List[Dict], List[Dict]]: (forward database, backward database), each a labelled
            deep copy of the input.
        """
        database_fw = MØDModeling._reproduce_one_direction(
            database, id_col, rule_file_path, original_rsmi_col,
            repeat_times, max_solutions, backward=False)
        database_bw = MØDModeling._reproduce_one_direction(
            database, id_col, rule_file_path, original_rsmi_col,
            repeat_times, max_solutions, backward=True)
        return database_fw, database_bw

    @staticmethod
    def _predict_with_all_rules(database: List[Dict], rule_file_path: str, original_rsmi_col: str,
                                repeat_times: int, max_solutions: int, backward: bool) -> List[Dict]:
        """
        Apply every `.gml` rule in `rule_file_path` to one side of each entry's reaction.

        Results are written to a deep copy under 'backward_predictions' or 'forward_predictions'
        together with 'number_predictions'.
        """
        side = 1 if backward else 0
        result_key = 'backward_predictions' if backward else 'forward_predictions'
        # The rule library does not change between entries: list it once, in a stable order.
        rule_files = sorted(glob.glob(f'{rule_file_path}/*.gml'))

        database_copy = copy.deepcopy(database)
        for entry in database_copy:
            # A value without '>>' raises IndexError for the backward side (index 1).
            start_smiles = entry.get(original_rsmi_col, '').split('>>')[side].split('.')
            predictions = []
            for rule_file in rule_files:
                predictions.extend(MØDModeling.perform_reaction(
                    rule_file_path=rule_file, invert_rule=backward,
                    initial_smiles=start_smiles, repeat_times=repeat_times,
                    type='bw' if backward else 'fw', max_solutions=max_solutions))

            entry[result_key] = predictions
            entry['number_predictions'] = len(predictions)
        return database_copy

    @staticmethod
    def forward_prediction(database: List[Dict], rule_file_path: str,
                        original_rsmi_col: str = 'reactions', repeat_times: int = 1,
                        max_solutions: int = 10) -> List[Dict]:
        """
        Performs forward reaction predictions for each entry, using every rule file in a directory.
        The reactant side of each entry's reaction SMILES is the starting material. Works on a
        deep copy, so the input database is not modified in-place.

        Args:
            database (List[Dict]): The database to process, represented as a list of dictionaries.
            rule_file_path (str): The directory where the `.gml` rule files are stored.
            original_rsmi_col (str, optional): The key that contains the original reaction SMILES. Defaults to 'reactions'.
            repeat_times (int, optional): The number of times to repeat the reaction process. Defaults to 1.
            max_solutions (int, optional): Passed to `perform_reaction`. Defaults to 10.

        Returns:
            List[Dict]: A new database list, with each entry updated to include
            'forward_predictions' and 'number_predictions'.
        """
        return MØDModeling._predict_with_all_rules(
            database, rule_file_path, original_rsmi_col, repeat_times, max_solutions, backward=False)

    @staticmethod
    def backward_prediction(database: List[Dict], rule_file_path: str,
                        original_rsmi_col: str = 'reactions', repeat_times: int = 1,
                        max_solutions: int = 10) -> List[Dict]:
        """
        Performs backward reaction predictions for each entry, using every rule file in a directory.
        The product side of each entry's reaction SMILES is the starting material and every rule
        is applied inverted. Works on a deep copy, so the input database is not modified in-place.

        Args:
            database (List[Dict]): The database to process, represented as a list of dictionaries.
            rule_file_path (str): The directory where the `.gml` rule files are stored.
            original_rsmi_col (str, optional): The key that contains the original reaction SMILES. Defaults to 'reactions'.
            repeat_times (int, optional): The number of times to repeat the reaction process. Defaults to 1.
            max_solutions (int, optional): Passed to `perform_reaction`. Defaults to 10.

        Returns:
            List[Dict]: A new database list, with each entry updated to include
            'backward_predictions' and 'number_predictions'.
        """
        return MØDModeling._predict_with_all_rules(
            database, rule_file_path, original_rsmi_col, repeat_times, max_solutions, backward=True)

    @staticmethod
    def generate_reaction_smiles(temp_results: List[str], base_smiles: str, is_forward: bool = True) -> List[str]:
        """
        Generate reaction SMILES strings based on the temporary results, given the base SMILES string.

        Each combination yielded by `MØDPostprocess.generate_smiles_combinations` is joined with '.'
        and placed on the opposite side of the reaction arrow from `base_smiles`.

        Parameters:
            temp_results (List[str]): List of temporary result SMILES strings.
            base_smiles (str): Base SMILES string representing the reactants or products.
            is_forward (bool, optional): True puts `base_smiles` on the left of '>>', False on the right. Defaults to True.

        Returns:
            List[str]: List of reaction SMILES strings.
        """
        results = []
        for comb in MØDPostprocess.generate_smiles_combinations(temp_results, base_smiles, True):
            joined_smiles = '.'.join(comb)
            if is_forward:
                results.append(f"{base_smiles}>>{joined_smiles}")
            else:
                results.append(f"{joined_smiles}>>{base_smiles}")
        return results

    @staticmethod
    def perform_reaction(rule_file_path: str, invert_rule: bool, initial_smiles: List[str],
                     repeat_times: int = 1, type: str = 'fw', max_solutions: int = 10) -> List[str]:
        """
        Loads a reaction rule from a GML file, applies it to the given molecules, and returns reaction SMILES.

        The first molecule is added to the universe and the remaining ones to the subset (a single
        molecule is used for both); the rule is then repeated `repeat_times` times. When more than
        `max_solutions` product graphs are found, only those whose molecular formula equals that
        of the starting material are kept.

        Parameters:
        - rule_file_path (str): Path to the GML file containing the reaction rule.
        - invert_rule (bool): Whether to invert the reaction rule. Useful for backward reactions.
        - initial_smiles (List[str]): List of initial molecules represented as SMILES strings.
        - repeat_times (int): Number of times to repeat the reaction rule. Defaults to 1.
        - type (str): Type of prediction: forward ('fw') or backward ('bw'). Defaults to 'fw'.
        - max_solutions (int): Largest number of product graphs that is still expanded through
          `generate_reaction_smiles`. Defaults to 10.

        Returns:
        - List[str]: Reaction SMILES after applying the rule; an empty list for an unsupported `type`.
        """
        # Convert each SMILES string to a molecule graph exactly once
        initial_molecules = [smiles(smile) for smile in initial_smiles]

        # Load the rule from the GML file
        gml_content = load_gml_as_text(rule_file_path)
        reaction_rule = ruleGMLString(gml_content, invert=invert_rule)

        # Define the strategy
        if len(initial_molecules) > 1:
            subset = initial_molecules[1:]
        else:
            subset = initial_molecules[0]
        strategy = (addUniverse(initial_molecules[0]) >> addSubset(subset)
                    >> repeat[repeat_times]([reaction_rule]))

        # Initialize the derivation graph with the initial molecules
        dg = DG(graphDatabase=initial_molecules)
        dg.build().execute(strategy)

        # Collect resulting products
        temp_results = [MØDModeling.smilesFromProduct(graph) for graph in dg.products]

        if type not in ('fw', 'bw'):
            return []  # Return an empty list for unsupported reaction types

        is_forward = type == 'fw'
        base_smiles = '.'.join(initial_smiles)
        if len(temp_results) <= max_solutions:
            return MØDModeling.generate_reaction_smiles(temp_results, base_smiles, is_forward=is_forward)

        # More than max_solutions products: keep only the formula-balanced ones
        base_formula = MØDPostprocess.get_combined_molecular_formula([base_smiles])
        balanced = [candidate for candidate in temp_results
                    if MØDPostprocess.get_combined_molecular_formula([candidate]) == base_formula]
        if is_forward:
            return [f"{base_smiles}>>{candidate}" for candidate in balanced]
        return [f"{candidate}>>{base_smiles}" for candidate in balanced]
