In [2]:
from SynTemp.SynUtils.utils import load_from_pickle, load_database
data = load_database('./Data/DPO/uspto/demo_database.json.gz')

In [4]:
data[0]

{'R-id': 'USPTO_50K_31',
 'reactions': 'C=C1C(=C)C2OC1C(=C)C2=C.C=CC(C)=O>>C=C1C(=C)C2OC1C1=C2CC(C(C)=O)CC1'}

In [5]:
from SynTemp.SynUtils.graph_utils import load_gml_as_text
rule_1 = load_gml_as_text('./Data/DPO/uspto/Rule/USPTO_50K_31.gml')

In [8]:
rule_1

'rule [\n   ruleID "USPTO_50K_31"\n   left [\n      edge [ source 1 target 2 label "-" ]\n      edge [ source 1 target 6 label "=" ]\n      edge [ source 2 target 3 label "=" ]\n      edge [ source 4 target 5 label "=" ]\n   ]\n   context [\n      node [ id 1 label "C" ]\n      node [ id 2 label "C" ]\n      node [ id 3 label "C" ]\n      node [ id 4 label "C" ]\n      node [ id 5 label "C" ]\n      node [ id 6 label "C" ]\n   ]\n   right [\n      edge [ source 1 target 2 label "=" ]\n      edge [ source 1 target 6 label "-" ]\n      edge [ source 2 target 3 label "-" ]\n      edge [ source 3 target 4 label "-" ]\n      edge [ source 4 target 5 label "-" ]\n      edge [ source 5 target 6 label "-" ]\n   ]\n]'

In [7]:
import mod 
from mod import *

In [35]:
from SynTemp.SynUtils.utils import load_from_pickle, load_database
data = load_database('./Data/DPO/USPTO_50K/test.json.gz')

In [82]:
data[10]

{'R-id': 5224,
 'reactions': 'CCCCS(=O)(=O)Cl.COc1cc(C(=O)N2Cc3cccn3Cc3ccccc32)ccc1N>>CCCCS(=O)(=O)Nc1ccc(C(=O)N2Cc3cccn3Cc3ccccc32)cc1OC.Cl',
 'local_mapper': '[CH3:1][CH2:2][CH2:3][CH2:4][S:5](=[O:6])(=[O:7])[Cl:33].[CH3:32][O:31][c:30]1[cH:29][c:12]([C:13](=[O:14])[N:15]2[CH2:16][c:17]3[cH:18][cH:19][cH:20][n:21]3[CH2:22][c:23]3[cH:24][cH:25][cH:26][cH:27][c:28]32)[cH:11][cH:10][c:9]1[NH2:8]>>[CH3:1][CH2:2][CH2:3][CH2:4][S:5](=[O:6])(=[O:7])[NH:8][c:9]1[cH:10][cH:11][c:12]([C:13](=[O:14])[N:15]2[CH2:16][c:17]3[cH:18][cH:19][cH:20][n:21]3[CH2:22][c:23]3[cH:24][cH:25][cH:26][cH:27][c:28]32)[cH:29][c:30]1[O:31][CH3:32].[ClH:33]',
 'rxn_mapper': '[CH3:1][CH2:2][CH2:3][CH2:4][S:5](=[O:6])(=[O:7])[Cl:33].[CH3:32][O:31][c:30]1[cH:29][c:12]([C:13](=[O:14])[N:15]2[CH2:16][c:17]3[cH:18][cH:19][cH:20][n:21]3[CH2:22][c:23]3[cH:24][cH:25][cH:26][cH:27][c:28]32)[cH:11][cH:10][c:9]1[NH2:8]>>[CH3:1][CH2:2][CH2:3][CH2:4][S:5](=[O:6])(=[O:7])[NH:8][c:9]1[cH:10][cH:11][c:12]([C:13](=[O:14])[N:15]2[CH2

In [56]:
hier_temp = load_from_pickle('./Data/DPO/USPTO_50K/Hydrogen/hier_rules.pkl.gz')

In [83]:
initial_molecules = [smiles(smile) for smile in set(['CCCCS(=O)(=O)Cl', 'COc1cc(C(=O)N2Cc3cccn3Cc3ccccc32)ccc1N'])]
gml_content = load_gml_as_text('./Data/DPO/USPTO_50K/Hydrogen/R0/Rules_good/2.gml')
reaction_rule = ruleGMLString(gml_content, invert=False)

In [75]:
def rule_apply(initial_molecules, reaction_rule):
    """
    Apply a reaction rule to a set of initial molecules and return the resulting products.

    Args:
    initial_molecules (list): A list of initial molecule structures.
    reaction_rule (object): The reaction rule to be applied.

    Returns:
    list: A list of lists containing the SMILES representation of each product from each reaction.
          Returns an empty list if no products are formed or an error occurs.
    """
    try:
        # Initialize the DG with the given initial molecules
        dg = DG(graphDatabase=initial_molecules)
        dg.build().apply(initial_molecules, reaction_rule)
        
        # Collect products from all reactions
        products = []
        for edge in dg.edges:
            # Collecting all SMILES strings for each target product
            product_smiles = [vertex.graph.smiles for vertex in edge.targets]
            products.append(product_smiles)

        # Check if any products were generated and return accordingly
        return products if products else []
    except Exception as e:
        print(f"An error occurred: {e}")
        return []



In [123]:
def hier_rule_apply(initial_molecules, reaction_rule_folder, hier_temp, radius, rule_id=2):
    """
    Recursively apply hierarchical reaction rules based on specified parameters.

    Args:
    initial_molecules (list): List of initial molecule structures.
    reaction_rule_folder (str): Path to the folder containing reaction rules.
    hier_temp (dict): Hierarchical template dictating the application of rules.
    radius (int): Current radius level for rule application.
    rule_id (int, optional): Identifier for the specific rule to apply. Defaults to 2.

    Returns:
    list: A flattened list of unique products from the applied hierarchical rules.
    """
    try:
        # Load the reaction rule GML content
        rule_path = f'./{reaction_rule_folder}/R{radius}/Rules_good/{rule_id}.gml'
        gml_content = load_gml_as_text(rule_path)
        reaction_rule = ruleGMLString(gml_content, invert=False)

        # Apply the reaction rule
        temp_results = rule_apply(initial_molecules, reaction_rule)

        # Decide next step based on the number of results and the radius
        if len(temp_results) < 2 or radius > 2:
            return temp_results
        else:
            # Get the new rule IDs based on the current radius from the hierarchical template
            try:
                new_rule_ids = [entry['Child'] for entry in hier_temp[radius] if entry['Cluster_id'] == rule_id][0]
            except:
                new_rule_ids = []
            print(new_rule_ids)
            # Recursively apply new rules and gather results
            results = []
            for entry in new_rule_ids:
                print(entry)
                print(radius + 1)
                result = hier_rule_apply(initial_molecules, reaction_rule_folder, hier_temp, radius + 1, entry)
                print(result)
                results.extend(result)
            results = list(set(results))
            if len(results) > 0: 
                return list(set(results))
            else:
                return temp_results
    except Exception as e:
        print(f"Error in processing: {e}")
        return []



In [124]:
for i in range(6,7):
    try:
        print(hier_rule_apply(initial_molecules, './Data/DPO/USPTO_50K/Hydrogen', hier_temp, radius=0, rule_id = i))
    except:
        continue

[174, 175, 176, 177, 178, 179]
174
1
[499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523]
499
2
[51, 52, 53, 54, 55]
51
3
52
3
53
3
54
3
55
3
Error in processing: unhashable type: 'list'
500
2
[6059, 6060]
6059
3
6060
3
Error in processing: unhashable type: 'list'
501
2
[8364, 8365]
8364
3
8365
3
Error in processing: unhashable type: 'list'
502
2
[8958]
8958
3
Error in processing: unhashable type: 'list'
503
2
[10593, 10594]
10593
3
10594
3
Error in processing: unhashable type: 'list'
504
2
[11067]
11067
3
Error in processing: unhashable type: 'list'
505
2
[11113, 11114, 11115, 11116]
11113
3
11114
3
11115
3
11116
3
Error in processing: unhashable type: 'list'
506
2
[11301, 11302, 11303]
11301
3
11302
3
11303
3
Error in processing: unhashable type: 'list'
507
2
[11388]
11388
3
Error in processing: unhashable type: 'list'
508
2
[11946, 11947, 11948]
11946
3
11947
3
11948
3
Error in processing: unhashable type: 'lis

In [87]:
hier_rule_apply(initial_molecules, './Data/DPO/USPTO_50K/Hydrogen', hier_temp, radius=0, rule_id = 0)

[]

In [74]:
hier_temp[0]

[{'Cluster_id': 0,
  'Parent': [],
  'Child': [0,
   1,
   2,
   3,
   4,
   5,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   14,
   15,
   16,
   17,
   18,
   19,
   20,
   21,
   22,
   23,
   24,
   25,
   26,
   27,
   28,
   29,
   30,
   31,
   32,
   33,
   34,
   35,
   36,
   37,
   38,
   39,
   40,
   41,
   42,
   43,
   44,
   45,
   46,
   47,
   48,
   49,
   50,
   51,
   52,
   53,
   54]},
 {'Cluster_id': 1,
  'Parent': [],
  'Child': [55,
   56,
   57,
   58,
   59,
   60,
   61,
   62,
   63,
   64,
   65,
   66,
   67,
   68,
   69,
   70,
   71,
   72,
   73,
   74,
   75,
   76,
   77]},
 {'Cluster_id': 2,
  'Parent': [],
  'Child': [78,
   79,
   80,
   81,
   82,
   83,
   84,
   85,
   86,
   87,
   88,
   89,
   90,
   91,
   92,
   93,
   94,
   95,
   96,
   97,
   98,
   99,
   100,
   101,
   102,
   103,
   104,
   105,
   106,
   107,
   108,
   109,
   110,
   111,
   112,
   113,
   114,
   115,
   116,
   117,
   118,
   119,
   120,
   121

In [64]:
gml_content = load_gml_as_text('./Data/DPO/USPTO_50K/Hydrogen/R1/Rules_good/154.gml')
reaction_rule = ruleGMLString(gml_content, invert=False)

In [65]:
rule_check(initial_molecules, reaction_rule)

1


(True,
 [['C(C1:C:C(:C:C:C:1)C2:C:C:C(:C:C:2)C(NCCCCN3CCC4:C:C:C(:C:C:4C3)OC)=O)#N',
   'O']])

In [30]:
dg = DG(graphDatabase=initial_molecules)
# dg.build().execute(strategy, verbosity=8)
dg.build().apply(initial_molecules, reaction_rule)

temp_results = []
for e in dg.edges:
    productSmiles = [v.graph.smiles for v in e.targets]
    temp_results.append(productSmiles)

In [31]:
len(temp_results)

1