# **1. Consensus atom mapping**

In [None]:
import sys

# Make the directory two levels up importable (presumably the repository
# root that contains the `syntemp` package -- confirm for your checkout).
sys.path.append("../../")
from syntemp.pipeline import run_aam
from syntemp.SynUtils.utils import load_database

# Only the first 10 records of the demo database are used.
# The path is a plain string literal: it has no placeholders, so no f-string.
data = load_database("../../Data/Testcase/demo.json.gz")[:10]

# Run the three listed mappers on the reaction SMILES stored under the
# "reactions" column of each record.
mapped_reactions = run_aam(
    data,
    mapper_types=["local_mapper", "rxn_mapper", "graphormer"],
    rsmi_column="reactions",
)

# **2. ITSG and Rule Extraction**

This section assumes that you have already run the consensus atom mapping above (which can take some time).

## 2.1. Extraction

In [None]:
import sys

sys.path.append("../../")
from syntemp.SynAAM.aam_postprocess import AMMPostprocessor
from syntemp.SynITS.its_extraction import ITSExtraction
from syntemp.SynUtils.utils import load_database, save_database

# Names of the mapper columns handed to the postprocessor.
mapper_name = ["rxn_mapper", "graphormer", "local_mapper"]
# Start from the output of the consensus atom mapping step.
data = mapped_reactions

# make sure reactions all have atom mapping
check_valid = AMMPostprocessor.parallel_postprocess(
    data,
    mapper_name,
    threshold=3,
    n_jobs=5,
    verbose=1,
)

# Keep only the records whose "Valid" entry is truthy.
valid_data = [record for record in check_valid if record.get("Valid")]

In [None]:
# Extract rules and graph rules based on Imaginary Transition State (ITS).
# The input is `valid_data`, i.e. the records flagged "Valid" by the
# postprocessing step, rather than the unfiltered `data`; otherwise the
# validity filter computed in the previous cell would have no effect.
its_graph, its_graph_wrong = ITSExtraction.parallel_process_smiles(
    valid_data,
    mapper_name,
    n_jobs=5,
    verbose=1,
    export_full=False,
    check_method="RC",
)

# Number of entries returned in the first result list.
len(its_graph)

In [None]:
from syntemp.SynVis.chemical_graph_visualizer import ChemicalGraphVisualizer

# Visualizer created with a fixed seed (42).
vis_graph = ChemicalGraphVisualizer(seed=42)

# Draw the "ITSGraph" and "GraphRules" entries of the first item in its_graph.
first_entry = its_graph[0]
fig = vis_graph.visualize_all(
    first_entry["ITSGraph"],
    first_entry["GraphRules"],
)
fig

## 2.2. Rules Extension

In [None]:
from syntemp.SynRule.rules_extraction import RuleExtraction

# Options for the extraction run: rules are taken from the "ITSGraph"
# column, with extend=True and n_knn=1, on a single job and without logging.
extraction_options = {
    "mapper_type": "ITSGraph",
    "n_jobs": 1,
    "verbose": 0,
    "extend": True,
    "n_knn": 1,
}
its_graph_rules = RuleExtraction.process_rules_extraction(
    its_graph, **extraction_options
)

# Show the "GraphRules" entry of the first result with the titles L, K and R.
vis_graph.vis_three_graph(
    its_graph_rules[0]["GraphRules"],
    left_graph_title="L",
    k_graph_title="K",
    right_graph_title="R",
)

# **3. ITS Completion**

Make sure that the ITSG is a cyclic graph.

## 3.1. Cyclic ITS Graph

In [None]:
from copy import deepcopy

from syntemp.SynITS.its_hadjuster import ITSHAdjuster

# Work on a deep copy of its_graph (presumably so that the adjuster cannot
# alter the original entries -- confirm against ITSHAdjuster).
test_list = deepcopy(its_graph)
print(len(test_list))

process_graph_data = ITSHAdjuster.process_graph_data_parallel(
    test_list, "ITSGraph", n_jobs=5, verbose=2
)

# Drop every entry whose "ITSGraph" is None. The list is iterated directly;
# the index produced by enumerate() was never used.
process_graph_data = [
    entry for entry in process_graph_data if entry["ITSGraph"] is not None
]

# Number of entries that remain after filtering.
len(process_graph_data)

In [None]:
# Draw the "ITSGraph" and "GraphRules" entries of the item at index 2 of
# process_graph_data.
selected_entry = process_graph_data[2]
vis_graph.visualize_all(selected_entry["ITSGraph"], selected_entry["GraphRules"])

## 3.2. Ambiguous hydrogen

In [None]:
from syntemp.SynVis.chemical_reaction_visualizer import ChemicalReactionVisualizer

# Atom-mapped reaction SMILES used as the example throughout this section.
test = "[CH:10]=1[CH:11]=[CH:12][C:7](=[CH:8][CH:9]=1)[N:5]([OH:6])[C:3](=[O:4])[O:2][CH3:1].[Cl:16][C:14]([Cl:13])([Cl:15])[C:17]#[N:18]>>[Cl:13][C:14]([Cl:16])([Cl:15])[C:17]([NH:18][C:12]=1[C:7](=[CH:8][CH:9]=[CH:10][CH:11]=1)[NH:5][C:3]([O:2][CH3:1])=[O:4])=[O:6]"

# Render the reaction with its atom-map numbers shown.
vis = ChemicalReactionVisualizer()
vis.visualize_reaction(
    test,
    img_size=(1000, 300),
    show_atom_map=True,
)

In [None]:
from syntemp.SynITS.its_extraction import ITSExtraction

# Wrap the example reaction in a one-record list; its mapped SMILES is stored
# under the "mapper" key, which is also the only name in mapper_names.
mapper_names = ["mapper"]
test_arbitrary = [{"R-id": "C1", "mapper": test}]

correct, incorrect = ITSExtraction.parallel_process_smiles(
    test_arbitrary, mapper_names=mapper_names, check_method="RC"
)

# Take the first three items stored under "ITSGraph" of the first record
# in `correct`.
its_triple = correct[0]["ITSGraph"]
react_graph = its_triple[0]
product_graph = its_triple[1]
rule_graph = its_triple[2]

In [None]:
from syntemp.SynITS.its_construction import ITSConstruction
from syntemp.SynITS.its_hadjuster import ITSHAdjuster

# RuleExtraction is imported here as well, so that this section can be run
# without first executing the rule-extension cell that originally imported it.
from syntemp.SynRule.rules_extraction import RuleExtraction

# Build the hydrogen-node variations for the reactant/product graph pair.
variations = ITSHAdjuster.add_hydrogen_nodes_multiple(react_graph, product_graph)

# One ITS graph per variation, built from the variation's first two items.
its_list = [ITSConstruction.ITSGraph(pair[0], pair[1]) for pair in variations]

# Append each ITS graph to its variation tuple; the resulting tuples are
# unpacked as the positional arguments of extract_reaction_rules.
group_1 = variations[0] + (its_list[0],)
group_2 = variations[1] + (its_list[1],)
rules_1 = RuleExtraction.extract_reaction_rules(*group_1, extend=False, n_knn=1)
rules_2 = RuleExtraction.extract_reaction_rules(*group_2, extend=False, n_knn=1)

In [None]:
from syntemp.SynVis.chemical_graph_visualizer import ChemicalGraphVisualizer

# Fresh visualizer with a fixed seed (42).
vis_graph = ChemicalGraphVisualizer(seed=42)

# Show the rule obtained from the first hydrogen variation, titled L, K and R.
panel_titles = {
    "left_graph_title": "L",
    "right_graph_title": "R",
    "k_graph_title": "K",
}
vis_graph.vis_three_graph(rules_1, **panel_titles)

In [None]:
# Show the rule obtained from the second hydrogen variation, titled L, K and R.
vis_graph.vis_three_graph(
    rules_2,
    left_graph_title="L",
    k_graph_title="K",
    right_graph_title="R",
)

# **4. Rule Clustering**

In [None]:
import sys

# NOTE(review): the earlier cells append "../../" to sys.path, while this one
# appends "../" -- confirm which relative path is intended for this notebook.
sys.path.append("../")
from syntemp.SynUtils.utils import load_database, load_from_pickle
from syntemp.SynRule.rules_extraction import RuleExtraction
from syntemp.SynVis.chemical_graph_visualizer import ChemicalGraphVisualizer
from syntemp.SynRule.rule_cluster import RuleCluster

# Display the first entry of process_graph_data, the input to clustering.
process_graph_data[0]

In [None]:
from syntemp.SynRule.hierarchical_clustering import HierarchicalClustering

# Node attributes used as labels, the defaults supplied for them, and the
# edge attribute passed to the clusterer.
node_label_names = ["element", "charge"]
cluster_options = {
    "node_label_names": node_label_names,
    "node_label_default": ["*", 0],
    "edge_attribute": "order",
}
hier_cluster = HierarchicalClustering(**cluster_options)

# Fit on the "ITSGraph" column of the processed data; fit() returns three values.
fit_result = hier_cluster.fit(process_graph_data, "ITSGraph")
reaction_dicts, templates, hier_templates = fit_result

In [None]:
# Print how many templates are stored at each index of `templates`; the index
# is reported as the radius.
for radius in range(len(templates)):
    n_templates = len(templates[radius])
    print(f"Number of templates within radii {radius}", n_templates)

# **5. Graph Modelling Language - MØD_rules** 

In [None]:
from syntemp.SynRule.rule_writing import RuleWriting

# Options for the rule writer: rules are read from the "GraphRules" column,
# reindexing is on, nothing is saved to disk (save_path=None), and the
# "charge" and "isomer" attributes are requested.
writer_options = {
    "reindex": True,
    "save_path": None,
    "rule_column": "GraphRules",
    "n_jobs": 1,
    "attributes": ["charge", "isomer"],
}
results = RuleWriting.auto_extraction(process_graph_data, **writer_options)

# Print the first written rule.
print(results[0])