### Tutorial for Precedent Finder

This tutorial goes through, step by step, how to:

* Load the necessary input to the Precedent Finder
* Assign atom-mapping to a query reaction
* Run Precedent Finder
* Visualize results

It requires you to set up one environment with the `rxnmapper` tool and one environment for the Precedent Finder tool (see README.md at the top level of the repository).

In [None]:
import base64
import os
from io import BytesIO
from collections import defaultdict
from pathlib import Path

import pandas as pd
from scipy import sparse
from rdkit.Chem import AllChem, Draw
from IPython.display import HTML

from rxnutils.pipeline.actions.reaction_mod import RxnMapper

import sys
sys.path.append("..")
from precedent_finder import find_precedents

# Display settings for the result tables: floats are shown with two decimals
# and cell content is never truncated.
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_colwidth = None

# rxnmapper is located through the RXNMAPPER_ENV_PATH environment variable.
# The value below assumes that conda (miniconda3) and its environments are
# installed in your home directory - update the path if necessary.
conda_path = f'{os.path.expanduser("~")}/miniconda3'
os.environ["RXNMAPPER_ENV_PATH"] = conda_path + "/envs/rxnmapper/"

In [None]:
def atom_map_reaction(rsmi):
    """Return the atom-mapped version of a single reaction SMILES.

    The reaction is placed in a one-row DataFrame (column ``rsmi``), passed
    through the ``RxnMapper`` action, and the value of the
    ``RxnmapperRxnSmiles`` column of the returned frame is handed back.

    Args:
        rsmi: the reaction SMILES to map.

    Returns:
        The first (and only) entry of the ``RxnmapperRxnSmiles`` column.
    """
    query_frame = pd.DataFrame({"rsmi": [rsmi]})
    mapper = RxnMapper(in_column="rsmi")
    mapped_frame = mapper(query_frame)
    return mapped_frame["RxnmapperRxnSmiles"].iloc[0]

In [None]:
def image_formatter(im):
    """Render an image object as an inline HTML ``<img>`` tag.

    Args:
        im: an object exposing ``save(fileobj, format)``; it is asked to
            write itself as JPEG into an in-memory buffer.

    Returns:
        A string with an ``<img>`` element whose ``src`` is a base64-encoded
        ``data:image/jpeg`` URI holding the saved bytes.
    """
    buffer = BytesIO()
    try:
        im.save(buffer, "jpeg")
        raw_bytes = buffer.getvalue()
    finally:
        # Release the in-memory buffer even if saving fails.
        buffer.close()
    encoded = base64.b64encode(raw_bytes).decode()
    return '<img src="data:image/jpeg;base64,' + encoded + '">'

This loads the precomputed fingerprints, reaction metadata and SMARTS library

In [None]:
# All precomputed inputs live in the repository's data folder, one level up.
data_dir = Path("../data")

# Sparse fingerprint matrices stored in SciPy's .npz format.
rfp = sparse.load_npz(data_dir / "rfp_precomp.npz")
rcfp = sparse.load_npz(data_dir / "rcfp_precomp.npz")

# Reaction metadata; the file is tab-separated despite the .csv.gz name.
data = pd.read_csv(data_dir / "metadata_cleaned.csv.gz", sep="\t")

# The SMARTS library is passed on as a plain string path.
smartslib_path = str(data_dir / "group_smarts.json")

Load and visualize our query reaction

In [None]:
# File holding the query reaction. The default is a cycle-forming reaction
# from the RXN Insight publication; use "bh-reaction.smi" instead to load a
# simple Buchwald-Hartwig reaction.
query_file = "rxninsight-reaction.smi"

# Only the first line of the file is used as the query reaction SMILES.
rsmi_query = Path(query_file).read_text().splitlines()[0]
rxn = AllChem.ReactionFromSmarts(rsmi_query, useSmiles=True)

# Last expression of the cell, so the notebook shows the drawing as output.
Draw.ReactionToImage(rxn, subImgSize=(200, 300))

and then assign atom-mapping using `rxnmapper`

In [None]:
# Atom-map the query reaction with atom_map_reaction (defined earlier in this
# notebook).
rsmi_query_mapped = atom_map_reaction(rsmi_query)
# Parse the mapped reaction SMILES; this rebinds the `rxn` name.
rxn = AllChem.ReactionFromSmarts(rsmi_query_mapped, useSmiles=True)
# Last expression of the cell, so the notebook shows the drawing as output.
Draw.ReactionToImage(rxn, subImgSize=(200, 300))

Now, we can run Precedent Finder and visualize the results

In [None]:
# Search for precedents of the atom-mapped query reaction. The remaining
# arguments are the inputs loaded earlier: the reaction metadata table
# (`data`), the two sparse fingerprint matrices (`rfp`, `rcfp`) and the path
# to the SMARTS library JSON file.
# The result is iterated row by row with `.iterrows()` in the next cell.
precedent_data = find_precedents(
    rsmi_query_mapped, 
    data, rfp, rcfp, smartslib_path
)

In [None]:
# Maximum number of precedents to render. None shows every hit; set it to
# e.g. 10 to limit the number of displayed reactions.
max_displayed = None
if max_displayed is None:
    displayed_precedents = precedent_data
else:
    displayed_precedents = precedent_data.head(max_displayed)

# Collect one list per output column, then turn them into a DataFrame.
results = defaultdict(list)
for _, row in displayed_precedents.iterrows():
    # ReactionSmilesClean is split on ">" into reactants, reagents, products.
    reactants, reagents, products = row.ReactionSmilesClean.split(">")
    # The reaction is drawn without reagents; they get their own column below.
    rxn = AllChem.ReactionFromSmarts(">>".join([reactants, products]), useSmiles=True)
    AllChem.RemoveMappingNumbersFromReactions(rxn)
    reagent_mol = AllChem.MolFromSmiles(reagents)

    # Put each ";"-separated ID on its own line in the HTML table.
    results["ID"].append(row.ID.replace(";", ";<BR>"))
    results["Yield"].append(row.CuratedYield)
    results["Year"].append(row.Year)
    results["Reaction similarity"].append(row.global_similarity)
    results["Center similarity"].append(row.center_similarity)
    results["Goodness"].append(row.goodness)
    results["Reaction"].append(Draw.ReactionToImage(rxn, subImgSize=(200, 500)))
    results["Reagents"].append(Draw.MolToImage(reagent_mol))
results = pd.DataFrame(results)

# escape=False keeps the <img> and <BR> tags as real HTML instead of text.
# Last expression of the cell, so the notebook renders the table as output.
HTML(
    results.to_html(
        formatters={"Reaction": image_formatter, "Reagents": image_formatter},
        escape=False,
        index=False,
    )
)