In [15]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw, rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from IPython.display import display, HTML

# Module-level RDKit drawing options. annotate_ligand_grid hands this object
# to Draw.MolsToGridImage through its drawOptions argument.
opts = rdMolDraw2D.MolDrawOptions()
#opts.legendFontSize     = 10
# Font scale for annotation text; presumably this covers the per-atom
# "atomNote" labels set by the drawing helpers below — confirm in RDKit docs.
opts.annotationFontScale = 1

def annotate_ligand_grid(
    df,
    ligand_col: str = "ligand_name",
    smiles_col: str = "smiles",
    rpos_col: str = "rpos",
    energy_col: str = "dE",
    cols: int = 3,
    subImgSize: tuple = (300, 300)
):
    """
    Draw one annotated structure per ligand, arranged in a grid image.

    Rows of ``df`` are grouped by ``ligand_col``. For each group the SMILES of
    the first row is parsed, every atom index listed in ``rpos_col`` receives
    an "atomNote" property holding the matching ``energy_col`` value formatted
    to two decimals, and those atoms are highlighted. The legend of each cell
    lists the ligand name followed by its ``position=energy`` pairs.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least the four columns named by the ``*_col`` arguments.
    ligand_col, smiles_col, rpos_col, energy_col : str
        Column names for the grouping key, the SMILES string, the atom index
        and the value to annotate.
    cols : int
        Number of molecules per grid row (passed as ``molsPerRow``).
    subImgSize : tuple
        Size of each grid cell, passed through to ``Draw.MolsToGridImage``.

    Returns
    -------
    The object returned by ``Draw.MolsToGridImage`` with ``returnPNG=False``.

    Raises
    ------
    ValueError
        If ``df`` contains no ligand groups, or a SMILES string cannot be
        parsed by RDKit.
    """
    mols = []
    legends = []
    highlights = []

    for ligand, grp in df.groupby(ligand_col):
        smi = grp[smiles_col].iloc[0]
        # Coerce to built-in int, mirroring the int() casts used by
        # _svg_annotated_smi / build_annotated_frame, so a float-typed
        # position column does not break atom lookup or highlighting.
        pos_list = [int(p) for p in grp[rpos_col]]
        e_list = grp[energy_col].tolist()

        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            raise ValueError(f"Invalid SMILES for {ligand}: {smi}")

        for pos, e in zip(pos_list, e_list):
            mol.GetAtomWithIdx(pos).SetProp("atomNote", f"{e:.2f}")

        mols.append(mol)
        legends.append(f"{ligand}: " + ", ".join(f"{p}={e:.2f}" for p, e in zip(pos_list, e_list)))
        highlights.append(pos_list)

    if not mols:
        # Fail early with a readable message instead of asking RDKit to lay
        # out a grid with zero molecules.
        raise ValueError(f"No ligands to draw: no groups found in column '{ligand_col}'")

    grid = Draw.MolsToGridImage(
        mols,
        legends=legends,
        highlightAtomLists=highlights,
        molsPerRow=cols,
        subImgSize=subImgSize,
        maxMols=len(mols),
        returnPNG=False,
        drawOptions=opts
    )
    return grid


def _svg_annotated_smi(
        smi, pos_list, dE_list,
        size=(250, 250), highlight_color=(1, 0, 0),
        highlight=False):
    """Return an SVG string of the molecule with per-atom ΔE labels.

    Parameters
    ----------
    smi : str
        SMILES string of the molecule to draw.
    pos_list : iterable
        Atom indices to annotate; each entry is converted with ``int()``.
    dE_list : iterable
        Values to write next to the atoms, formatted to two decimals. Must
        have the same length as ``pos_list``.
    size : tuple
        Canvas size unpacked into ``rdMolDraw2D.MolDraw2DSVG``.
    highlight_color : tuple
        Colour used for the annotated atoms when ``highlight`` is true.
    highlight : bool
        When true, the annotated atoms are also highlighted in
        ``highlight_color``. Defaults to False, which draws no highlights.

    Returns
    -------
    str
        The drawing text produced by the SVG drawer.

    Raises
    ------
    ValueError
        If the two lists differ in length or the SMILES cannot be parsed.
    """
    atom_ids = [int(p) for p in pos_list]
    energies = list(dE_list)
    if len(atom_ids) != len(energies):
        # zip() would silently drop the surplus entries and lose labels.
        raise ValueError(
            f"pos_list and dE_list differ in length: {len(atom_ids)} != {len(energies)}"
        )

    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # Same failure mode and exception type as annotate_ligand_grid.
        raise ValueError(f"Invalid SMILES: {smi}")
    rdDepictor.Compute2DCoords(mol)

    for idx, e in zip(atom_ids, energies):
        mol.GetAtomWithIdx(idx).SetProp("atomNote", f"{e:.2f}")

    drawer = rdMolDraw2D.MolDraw2DSVG(*size)
    # Named draw_opts so the module-level `opts` object is not shadowed.
    draw_opts = drawer.drawOptions()
    # NOTE(review): confirm drawAtomNotes is a recognised MolDrawOptions field
    # in the installed RDKit version.
    draw_opts.drawAtomNotes       = True
    draw_opts.annotationFontScale = 0.9

    if highlight:
        drawer.DrawMolecule(
            mol,
            highlightAtoms=atom_ids,
            highlightAtomColors={idx: highlight_color for idx in atom_ids},
        )
    else:
        drawer.DrawMolecule(mol)
    drawer.FinishDrawing()
    return drawer.GetDrawingText()

def build_annotated_frame(df,
                          ligand_col="ligand_name",
                          smi_col="smiles",
                          pos_col="rpos",
                          energy_col="dE"):
    """One row per ligand + an SVG column with all ΔE annotations.

    Rows of ``df`` are grouped by ``ligand_col``. For each group the SMILES of
    the first row, the integer-cast positions from ``pos_col`` and the values
    from ``energy_col`` are passed to ``_svg_annotated_smi``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the four columns named by the other arguments.
    ligand_col, smi_col, pos_col, energy_col : str
        Column names for the grouping key, SMILES string, atom index and
        annotated value.

    Returns
    -------
    pandas.DataFrame
        Columns ``ligand_col``, ``smi_col`` and ``"annotated_svg"``, one row
        per group. The columns are present even when ``df`` has no rows.
    """
    out_cols = [ligand_col, smi_col, "annotated_svg"]
    rows = []
    for lig, grp in df.groupby(ligand_col):
        smi  = grp[smi_col].iloc[0]
        pos  = grp[pos_col].astype(int).tolist()
        dE   = grp[energy_col].tolist()
        svg  = _svg_annotated_smi(smi, pos, dE)
        rows.append({ligand_col: lig, smi_col: smi, "annotated_svg": svg})
    # Passing the columns explicitly keeps the schema stable for empty input,
    # so later formatters keyed on "annotated_svg" still find the column.
    return pd.DataFrame(rows, columns=out_cols)

In [16]:
# Multiplicative unit conversion applied to every energy column further down.
# NOTE(review): 627.509474 looks like Hartree -> kcal/mol — confirm.
conversion_factor = 627.509474
# Reference "dimer" energy, converted with the same factor; it is subtracted
# in the dE formula later in the notebook.
# NOTE(review): the origin of -97.050557670754 is not recorded here — confirm
# which calculation it comes from.
dimer = -97.050557670754 * conversion_factor
print(dimer)

-60900.1443953815


In [17]:
# Ligand table: only the name and the xTB electronic energy are loaded.
df_ir_lig = pd.read_parquet(
    "output_files/ir_ligs_v01.parquet",
    columns=["ligand_name", "xtb-gfn-opt-electronic_energy"],
)

# "ts" table (presumably transition states — confirm): one row per ligand and
# reaction position, with the SMILES and the xTB electronic energy.
df_ir_ts = pd.read_parquet(
    "output_files/ir_ts_v01.parquet",
    columns=["ligand_name", "rpos", "smiles", "xtb-gfn-opt-electronic_energy"],
)

In [18]:
# Preview the table loaded from ir_ts_v01.parquet.
df_ir_ts

Unnamed: 0,ligand_name,rpos,smiles,xtb-gfn-opt-electronic_energy
0,1-fluoro-3-(trifluoromethyl)benzene,2,Fc1cccc(C(F)(F)F)c1,-84.462175
1,1-fluoro-3-(trifluoromethyl)benzene,3,Fc1cccc(C(F)(F)F)c1,-84.461393
2,1-fluoro-3-(trifluoromethyl)benzene,4,Fc1cccc(C(F)(F)F)c1,-84.454076
3,1-fluoro-3-(trifluoromethyl)benzene,10,Fc1cccc(C(F)(F)F)c1,-84.454208
4,1-fluoro-3-methoxybenzene,3,COc1cccc(F)c1,-75.847383
...,...,...,...,...
324,ethyl_5-methoxy-1H-indole-2-carboxylate,13,CCOC(=O)c1cc2cc(OC)ccc2[nH]1,-95.983645
325,methyl_4-methoxy-1H-indole-2-carboxylate,5,COC(=O)c1cc2c(OC)cccc2[nH]1,-92.817163
326,methyl_4-methoxy-1H-indole-2-carboxylate,10,COC(=O)c1cc2c(OC)cccc2[nH]1,-92.810875
327,methyl_4-methoxy-1H-indole-2-carboxylate,11,COC(=O)c1cc2c(OC)cccc2[nH]1,-92.811576


In [19]:
# Attach each ligand's own energy to every row of the "ts" table. Both inputs
# carry "xtb-gfn-opt-electronic_energy", so the suffixes tell them apart.
# validate="many_to_one" makes pandas raise if a ligand name occurs more than
# once in df_ir_lig, which would otherwise silently duplicate rows.
df_ir = df_ir_ts.merge(
    df_ir_lig,
    on="ligand_name",
    how="left",
    suffixes=("_ts", "_lig"),
    validate="many_to_one",
)

In [20]:
# Every column whose label contains "energy" is rescaled by conversion_factor.
# NOTE: this overwrites the columns in place, so running the cell a second
# time without re-creating df_ir applies the factor twice.
energy_cols = [name for name in df_ir.columns if "energy" in name]
df_ir[energy_cols] = conversion_factor * df_ir[energy_cols]

In [21]:
# dE = 1.213 * raw + 12.480, where
#   raw = (2 * (E_ts - E_lig) - dimer) / 2
# NOTE(review): 1.213 and 12.480 look like the slope and intercept of a linear
# calibration; their source is not recorded here — confirm.
df_ir["dE"] = 1.213 * (
    (
        2 * (df_ir["xtb-gfn-opt-electronic_energy_ts"] - df_ir["xtb-gfn-opt-electronic_energy_lig"])
        - dimer
    ) / 2
) + 12.480

In [22]:
# Inspect the merged table, which now also carries the derived dE column.
df_ir

Unnamed: 0,ligand_name,rpos,smiles,xtb-gfn-opt-electronic_energy_ts,xtb-gfn-opt-electronic_energy_lig,dE
0,1-fluoro-3-(trifluoromethyl)benzene,2,Fc1cccc(C(F)(F)F)c1,-53000.814706,-22564.377065,29.018718
1,1-fluoro-3-(trifluoromethyl)benzene,3,Fc1cccc(C(F)(F)F)c1,-53000.324034,-22564.377065,29.613903
2,1-fluoro-3-(trifluoromethyl)benzene,4,Fc1cccc(C(F)(F)F)c1,-52995.732778,-22564.377065,35.183096
3,1-fluoro-3-(trifluoromethyl)benzene,10,Fc1cccc(C(F)(F)F)c1,-52995.815330,-22564.377065,35.082961
4,1-fluoro-3-methoxybenzene,3,COc1cccc(F)c1,-47594.951641,-17158.464328,28.958465
...,...,...,...,...,...,...
324,ethyl_5-methoxy-1H-indole-2-carboxylate,13,CCOC(=O)c1cc2cc(OC)ccc2[nH]1,-60230.646397,-29791.658125,25.924802
325,methyl_4-methoxy-1H-indole-2-carboxylate,5,COC(=O)c1cc2c(OC)cccc2[nH]1,-58243.649431,-27804.172595,25.332173
326,methyl_4-methoxy-1H-indole-2-carboxylate,10,COC(=O)c1cc2c(OC)cccc2[nH]1,-58239.703297,-27804.172595,30.118834
327,methyl_4-methoxy-1H-indole-2-carboxylate,11,COC(=O)c1cc2c(OC)cccc2[nH]1,-58240.143113,-27804.172595,29.585337


In [23]:
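# Optional: static grid preview of the same dE annotations, drawn with
# annotate_ligand_grid. Left disabled; uncomment to render.
# img = annotate_ligand_grid(
#     df_ir,
#     ligand_col="ligand_name",
#     smiles_col="smiles",
#     rpos_col="rpos",
#     energy_col="dE",
#     cols=2,
#     subImgSize=(400,400)
# )
# display(img)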

In [None]:
annotated_df = build_annotated_frame(df_ir)

# Shared rendering options: escape=False together with an identity formatter
# keeps the SVG markup in "annotated_svg" as raw HTML instead of escaped text.
html_kwargs = dict(
    escape=False,
    formatters={"annotated_svg": lambda x: x},
    index=False,
)

html = annotated_df.to_html(**html_kwargs)
# A bare HTML(...) expression is only rendered when it is the last line of a
# cell; display() shows the table even though more statements follow.
display(HTML(html))

# NOTE(review): machine-specific absolute path — consider a configurable,
# project-relative output directory so the notebook runs elsewhere.
output_html = "/Users/jacobmolinnielsen/Developer/FrustActivationProject/Presentations/01 Preliminary TS/assets/ts_an_tab.html"
annotated_df.to_html(output_html, **html_kwargs)