In [77]:
import pandas as pd
import numpy as np
import frust.vis as vis

# 627.509474 is the value of one hartree in kcal/mol. Every DFT_SP_EE column is
# multiplied by this factor further down, so the stored energies are presumably
# in hartree -- TODO confirm against the files that produced the parquet tables.
conversion_factor       =  627.509474

In [78]:
# Ligand table and the table of shared species that are later looked up by
# ligand_name ("dimer", "HH", "catalyst", "HBpin-mol").
# NOTE(review): generics is read from "full_font_xtb" while every other input
# comes from "full_ir_xtb" -- confirm that directory is the intended one.
df_ligs = pd.read_parquet("output_files/full_ir_xtb/ir_ligs_xtb.parquet")
df_generics = pd.read_parquet("output_files/full_font_xtb/generics.parquet")

# One table per transition state, read in order ts1 .. ts4.
df_ts1, df_ts2, df_ts3, df_ts4 = [
    pd.read_parquet(parquet_path)
    for parquet_path in (
        "output_files/full_ir_xtb/ir_ts1_xtb.parquet",
        "output_files/full_ir_xtb/ir_ts2_xtb.parquet",
        "output_files/full_ir_xtb/ir_ts3_xtb.parquet",
        "output_files/full_ir_xtb/ir_ts4_xtb.parquet",
    )
]

In [79]:
# Name of the single-point electronic-energy column as stored in the parquet files.
_RAW_ENERGY_COL = "DFT-pre-SP-wB97X-D3-6-31G**-NoSym-electronic_energy"


def _keep_energy(frame, id_cols):
    """Return `frame` reduced to `id_cols` plus the energy column, renamed to DFT_SP_EE."""
    wanted = [*id_cols, _RAW_ENERGY_COL]
    return frame[wanted].rename(columns={_RAW_ENERGY_COL: "DFT_SP_EE"})


# Ligands and generic species carry no SMILES column in the reduced frame.
df_ligs = _keep_energy(df_ligs, ["ligand_name", "rpos"])
df_generics = _keep_energy(df_generics, ["ligand_name", "rpos"])

# Transition-state frames additionally keep the SMILES string.
df_ts1, df_ts2, df_ts3, df_ts4 = [
    _keep_energy(ts_raw, ["ligand_name", "rpos", "smiles"])
    for ts_raw in (df_ts1, df_ts2, df_ts3, df_ts4)
]

In [80]:
# Scale DFT_SP_EE by conversion_factor exactly once per frame.
# `*=` mutates the frames in place, so re-running this cell used to multiply the
# energies a second time and silently corrupt everything computed afterwards.
# The marker kept in DataFrame.attrs makes the step idempotent for as long as
# the same frame objects are alive (freshly loaded frames start without it).
for df in [df_ligs, df_generics, df_ts1, df_ts2, df_ts3, df_ts4]:
    if not df.attrs.get("DFT_SP_EE_scaled", False):
        df["DFT_SP_EE"] *= conversion_factor
        df.attrs["DFT_SP_EE_scaled"] = True


def _generic_energy(name):
    """Return DFT_SP_EE of the first df_generics row whose ligand_name equals `name`.

    Raises:
        KeyError: if no row matches, instead of the opaque positional
            IndexError that a bare `.iloc[0]` on an empty selection produces.
    """
    matches = df_generics.loc[df_generics["ligand_name"] == name, "DFT_SP_EE"]
    if matches.empty:
        raise KeyError(f"no row with ligand_name == {name!r} in df_generics")
    return matches.iloc[0]


dimer_dft = _generic_energy("dimer")
H2_dft = _generic_energy("HH")
cat_dft = _generic_energy("catalyst")
HBpinmol_dft = _generic_energy("HBpin-mol")
print(dimer_dft, H2_dft, cat_dft, HBpinmol_dft)

-835242.3030976757 -736.0345816026088 -417604.86146744783 -258394.18701306346


In [81]:
# Remove rpos from the ligand table; the transition-state frames keep their own
# rpos, and a second copy would come back from the ligand_name merge as a
# suffixed duplicate column.
# Reassigning instead of inplace=True, together with errors="ignore", keeps the
# cell re-runnable: a second run no longer raises KeyError for the missing column.
df_ligs = df_ligs.drop(columns=["rpos"], errors="ignore")

In [82]:
# Attach the ligand energy to every transition-state row. Both sides carry a
# DFT_SP_EE column, so the suffixes turn them into DFT_SP_EE_<ts> and
# DFT_SP_EE_lig. No `how` is given, so pandas' default inner join applies.
df_ts1, df_ts2, df_ts3, df_ts4 = [
    ts_frame.merge(df_ligs, on=["ligand_name"], suffixes=[ts_suffix, "_lig"])
    for ts_frame, ts_suffix in (
        (df_ts1, "_ts1"),
        (df_ts2, "_ts2"),
        (df_ts3, "_ts3"),
        (df_ts4, "_ts4"),
    )
]

In [83]:
# One row per transition state:
#   (frame, TS energy column, apply the "- HBpin-mol + H2" term?, slope a, intercept b)
# a and b are the coefficients of the recorded
# "[INFO]: Linear relation: y = a*x + b" for that transition state
# (ts1: y = 0.865432x + 2.537739, ts2: y = 0.647606x + 8.306087,
#  ts3: y = 0.564222x + 0.056540, ts4: y = 0.640283x - 4.528334).
_ts_specs = (
    (df_ts1, "DFT_SP_EE_ts1", False, 0.865432, 2.537739),
    (df_ts2, "DFT_SP_EE_ts2", False, 0.647606, 8.306087),
    (df_ts3, "DFT_SP_EE_ts3", True, 0.564222, 0.056540),
    (df_ts4, "DFT_SP_EE_ts4", True, 0.640283, -4.528334),
)

for ts_frame, ts_col, uses_hbpin_h2_term, slope, intercept in _ts_specs:
    # TS energy relative to the free ligand.
    rel_energy = ts_frame[ts_col] - ts_frame["DFT_SP_EE_lig"]
    if uses_hbpin_h2_term:
        # ts3 and ts4 additionally subtract HBpin-mol and add H2.
        rel_energy = rel_energy - HBpinmol_dft + H2_dft
    # Two such units measured against one dimer, reported per unit.
    raw_dE = (2 * rel_energy - dimer_dft) / 2
    # Map the raw value through the inverse of y = a*x + b, i.e. (y - b) / a.
    ts_frame["dE"] = (raw_dE - intercept) / slope

In [None]:
# Columns that identify one row across the per-transition-state frames.
keys = ["ligand_name", "rpos", "smiles"]


def pick_and_rename(df, tag):
    """Return the key columns of `df` plus its dE column renamed to ``dE_<tag>``.

    Args:
        df: frame containing every column in `keys` and a "dE" column.
        tag: label appended to the energy column name, e.g. "ts1" -> "dE_ts1".

    Returns:
        A new frame with columns ``keys + ["dE_<tag>"]``; `df` is left unchanged.
    """
    subset = df.loc[:, [*keys, "dE"]]
    return subset.rename(columns={"dE": f"dE_{tag}"})

# Per-transition-state frames reduced to the join keys plus a uniquely named dE column.
ts1, ts2, ts3, ts4 = [
    pick_and_rename(ts_frame, ts_tag)
    for ts_frame, ts_tag in (
        (df_ts1, "ts1"),
        (df_ts2, "ts2"),
        (df_ts3, "ts3"),
        (df_ts4, "ts4"),
    )
]

# Outer-join the frames one at a time on `keys`, so a row that is missing from
# one of them is kept and simply holds NaN in that frame's dE column.
df_all = ts1
for ts_next in (ts2, ts3, ts4):
    df_all = df_all.merge(ts_next, on=keys, how="outer")

In [87]:
# Show df_ts4 after the ligand merge and the dE calculation (rich display of the
# cell's last expression).
df_ts4

Unnamed: 0,ligand_name,rpos,smiles,DFT_SP_EE_ts4,DFT_SP_EE_lig,dE
0,"2,4-dimethyl-1,3-benzoxazole",6,Cc1nc2c(C)cccc2o1,-9.753653e+05,-300101.948877,31.927543
1,"2,4-dimethyl-1,3-benzoxazole",7,Cc1nc2c(C)cccc2o1,-9.753643e+05,-300101.948877,33.574543
2,"2,4-dimethyl-1,3-benzoxazole",8,Cc1nc2c(C)cccc2o1,-9.753683e+05,-300101.948877,27.260673
3,"2-(4-methoxyphenyl)-1,3-benzoxazole",10,COc1ccc(-c2nc3ccccc3o2)cc1,-1.142827e+06,-467565.123976,34.807938
4,"2-(4-methoxyphenyl)-1,3-benzoxazole",11,COc1ccc(-c2nc3ccccc3o2)cc1,-1.142828e+06,-467565.123976,33.075460
...,...,...,...,...,...,...
275,4-(trifluoromethyl)benzonitrile,4,N#Cc1ccc(C(F)(F)F)cc1,-1.090258e+06,-415005.407776,49.128433
276,4-iodobenzonitrile,3,N#Cc1ccc(I)cc1,,,
277,4-iodobenzonitrile,4,N#Cc1ccc(I)cc1,,,
278,N-(4-cyanophenyl)acetamide,5,CC(=O)Nc1ccc(C#N)cc1,-1.009330e+06,-334062.693094,25.692335


In [88]:
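# Disabled alternative (kept for reference): build df_all by placing the four dE columns side by side.
# NOTE(review): concat with axis=1 aligns rows on the index, not on ligand_name/rpos, so this only
# lines up if all four frames share the same rows in the same order -- confirm before re-enabling.
# df_all = pd.concat([df_ts1["dE"], df_ts2["dE"], df_ts3["dE"], df_ts4["dE"]], keys=["dE_ts1", "dE_ts2", "dE_ts3", "dE_ts4"], axis=1)
# df_all = pd.concat([df_ts1[["ligand_name", "rpos", "smiles"]], df_all], axis=1)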

In [89]:
# Show the combined table: the key columns plus one dE_ts* column per transition state.
df_all

Unnamed: 0,ligand_name,rpos,smiles,dE_ts1,dE_ts2,dE_ts3,dE_ts4
0,"1,2,3-trimethoxybenzene",3,COc1cccc(OC)c1OC,31.150525,22.676954,20.251882,26.738283
1,"1,2,3-trimethoxybenzene",4,COc1cccc(OC)c1OC,35.626788,23.581770,20.166158,29.219762
2,"1,3-di(propan-2-yl)benzene",4,CC(C)c1cccc(C(C)C)c1,37.240077,26.175952,20.370357,28.767762
3,"1,3-di(propan-2-yl)benzene",5,CC(C)c1cccc(C(C)C)c1,36.138138,22.088191,19.985457,30.657343
4,"1,3-di(propan-2-yl)benzene",11,CC(C)c1cccc(C(C)C)c1,49.031289,34.927556,27.623331,37.913594
...,...,...,...,...,...,...,...
285,trimethyl-(2-methylquinolin-7-yl)silane,2,Cc1ccc2ccc([Si](C)(C)C)cc2n1,36.073168,23.169693,22.251166,32.311914
286,trimethyl-(2-methylquinolin-7-yl)silane,3,Cc1ccc2ccc([Si](C)(C)C)cc2n1,39.739064,24.518477,23.386770,39.242595
287,trimethyl-(2-methylquinolin-7-yl)silane,5,Cc1ccc2ccc([Si](C)(C)C)cc2n1,33.914252,21.901040,17.496870,28.663030
288,trimethyl-(2-methylquinolin-7-yl)silane,6,Cc1ccc2ccc([Si](C)(C)C)cc2n1,41.005519,24.788204,27.165210,34.284375


In [90]:
# NOTE(review): this repeats the previous cell's display of df_all with no change
# in between; it looks removable.
df_all

Unnamed: 0,ligand_name,rpos,smiles,dE_ts1,dE_ts2,dE_ts3,dE_ts4
0,"1,2,3-trimethoxybenzene",3,COc1cccc(OC)c1OC,31.150525,22.676954,20.251882,26.738283
1,"1,2,3-trimethoxybenzene",4,COc1cccc(OC)c1OC,35.626788,23.581770,20.166158,29.219762
2,"1,3-di(propan-2-yl)benzene",4,CC(C)c1cccc(C(C)C)c1,37.240077,26.175952,20.370357,28.767762
3,"1,3-di(propan-2-yl)benzene",5,CC(C)c1cccc(C(C)C)c1,36.138138,22.088191,19.985457,30.657343
4,"1,3-di(propan-2-yl)benzene",11,CC(C)c1cccc(C(C)C)c1,49.031289,34.927556,27.623331,37.913594
...,...,...,...,...,...,...,...
285,trimethyl-(2-methylquinolin-7-yl)silane,2,Cc1ccc2ccc([Si](C)(C)C)cc2n1,36.073168,23.169693,22.251166,32.311914
286,trimethyl-(2-methylquinolin-7-yl)silane,3,Cc1ccc2ccc([Si](C)(C)C)cc2n1,39.739064,24.518477,23.386770,39.242595
287,trimethyl-(2-methylquinolin-7-yl)silane,5,Cc1ccc2ccc([Si](C)(C)C)cc2n1,33.914252,21.901040,17.496870,28.663030
288,trimethyl-(2-methylquinolin-7-yl)silane,6,Cc1ccc2ccc([Si](C)(C)C)cc2n1,41.005519,24.788204,27.165210,34.284375


In [91]:
from frust.utils.analytics import build_annotated_frame
from IPython.display import HTML

# Every column whose name contains "dE" is handed over as an energy column.
energy_cols = [name for name in df_all.columns if "dE" in name]
print(energy_cols)

# Only the HTML part of the returned pair is used; the first element is discarded.
# NOTE(review): the saved output shows a warning pointing at
# `vals.idxmax(axis=1, skipna=True)` inside the helper -- presumably triggered by
# rows whose dE values are all NaN; confirm and filter upstream if so.
_, html = build_annotated_frame(
    df_all,
    "ligand_name",
    energy_cols=energy_cols,
    fixed_bond_px=40,
    note_font_px=15,
    annotation_scale=0.7,
    size=(400, 400),
)
HTML(html)

['dE_ts1', 'dE_ts2', 'dE_ts3', 'dE_ts4']


  step_idx = vals.idxmax(axis=1, skipna=True)


ligand_name,smiles,annotated_svg
"1,2,3-trimethoxybenzene",COc1cccc(OC)c1OC,
"1,3-di(propan-2-yl)benzene",CC(C)c1cccc(C(C)C)c1,
"1,3-dimethoxybenzene",COc1cccc(OC)c1,
1-benzofuran,c1ccc2occc2c1,
1-bromo-3-methylbenzene,Cc1cccc(Br)c1,
1-diethoxyphosphoryl-2-(trifluoromethyl)benzene,CCOP(=O)(OCC)c1ccccc1C(F)(F)F,
1-fluoro-3-(trifluoromethyl)benzene,Fc1cccc(C(F)(F)F)c1,
1-fluoro-3-methoxybenzene,COc1cccc(F)c1,
1-fluoro-3-phenylbenzene,Fc1cccc(-c2ccccc2)c1,
1H-indol-2-yl(trimethyl)silane,C[Si](C)(C)c1cc2ccccc2[nH]1,
