In [1]:
%load_ext autoreload
%autoreload 2

In [32]:
from frust.transformers import transformer_ts2
from frust.embedder import embed_ts
from frust.stepper import Stepper
from tooltoad.vis import MolTo3DGrid
from tooltoad.chemutils import ac2mol

In [3]:
# smi_list = ["CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C", "Cc1nc2cc(Br)ccc2o1"]
smi_list = ["CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C"]

In [4]:
ts_guess_struct="../structures/ts2_guess.xyz"

ts_dict = {}
for i, smi in enumerate(smi_list):
    if i < 100:
        ts_mol = transformer_ts2(
            ligand_smiles=smi,
            ts_guess_struct=ts_guess_struct,
            embed_ready=True
        )
    
        ts_with_smi = {
            name: (mol, idxs, smi)
            for name, (mol, idxs) in ts_mol.items()
        }
        ts_dict.update(ts_with_smi)

In [5]:
ts_dict

{'TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4))': (<rdkit.Chem.rdchem.RWMol at 0x16c2c7f10>,
  [38, 10, 39, 58, 61, 66],
  'CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C'),
 'TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5))': (<rdkit.Chem.rdchem.RWMol at 0x16c2c7a10>,
  [38, 10, 39, 58, 61, 67],
  'CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C')}

In [6]:
m = ts_dict.get('TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4))')[0]
MolTo3DGrid(m)

In [7]:
embeds = embed_ts(ts_dict, ts_type="ts2", n_confs=5, optimize=True)

Embedded 5 conformers on atom 66
Embedded 5 conformers on atom 67


In [22]:
m = embeds.get("TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4))")[0]
MolTo3DGrid(m, show_confs=False, cell_size=(600,600), background_color="lightblue")

In [9]:
step = Stepper(smi_list, save_output_dir=False)
df0 = step.build_initial_df(embeds)

In [27]:
df_test = df0.head(5)

In [28]:
df_test

Unnamed: 0,custom_name,ligand_name,rpos,constraint_atoms,cid,smiles,atoms,coords_embedded,energy_uff
0,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",0,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(2.8998563714805776, 0.5274315395440613, 0.93...",14700.853885
1,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",1,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(-3.041082370115009, 1.419519728027352, 0.210...",13656.144129
2,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",2,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(1.0900141475170695, -2.2599385542466126, 1.2...",12608.965385
3,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",3,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(3.007382288125595, -1.1767602329139801, 0.67...",12794.20313
4,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",4,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(2.1357219835235863, 1.2827623657188731, -2.1...",12394.356256


In [47]:
df1 = step.xtb(df0, options={"gfnff": None, "opt": None}, constraint=True)

2025-06-16 10:32:43 INFO  frust.stepper: [xtb-gfnff-opt] row 0 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:32:43 INFO  frust.stepper: [xtb-gfnff-opt] row 1 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:32:44 INFO  frust.stepper: [xtb-gfnff-opt] row 2 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:32:45 INFO  frust.stepper: [xtb-gfnff-opt] row 3 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:32:46 INFO  frust.stepper: [xtb-gfnff-opt] row 4 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:32:47 INFO  frust.stepper: [xtb-gfnff-opt] row 5 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)))…
2025-06-16 10:32:47 INFO  frust.stepper: [xtb-gfnff-opt] row 6 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)))…
2025-06-16 10:32:48 INFO  frust.stepper: [xtb-gfnff-opt] row 7 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)))…
2025-06-16 10:32:49 INFO  frust.stepper: [xtb-gfnff-opt] row 8 (TS2(tri(propan-2-yl)-pyr

In [48]:
df1

Unnamed: 0,custom_name,ligand_name,rpos,constraint_atoms,cid,smiles,atoms,coords_embedded,energy_uff,xtb-gfnff-opt-electronic_energy,xtb-gfnff-opt-normal_termination,xtb-gfnff-opt-opt_coords
0,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",0,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(2.8998563714805776, 0.5274315395440613, 0.93...",14700.853885,-14.862978,True,"[[2.99495940545348, 0.04977543846297, 0.844528..."
1,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",1,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(-3.041082370115009, 1.419519728027352, 0.210...",13656.144129,-14.866723,True,"[[-2.75678001485794, 1.23861286382723, 0.47793..."
2,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",2,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(1.0900141475170695, -2.2599385542466126, 1.2...",12608.965385,-14.868997,True,"[[1.0471931232569, -2.02875518647872, 1.264634..."
3,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",3,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(3.007382288125595, -1.1767602329139801, 0.67...",12794.20313,-14.872517,True,"[[2.94971565010302, -0.89312933671977, 0.60211..."
4,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",4,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(2.1357219835235863, 1.2827623657188731, -2.1...",12394.356256,-14.86943,True,"[[1.87717416175063, 1.29243005550735, -2.08496..."
5,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)),tri(propan-2-yl)-pyrrol-1-ylsilane,5,"[38, 10, 39, 58, 61, 67]",0,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(0.3766725057280274, -3.6400622861623884, -0....",23794.981354,-14.878619,True,"[[0.7505102173256, -3.63094216838639, -0.12838..."
6,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)),tri(propan-2-yl)-pyrrol-1-ylsilane,5,"[38, 10, 39, 58, 61, 67]",1,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(-2.172147064252931, 1.2387866810825339, -0.9...",15563.311252,-14.744971,True,"[[-2.08689931354607, 1.58060146413598, -1.0873..."
7,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)),tri(propan-2-yl)-pyrrol-1-ylsilane,5,"[38, 10, 39, 58, 61, 67]",2,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(1.53488244345258, -2.7139327793120387, -0.66...",17374.408832,-14.889856,True,"[[2.09505577763259, -2.2042289138528, -0.74498..."
8,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)),tri(propan-2-yl)-pyrrol-1-ylsilane,5,"[38, 10, 39, 58, 61, 67]",3,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(-3.6177693506299398, -0.8793636546362515, 0....",13451.122295,-14.886965,True,"[[-3.34046302358697, -0.11576885568783, 0.2704..."
9,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)),tri(propan-2-yl)-pyrrol-1-ylsilane,5,"[38, 10, 39, 58, 61, 67]",4,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(-0.844300732603068, 2.5946637291993166, -1.0...",19479.641515,-14.887154,True,"[[-0.59214978414933, 2.84199555939119, -1.2715..."


In [49]:
from rdkit.Chem.rdchem import RWMol
loc = 1
atoms = df1["atoms"].iloc[loc]
coords = df1["xtb-gfnff-opt-opt_coords"].iloc[loc]
coords_e = df1["coords_embedded"].iloc[loc]

mol1 = ac2mol(atoms, coords)
mol2 = ac2mol(atoms, coords_e)

mol1_RW = RWMol(mol1)
mol2_RW = RWMol(mol2)

mol1_RW.RemoveBond(38, 61)
mol1_RW.RemoveBond(38, 58)
mol1_RW.RemoveBond(58, 66)
mol1_RW.RemoveBond(38, 66)

mol2_RW.RemoveBond(38, 61)
mol2_RW.RemoveBond(38, 58)
mol2_RW.RemoveBond(58, 66)
mol2_RW.RemoveBond(38, 66)

MolTo3DGrid([mol1_RW, mol2_RW], legends=['xtb', 'embed'], cell_size=(500,500), background_color="lightblue")

In [50]:
df2 = step.xtb(df1, options={"gfn": 2})
df2.sort_values(by=['ligand_name', 'rpos', 'xtb-gfn-electronic_energy'], inplace=True)
df2 = df2.groupby(['ligand_name', 'rpos']).head(2)

2025-06-16 10:34:25 INFO  frust.stepper: [xtb-gfn] row 0 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:34:25 INFO  frust.stepper: [xtb-gfn] row 1 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:34:25 INFO  frust.stepper: [xtb-gfn] row 2 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:34:26 INFO  frust.stepper: [xtb-gfn] row 3 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:34:26 INFO  frust.stepper: [xtb-gfn] row 4 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:34:27 INFO  frust.stepper: [xtb-gfn] row 5 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)))…
2025-06-16 10:34:27 INFO  frust.stepper: [xtb-gfn] row 6 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)))…
2025-06-16 10:34:27 INFO  frust.stepper: [xtb-gfn] row 7 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)))…
2025-06-16 10:34:28 INFO  frust.stepper: [xtb-gfn] row 8 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)))…
2025-06-16 10:34:28 INFO  fr

In [None]:
df3 = step.xtb(df2, options={"gfn": 2, "opt": None}, constraint=True)
df3.sort_values(by=['ligand_name', 'rpos', 'xtb-gfn-opt-electronic_energy'], inplace=True)
df3 = df3.groupby(['ligand_name', 'rpos']).head(1)

2025-06-16 10:34:43 INFO  frust.stepper: [xtb-gfn-opt] row 4 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:35:51 INFO  frust.stepper: [xtb-gfn-opt] row 0 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…
2025-06-16 10:36:55 INFO  frust.stepper: [xtb-gfn-opt] row 8 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)))…
2025-06-16 10:37:54 INFO  frust.stepper: [xtb-gfn-opt] row 5 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)))…


KeyError: 'xtb-gfn-ohess-gibbs_energy'

In [52]:
df3

Unnamed: 0,custom_name,ligand_name,rpos,constraint_atoms,cid,smiles,atoms,coords_embedded,energy_uff,xtb-gfnff-opt-electronic_energy,xtb-gfnff-opt-normal_termination,xtb-gfnff-opt-opt_coords,xtb-gfn-electronic_energy,xtb-gfn-normal_termination,xtb-gfn-opt-electronic_energy,xtb-gfn-opt-normal_termination,xtb-gfn-opt-opt_coords
4,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",4,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(2.1357219835235863, 1.2827623657188731, -2.1...",12394.356256,-14.86943,True,"[[1.87717416175063, 1.29243005550735, -2.08496...",-120.448,True,-120.519756,True,"[[1.91794051007874, 0.8994865302163, -2.459756..."
0,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)),tri(propan-2-yl)-pyrrol-1-ylsilane,4,"[38, 10, 39, 58, 61, 66]",0,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(2.8998563714805776, 0.5274315395440613, 0.93...",14700.853885,-14.862978,True,"[[2.99495940545348, 0.04977543846297, 0.844528...",-120.445522,True,-120.515259,True,"[[3.03876801638065, 0.00335731651485, 0.913511..."
8,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)),tri(propan-2-yl)-pyrrol-1-ylsilane,5,"[38, 10, 39, 58, 61, 67]",3,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(-3.6177693506299398, -0.8793636546362515, 0....",13451.122295,-14.886965,True,"[[-3.34046302358697, -0.11576885568783, 0.2704...",-120.478167,True,-120.52571,True,"[[-3.34581669159117, -0.10813453580425, 0.3007..."
5,TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(5)),tri(propan-2-yl)-pyrrol-1-ylsilane,5,"[38, 10, 39, 58, 61, 67]",0,CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C,"[C, C, C, C, C, C, H, H, H, H, N, C, C, C, C, ...","[(0.3766725057280274, -3.6400622861623884, -0....",23794.981354,-14.878619,True,"[[0.7505102173256, -3.63094216838639, -0.12838...",-120.472316,True,-120.525146,True,"[[0.80128680340237, -3.50199578508164, -0.1328..."


In [53]:
from rdkit.Chem.rdchem import RWMol
loc = 1
atoms = df3["atoms"].iloc[loc]
coords = df3["xtb-gfnff-opt-opt_coords"].iloc[loc]
coords_e = df3["xtb-gfn-opt-opt_coords"].iloc[loc]

mol1 = ac2mol(atoms, coords)
mol2 = ac2mol(atoms, coords_e)

mol1_RW = RWMol(mol1)
mol2_RW = RWMol(mol2)

mol1_RW.RemoveBond(38, 61)
mol1_RW.RemoveBond(38, 58)
mol1_RW.RemoveBond(58, 66)
mol1_RW.RemoveBond(38, 66)

mol2_RW.RemoveBond(38, 61)
mol2_RW.RemoveBond(38, 58)
mol2_RW.RemoveBond(58, 66)
mol2_RW.RemoveBond(38, 66)

MolTo3DGrid([mol1_RW, mol2_RW], legends=['gfnff', 'gfn2'], cell_size=(500,500), background_color="lightblue")

In [54]:
detailed_inp = """%geom\nCalc_Hess true\nend"""

options = {
    "wB97X-D3" : None,
    "6-31G**"  : None,
    "TightSCF" : None,
    "SlowConv" : None,
    "OptTS"    : None,
    "Freq"     : None,
    "NoSym"    : None,
}

fast_options = {
    "HF"       : None,
    "MINIX"    : None,
    "LooseSCF" : None,
    "LooseOpt" : None,
    "OptTS"    : None,
    "Freq"     : None,
    "NoSym"    : None,
}

df4 = step.orca(df3.head(1), options=fast_options, xtra_inp_str=detailed_inp)

2025-06-16 10:54:08 INFO  frust.stepper: [orca-HF-MINIX-OptTS] row 4 (TS2(tri(propan-2-yl)-pyrrol-1-ylsilane_rpos(4)))…


Failed to read property detailed_contributions: list index out of range


In [72]:
fregs = [x.get('frequency') for x in list(df4["orca-HF-MINIX-OptTS-vibs"])[0]]

In [None]:
from rdkit.Chem.rdchem import RWMol
loc = 0
atoms = df4["atoms"].iloc[loc]
coords = df4["xtb-gfn-opt-opt_coords"].iloc[loc]
coords_e = df4["orca-HF-MINIX-OptTS-opt_coords"].iloc[loc]

mol1 = ac2mol(atoms, coords)
mol2 = ac2mol(atoms, coords_e)

mol1_RW = RWMol(mol1)
mol2_RW = RWMol(mol2)

mol1_RW.RemoveBond(38, 61)
mol1_RW.RemoveBond(38, 58)
mol1_RW.RemoveBond(58, 66)
mol1_RW.RemoveBond(38, 66)

mol2_RW.RemoveBond(38, 61)
mol2_RW.RemoveBond(38, 58)
mol2_RW.RemoveBond(58, 66)
mol2_RW.RemoveBond(38, 66)
mol2_RW.RemoveBond(58, 61)

MolTo3DGrid([mol1_RW, mol2_RW], legends=['gfn2', 'dft'], cell_size=(500,500), background_color="lightblue")

: 