In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
from frust.stepper import Stepper
from frust.embedder import embed_ts, embed_mols
from frust.transformers import transformer_mols
from frust.utils.io import read_ts_type_from_xyz

from rdkit.Chem.rdchem import Mol

In [6]:
# --- Run configuration: plain module-level values read by later cells ---
ligand_smiles_list = ["CN1C=CC=C1"]     # one SMILES string per ligand
ts_guess_xyz = "../structures/ts2.xyz"  # xyz file holding the guess structure

n_confs = 1    # passed to embed_ts as n_confs
n_cores = 5    # passed to Stepper as n_cores
debug = False  # passed to Stepper; embed_ts receives optimize=not debug

save_output_dir = False  # passed to Stepper as save_output_dir
DFT = False
top_n = 1
out_dir = None           # passed to Stepper as output_base

In [7]:
ts_type = read_ts_type_from_xyz(ts_guess_xyz)

if ts_type == 'TS1':
    from frust.transformers import transformer_ts1
    transformer_ts = transformer_ts1
elif ts_type == 'TS2':
    from frust.transformers import transformer_ts2
    transformer_ts = transformer_ts2
elif ts_type == 'TS3':
    from frust.transformers import transformer_ts3
    transformer_ts = transformer_ts3
elif ts_type == 'TS4':
    from frust.transformers import transformer_ts4
    transformer_ts = transformer_ts4
elif ts_type == 'INT3':
    from frust.transformers import transformer_int3
    transformer_ts = transformer_int3
else:
    raise ValueError(f"Unrecognized TS type: {ts_type}")

ts_structs = {}
for smi in ligand_smiles_list:
    ts_mols = transformer_ts(smi, ts_guess_xyz)
    ts_structs.update(ts_mols)    

embedded = embed_ts(ts_structs, ts_type=ts_type, n_confs=n_confs, optimize=not debug)

step = Stepper(
ligand_smiles_list,
n_cores=n_cores,
debug=debug,
output_base=out_dir,
save_output_dir=save_output_dir,
)
df0 = step.build_initial_df(embedded)
# df1 = step.xtb(df0, options={"gfnff": None, "opt": None}, constraint=True)
# df2 = step.xtb(df1, options={"gfn": 2})
# df3 = step.xtb(df2, options={"gfn": 2, "opt": None}, constraint=True, lowest=top_n)

# last_energy = [c for c in df3.columns if c.endswith("_energy")][-1]
# df3_filt = (
#     df3.sort_values(["ligand_name", "rpos", last_energy])
#         .groupby(["ligand_name", "rpos"])
#         .head(1)
# )

Embedded 1 conformers on atom 44
Embedded 1 conformers on atom 45
2025-07-21 09:55:23 INFO  frust.stepper: Working dir: .


In [None]:
# df3

Unnamed: 0,custom_name,ligand_name,rpos,constraint_atoms,cid,smiles,atoms,coords_embedded,energy_uff,xtb-gfnff-opt-electronic_energy,xtb-gfnff-opt-normal_termination,xtb-gfnff-opt-opt_coords,xtb-gfn-electronic_energy,xtb-gfn-normal_termination,xtb-gfn-opt-electronic_energy,xtb-gfn-opt-normal_termination,xtb-gfn-opt-opt_coords
0,TS2(1-methylpyrrole_rpos(2)),1-methylpyrrole,2,"[10, 17, 39, 41, 40, 44]",0,CN1C=CC=C1,"[H, C, C, C, C, H, C, H, C, H, B, H, H, H, H, ...","[(2.079045957273134, -3.5662312379579753, -2.7...",897.196895,-9.13539,True,"[[1.54430965650478, -3.74512527354709, -3.0177...",-65.662218,True,-65.679443,True,"[[1.41301559777663, -3.78558232039237, -2.9814..."
1,TS2(1-methylpyrrole_rpos(3)),1-methylpyrrole,3,"[10, 17, 39, 41, 40, 45]",0,CN1C=CC=C1,"[H, C, C, C, C, H, C, H, C, H, B, H, H, H, H, ...","[(1.8332372726649244, -4.501921491200516, 0.04...",897.124283,-9.125102,True,"[[1.82222818814142, -4.45977169681794, 0.01562...",-65.663716,True,-65.682001,True,"[[1.76322227849212, -4.41624192329891, 0.04911..."


In [8]:
# from tooltoad.chemutils import ac2mol
# from tooltoad.vis import MolTo3DGrid

# idx = 0

# atoms = df3["atoms"].iloc[idx]

# coords1 = df3["coords_embedded"].iloc[idx]
# coords2 = df3["xtb-gfnff-opt-opt_coords"].iloc[idx]
# coords3 = df3["xtb-gfn-opt-opt_coords"].iloc[idx]

# all_coords = [coords1, coords2, coords3]
# all_mols = [ac2mol(atoms, c) for c in all_coords]

# MolTo3DGrid(all_mols, legends=['embed', 'xtb-ff', 'xtb-opt'])

In [9]:
# ORCA keyword set for this step: each keyword maps to None
# (dict.fromkeys keeps the order HF, STO-3G, SP).
options = dict.fromkeys(("HF", "STO-3G", "SP"))

# Only the first row of df0 is submitted; save_step=False is passed through.
df4 = step.orca(
    df0.head(1),
    name="DFT-pre-SP",
    options=options,
    save_step=False,
)

2025-07-21 09:55:29 INFO  frust.stepper: [DFT-pre-SP-HF-STO-3G] row 0 (TS2(1-methylpyrrole_rpos(2)))…


In [10]:
def run_ts_per_lig(
    ligand_smiles_list: list[str],
    ts_guess_xyz: str,
    *,
    n_confs: int | None = None,
    n_cores: int = 4,
    debug: bool = False,
    top_n: int = 10,
    out_dir: str | None = None,
    output_parquet: str | None = None,
    save_output_dir: bool = True,
    DFT: bool = False,
):
    
    ts_type = read_ts_type_from_xyz(ts_guess_xyz)

    if ts_type == 'TS1':
        from frust.transformers import transformer_ts1
        transformer_ts = transformer_ts1
    elif ts_type == 'TS2':
        from frust.transformers import transformer_ts2
        transformer_ts = transformer_ts2
    elif ts_type == 'TS3':
        from frust.transformers import transformer_ts3
        transformer_ts = transformer_ts3
    elif ts_type == 'TS4':
        from frust.transformers import transformer_ts4
        transformer_ts = transformer_ts4
    elif ts_type == 'INT3':
        from frust.transformers import transformer_int3
        transformer_ts = transformer_int3
    else:
        raise ValueError(f"Unrecognized TS type: {ts_type}")

    ts_structs = {}
    for smi in ligand_smiles_list:
        ts_mols = transformer_ts(smi, ts_guess_xyz)
        ts_structs.update(ts_mols)    

    embedded = embed_ts(ts_structs, ts_type=ts_type, n_confs=n_confs, optimize=not debug)

    step = Stepper(
    ligand_smiles_list,
    n_cores=n_cores,
    debug=debug,
    output_base=out_dir,
    save_output_dir=save_output_dir,
    )
    df0 = step.build_initial_df(embedded)
    df1 = step.xtb(df0, options={"gfnff": None, "opt": None}, constraint=True)
    df2 = step.xtb(df1, options={"gfn": 2})
    df3 = step.xtb(df2, options={"gfn": 2, "opt": None}, constraint=True, lowest=top_n)

    last_energy = [c for c in df3.columns if c.endswith("_energy")][-1]
    df3_filt = (
        df3.sort_values(["ligand_name", "rpos", last_energy])
           .groupby(["ligand_name", "rpos"])
           .head(1)
    )
    
    if not DFT:
        if output_parquet:
            df3_filt.to_parquet(output_parquet)            
        return df3_filt

    # ↓↓↓↓↓↓↓↓ This code only executes if DFT is True ↓↓↓↓↓↓↓↓
    options = {
        "wB97X-D3": None,
        "6-31+G**": None,
        "TightSCF": None,
        "SP": None,
        "NoSym": None,
    }
    
    df4 = step.orca(df3, name="DFT-pre-SP", options=options, save_step=False)
    
    if ts_type == "INT3":
        opt = "Opt"
    else:
        opt = "OptTS"

    detailed_inp = """%geom\nCalc_Hess true\nend"""
    options = {
        "wB97X-D3" : None,
        "6-31G**"  : None,
        "TightSCF" : None,
        "SlowConv" : None,
        opt        : None,
        "Freq"     : None,
        "NoSym"    : None,
    }

    df5 = step.orca(df4, name="DFT", options=options, xtra_inp_str=detailed_inp, save_step=True, lowest=1)

    detailed_inp = """%CPCM\nSMD TRUE\nSMDSOLVENT "chloroform"\nend"""
    options = {
        "wB97X-D3": None,
        "6-31+G**": None,
        "TightSCF": None,
        "SP"      : None,
        "NoSym"   : None,
    }

    df6 = step.orca(df5, name="DFT-SP", options=options, xtra_inp_str=detailed_inp, save_step=True)
    
    if output_parquet:
        df6.to_parquet(output_parquet)
    return df6

In [None]:
# Full pipeline run including the ORCA stages (DFT=True).
# Only ligand_smiles_list, ts_guess_xyz and n_confs are taken from the
# configuration variables; debug, top_n, out_dir and DFT are given as literals
# here, so editing the same-named configuration variables does not affect
# this call.
run_ts_per_lig(
    ligand_smiles_list,
    ts_guess_xyz,
    n_confs=n_confs,
    debug=False,
    top_n=1,
    out_dir="noob",
    DFT=True,
)

Embedded 1 conformers on atom 44
Embedded 1 conformers on atom 45
2025-07-18 09:51:35 INFO  frust.stepper: Working dir: .
2025-07-18 09:51:35 INFO  frust.stepper: [xtb-gfnff-opt] row 0 (TS2(1-methylpyrrole_rpos(2)))…
2025-07-18 09:51:35 INFO  frust.stepper: [xtb-gfnff-opt] row 1 (TS2(1-methylpyrrole_rpos(3)))…
2025-07-18 09:51:36 INFO  frust.stepper: [xtb-gfn] row 0 (TS2(1-methylpyrrole_rpos(2)))…
2025-07-18 09:51:36 INFO  frust.stepper: [xtb-gfn] row 1 (TS2(1-methylpyrrole_rpos(3)))…
2025-07-18 09:51:36 INFO  frust.stepper: [xtb-gfn-opt] row 0 (TS2(1-methylpyrrole_rpos(2)))…
2025-07-18 09:51:40 INFO  frust.stepper: [xtb-gfn-opt] row 1 (TS2(1-methylpyrrole_rpos(3)))…
2025-07-18 09:51:45 INFO  frust.stepper: [DFT-pre-SP-wB97X-D3-6-31+G**-NoSym] row 0 (TS2(1-methylpyrrole_rpos(2)))…


: 