In [1]:
# frust/pipes/run_ts_per_rpos.py
from pathlib import Path
from frust.stepper import Stepper
from frust.embedder import embed_ts, embed_mols
from frust.transformers import transformer_mols
from rdkit.Chem.rdchem import Mol
import os
import pandas as pd

# ─── Shared calculation settings (reused by the pipeline steps) ─────────
# DFT functional keyword passed to the ORCA step; noted alternative: "wB97X-D3".
FUNCTIONAL = "PBE"
# Basis-set keyword passed alongside FUNCTIONAL; noted alternative: "6-31G**".
BASISSET = "def2-SVP"
# Basis set set aside for the solvent single-point (SP) step.
BASISSET_SOLV = "6-31+G**"

# Pipeline name; used as the stem of the parquet files written by the driver cell.
name = "run_ts_per_rpos"

In [2]:
from frust.utils.mols import create_ts_per_rpos

# create_ts_per_rpos returns an indexable collection; only its first entry is
# used as the input for this run (presumably one entry per reactive position
# of the ligand — TODO confirm against frust.utils.mols).
job_inputs = create_ts_per_rpos(["CN1C=CC=C1"], "../structures/ts1.xyz")[0]

In [3]:
# try:
#     if "SLURM_JOB_ID" in os.environ:
#         from nuse import start_monitoring
#         start_monitoring(filter_cgroup=True)
# except ImportError:
#     pass

def _best_rows(df):
    last_energy = [c for c in df.columns if c.endswith("_energy")][-1]
    return (df.sort_values(["ligand_name", "rpos", last_energy])
              .groupby(["ligand_name", "rpos"]).head(1))


def run_init(
    ts_struct: dict[str, tuple[Mol, list, str]],
    *,
    n_confs: int | None = None,
    n_cores: int = 4,
    mem_gb: int = 20,
    debug: bool = False,
    top_n: int = 10,
    out_dir: str | None = None,
    work_dir: str | None = None,
    output_parquet: str | None = None,
    save_output_dir: bool = True,
) -> pd.DataFrame:
    """Embed conformers, run the xTB GFN-FF pre-optimisation, keep the best rows.

    Steps, in order:
      1. Parse the first key of ``ts_struct`` to obtain the structure-type
         prefix, which is forwarded to ``embed_ts`` as ``ts_type``.
      2. Embed conformers with ``embed_ts`` (``optimize`` is switched off when
         ``debug`` is true).
      3. Build the initial frame with ``Stepper.build_initial_df`` and run
         ``Stepper.xtb`` with the ``gfnff`` and ``opt`` options and
         ``constraint=True``.
      4. Keep, for every (ligand_name, rpos) pair, the row with the lowest
         value in the last ``*_energy`` column (same selection as
         ``_best_rows``).
      5. Optionally write the result to ``output_parquet``.

    Args:
        ts_struct: Mapping of structure name to a tuple whose third element is
            the ligand SMILES. Names must look like ``TS1(<ligand>_rpos(<n>))``
            (prefix ``TS<k>``, ``INT<k>`` or ``Mols``, optional). Only the
            first entry is inspected for the prefix and the SMILES — assumes
            all entries share them; TODO confirm.
        n_confs: Forwarded to ``embed_ts``.
        n_cores: Forwarded to ``Stepper`` as ``n_cores``.
        mem_gb: Forwarded to ``Stepper`` as ``memory_gb``.
        debug: Forwarded to ``Stepper``; also disables ``embed_ts`` optimisation.
        top_n: Currently unused; only the commented-out GFN2 step refers to it.
            Kept so existing callers keep working.
        out_dir: Forwarded to ``Stepper`` as ``output_base``.
        work_dir: Forwarded to ``Stepper`` as ``work_dir``.
        output_parquet: If given, path the selected rows are written to. A
            missing local parent directory is created first.
        save_output_dir: Forwarded to ``Stepper``.

    Returns:
        The frame holding one row per (ligand_name, rpos).

    Raises:
        ValueError: if ``ts_struct`` is empty or its first key does not match
            the expected naming scheme.
    """
    import re

    if not ts_struct:
        raise ValueError("ts_struct is empty; expected at least one named structure")

    pattern = re.compile(
        r'^(?:(?P<prefix>(?:TS|INT)\d*|Mols)\()?'
        r'(?P<ligand>.+?)_rpos\('
        r'(?P<rpos>\d+)\)\)?$'
    )

    # Use a distinct local name: the module-level ``name`` is read by the
    # driver cell and must not be shadowed in here.
    struct_name = next(iter(ts_struct))
    m = pattern.match(struct_name)
    if m is None:
        # Fail with a readable message instead of AttributeError on ``None.group``.
        raise ValueError(
            f"Cannot parse structure name {struct_name!r}; "
            "expected something like 'TS1(<ligand>_rpos(<n>))'"
        )
    # ``prefix`` is an optional group, so this is None for un-prefixed names.
    ts_type = m.group("prefix")

    embedded = embed_ts(ts_struct, ts_type=ts_type, n_confs=n_confs, optimize=not debug)

    ligand_smiles = next(iter(ts_struct.values()))[2]

    step = Stepper(
        ligand_smiles,
        n_cores=n_cores,
        memory_gb=mem_gb,
        debug=debug,
        output_base=out_dir,
        save_calc_dirs=False,
        save_output_dir=save_output_dir,
        work_dir=work_dir,
    )

    df = step.build_initial_df(embedded)
    df = step.xtb(df, options={"gfnff": None, "opt": None}, constraint=True)
    # df = step.xtb(df, options={"gfn": 2})
    # df = step.xtb(df, options={"gfn": 2, "opt": None}, constraint=True, lowest=top_n)

    # df = step.orca(df, name="DFT-pre-SP", options={
    #     FUNCTIONAL  : None,
    #     BASISSET    : None,
    #     "TightSCF"  : None,
    #     "SP"        : None,
    #     "NoSym"     : None,
    # })

    # Rank by the most recently appended energy column and keep the best row
    # of each (ligand_name, rpos) group.
    last_energy = [c for c in df.columns if c.endswith("_energy")][-1]
    group_keys = ["ligand_name", "rpos"]
    df = df.sort_values(group_keys + [last_energy]).groupby(group_keys).head(1)

    if output_parquet:
        # to_parquet does not create directories; make the local parent first.
        # URL-style targets (e.g. "s3://...") are left untouched.
        if "://" not in str(output_parquet):
            Path(output_parquet).parent.mkdir(parents=True, exist_ok=True)
        df.to_parquet(output_parquet)

    return df


def run_hess(
    parquet_path: str,
    *,
    n_cores: int = 2,
    mem_gb: int = 32,
    debug: bool = False,
    out_dir: str | None = None,
    work_dir: str | None = None,
):
    """Compute a (numerical) Hessian to seed OptTS. Low RAM vs analytic Freq."""

    df = pd.read_parquet(parquet_path)
    if df.empty:
        return df

    df = _best_rows(df)

    ligand_smiles = list(dict.fromkeys(df["smiles"].tolist()))
    step = Stepper(
        ligand_smiles,
        n_cores=n_cores,
        memory_gb=mem_gb,
        debug=debug,
        output_base=out_dir,
        save_calc_dirs=False,
        save_output_dir=True,
        work_dir=work_dir,
    )

    df = step.orca(df, name="Hess", options={
        FUNCTIONAL: None,
        BASISSET: None,
        "TightSCF": None,
        "Freq": None,
        "NoSym": None,
    }, save_files=["orca.out", "input.hess"])

    stem = os.path.splitext(parquet_path)[0]
    out_parquet = stem + ".hess.parquet"
    df.to_parquet(out_parquet)

    return df


def run_OptTS():
    """Placeholder for the OptTS step; not implemented yet.

    Takes no arguments, does no work and returns None.
    """
    return None

def run_freq():
    """Placeholder for the frequency step; not implemented yet.

    Takes no arguments, does no work and returns None.
    """
    return None

def run_solv_SP():
    """Placeholder for the solvent single-point step; not implemented yet.

    Takes no arguments, does no work and returns None.
    """
    return None

In [4]:
# The init step writes this parquet and the Hessian step reads it back, so the
# path is spelled once to keep the two calls in sync.
init_parquet = f"run_init/{name}.init.parquet"
run_init(job_inputs, n_confs=1, out_dir="run_init", output_parquet=init_parquet)
# Last expression of the cell: the returned frame is what the notebook displays.
run_hess(init_parquet, out_dir="run_hess", n_cores=10)

Embedded 1 conformers on atom 44
2025-11-03 18:38:21 INFO  frust.stepper: Working dir: .
2025-11-03 18:38:21 INFO  frust.stepper: [xtb-gfnff-opt] row 0 (TS1(1-methylpyrrole_rpos(2)))…
2025-11-03 18:38:22 INFO  frust.stepper: Working dir: .
2025-11-03 18:38:22 INFO  frust.stepper: [Hess-PBE-def2-SVP-Freq] row 0 (TS1(1-methylpyrrole_rpos(2)))…


Unnamed: 0,custom_name,ligand_name,rpos,constraint_atoms,cid,smiles,atoms,coords_embedded,energy_uff,xtb-gfnff-opt-electronic_energy,xtb-gfnff-opt-normal_termination,xtb-gfnff-opt-opt_coords,Hess-PBE-def2-SVP-Freq-electronic_energy,Hess-PBE-def2-SVP-Freq-gibbs_energy,Hess-PBE-def2-SVP-Freq-normal_termination,Hess-PBE-def2-SVP-Freq-vibs
0,TS1(1-methylpyrrole_rpos(2)),1-methylpyrrole,2,"[10, 11, 39, 40, 41, 44]",0,CN1C=CC=C1,"[C, C, C, C, C, C, H, H, H, H, B, N, C, C, C, ...","[[0.6059933236389633, -1.5466997319319582, -0....",4005.972371,-8.965172,True,"[[0.45309365533354, -1.49203036271959, -0.1251...",-913.181551,-912.763692,True,"[{'frequency': -366.06, 'mode': [[-0.000682, -..."
