In [None]:
# frust/pipes/run_ts_per_rpos.py
from pathlib import Path
from frust.stepper import Stepper
from frust.embedder import embed_ts, embed_mols
from frust.transformers import transformer_mols
from rdkit.Chem.rdchem import Mol
import os
import inspect
import pandas as pd

# ─── SHARED SETTINGS (inherit across steps) ─────────────────────────────
# NOTE(review): in the code visible in this notebook, FUNCTIONAL and BASISSET
# are only referenced from commented-out ORCA option lines, and BASISSET_SOLV
# is not referenced at all; every active ORCA step requests "XTB2" instead.
FUNCTIONAL = "PBE" # "wB97X-D3"
BASISSET = "def2-SVP" # "6-31G**"
BASISSET_SOLV = "6-31+G**"  # for solvent SP

# Directory name of this pipeline run. The driver cell passes it as ``out_dir``
# and the run_* stage functions read it as a module-level global when building
# the paths of the parquet files exchanged between stages.
name = "run_ts_per_rpos"

In [105]:
from frust.utils.mols import create_ts_per_rpos

# Build the per-position TS inputs for the SMILES "CN1C=CC=C1" from the
# ../structures/ts1.xyz structure file and keep only the first element of the
# result; that element is what the driver cell hands to run_init.
job_inputs = create_ts_per_rpos(
    ["CN1C=CC=C1"],
    "../structures/ts1.xyz",
)[0]

In [112]:
# try:
#     if "SLURM_JOB_ID" in os.environ:
#         from nuse import start_monitoring
#         start_monitoring(filter_cgroup=True)
# except ImportError:
#     pass

def _best_rows(df):
    last_energy = [c for c in df.columns if c.endswith("_energy")][-1]
    return (df.sort_values(["ligand_name", "rpos", last_energy])
              .groupby(["ligand_name", "rpos"]).head(1))


def run_init(
    ts_struct: dict[str, tuple[Mol, list, str]],
    *,
    n_confs: int | None = None,
    n_cores: int = 4,
    mem_gb: int = 20,
    debug: bool = False,
    top_n: int = 10,
    out_dir: str | None = None,
    work_dir: str | None = None,
    save_output_dir: bool = True,
):
    """Embed the TS structures, run the initial xTB step and save ``init.parquet``.

    The first key of ``ts_struct`` is parsed to obtain the structure-type
    prefix (``TS<n>``, ``INT<n>`` or ``Mols``; ``None`` when the key carries no
    prefix), which is forwarded to ``embed_ts``. The embedded structures are
    turned into a DataFrame by ``Stepper.build_initial_df`` and passed through
    ``Stepper.xtb`` with the ``gfnff`` and ``opt`` options. The best row per
    (ligand_name, rpos) is then written to ``<out_dir>/init.parquet``.

    Args:
        ts_struct: mapping of structure name to a tuple whose third element is
            handed to ``Stepper`` as the ligand SMILES.
        n_confs: forwarded to ``embed_ts``.
        n_cores: forwarded to ``Stepper``.
        mem_gb: forwarded to ``Stepper`` as ``memory_gb``.
        debug: forwarded to ``Stepper``; also disables ``optimize`` in
            ``embed_ts``.
        top_n: currently unused (only referenced by a commented-out xTB step).
        out_dir: ``Stepper`` output base and directory of the parquet file.
            When ``None`` the parquet goes to the module-level ``name``
            directory, which is where the later stages look by default.
        work_dir: forwarded to ``Stepper``.
        save_output_dir: forwarded to ``Stepper``.

    Returns:
        The DataFrame that was written to ``init.parquet``.

    Raises:
        ValueError: if ``ts_struct`` is empty or its first key does not match
            the expected ``[PREFIX(]<ligand>_rpos(<n>)[)]`` naming scheme.
    """
    import re

    if not ts_struct:
        raise ValueError("ts_struct is empty; there is nothing to embed.")

    pattern = re.compile(
        r'^(?:(?P<prefix>(?:TS|INT)\d*|Mols)\()?'
        r'(?P<ligand>.+?)_rpos\('
        r'(?P<rpos>\d+)\)\)?$'
    )

    # Only the first entry decides the structure type and the ligand SMILES.
    # The local is deliberately not called `name`, so the module-level `name`
    # stays reachable for the default output directory below.
    struct_name, first_entry = next(iter(ts_struct.items()))
    m = pattern.match(struct_name)
    if m is None:
        raise ValueError(
            f"Structure name {struct_name!r} does not match the expected "
            "'[PREFIX(]<ligand>_rpos(<n>)[)]' pattern."
        )
    ts_type = m.group("prefix")

    embedded = embed_ts(ts_struct, ts_type=ts_type, n_confs=n_confs, optimize=not debug)

    ligand_smiles = first_entry[2]

    step = Stepper(
        ligand_smiles,
        n_cores=n_cores,
        memory_gb=mem_gb,
        debug=debug,
        output_base=out_dir,
        save_calc_dirs=False,
        save_output_dir=save_output_dir,
        work_dir=work_dir,
    )

    df = step.build_initial_df(embedded)
    df = step.xtb(df, options={"gfnff": None, "opt": None}, constraint=True, save_step=True)
    # df = step.xtb(df, options={"gfn": 2})
    # df = step.xtb(df, options={"gfn": 2, "opt": None}, constraint=True, lowest=top_n)

    # df = step.orca(df, name="DFT-pre-SP", options={
    #     FUNCTIONAL  : None,
    #     BASISSET    : None,
    #     "TightSCF"  : None,
    #     "SP"        : None,
    #     "NoSym"     : None,
    # })

    # Keep the lowest-energy row per (ligand_name, rpos), ranked on the last
    # *_energy column; same selection the later stages apply via _best_rows.
    df = _best_rows(df)

    # Later stages are called with the literal file name "init.parquet", so
    # the name is spelled out here instead of being derived from the function
    # name by introspection.
    parquet_dir = Path(out_dir if out_dir is not None else name)
    parquet_dir.mkdir(parents=True, exist_ok=True)
    df.to_parquet(parquet_dir / "init.parquet")

    return df


def run_hess(
    parquet_path: str,
    *,
    n_cores: int = 2,
    mem_gb: int = 32,
    debug: bool = False,
    out_dir: str | None = None,
    work_dir: str | None = None,
):
    """Run the ORCA "Hess" step on the best rows of a stage parquet file.

    Reads ``parquet_path`` from the run directory, keeps the best row per
    (ligand_name, rpos) via ``_best_rows``, runs ``Stepper.orca`` with the
    ``XTB2``, ``Freq`` and ``NoSym`` options, and writes the result next to
    the input as ``<stem>.hess.parquet``.

    Args:
        parquet_path: file name of the input parquet, relative to the run
            directory.
        n_cores: forwarded to ``Stepper``.
        mem_gb: forwarded to ``Stepper`` as ``memory_gb``.
        debug: forwarded to ``Stepper``.
        out_dir: ``Stepper`` output base and run directory holding the parquet
            files. When ``None`` the module-level ``name`` is used for the
            parquet files, as before.
        work_dir: forwarded to ``Stepper``.

    Returns:
        The resulting DataFrame. If the input frame is empty it is returned
        unchanged and no output parquet is written.
    """
    # Parquet files live in the same directory run_init wrote to (out_dir);
    # the previous hard-wired global `name` is kept as the fallback.
    parquet_dir = Path(out_dir if out_dir is not None else name)

    df = pd.read_parquet(parquet_dir / parquet_path)
    if df.empty:
        return df

    df = _best_rows(df)

    # De-duplicate the SMILES while preserving their first-seen order.
    ligand_smiles = list(dict.fromkeys(df["smiles"].tolist()))
    step = Stepper(
        ligand_smiles,
        n_cores=n_cores,
        memory_gb=mem_gb,
        debug=debug,
        output_base=out_dir,
        save_calc_dirs=False,
        save_output_dir=True,
        work_dir=work_dir,
    )

    df = step.orca(df, name="Hess", options={
        "XTB2": None,
        #BASISSET: None,
        #"TightSCF": None,
        "Freq": None,
        "NoSym": None,
    }, read_files=["input.hess"])

    # "init.parquet" -> "init.hess.parquet": only the last extension is
    # stripped, so the chain of stage suffixes accumulates in the file name.
    stem = parquet_path.rsplit('.', 1)[0]
    df.to_parquet(parquet_dir / f"{stem}.hess.parquet")

    return df


def run_OptTS(
    parquet_path: str,
    *,
    n_cores: int = 8,
    mem_gb: int = 40,
    debug: bool = False,
    out_dir: str | None = None,
    work_dir: str | None = None,
):
    """Run the ORCA transition-state optimisation step on a stage parquet file.

    Reads ``parquet_path`` from the run directory, runs ``Stepper.orca`` with
    the ``XTB2``, ``TightSCF`` and ``OptTS`` options and ``use_last_hess=True``,
    and writes the result next to the input as ``<stem>.optts.parquet``.

    Args:
        parquet_path: file name of the input parquet, relative to the run
            directory.
        n_cores: forwarded to ``Stepper``.
        mem_gb: forwarded to ``Stepper`` as ``memory_gb``.
        debug: forwarded to ``Stepper``.
        out_dir: ``Stepper`` output base and run directory holding the parquet
            files. When ``None`` the module-level ``name`` is used for the
            parquet files, as before.
        work_dir: forwarded to ``Stepper``.

    Returns:
        The DataFrame returned by the ORCA step.
    """
    # Parquet files live in the same directory run_init wrote to (out_dir);
    # the previous hard-wired global `name` is kept as the fallback.
    parquet_dir = Path(out_dir if out_dir is not None else name)

    df = pd.read_parquet(parquet_dir / parquet_path)

    # De-duplicate the SMILES while preserving their first-seen order.
    ligand_smiles = list(dict.fromkeys(df["smiles"].tolist()))
    step = Stepper(
        ligand_smiles,
        n_cores=n_cores,
        memory_gb=mem_gb,
        debug=debug,
        output_base=out_dir,
        save_calc_dirs=True,
        save_output_dir=True,
        work_dir=work_dir,
    )

    # Read previously computed Hessian (*.hess from ORCA)
    df = step.orca(df, name="DFT-OptTS", options={
        "XTB2": None,
        #BASISSET: None,
        "TightSCF": None,
        #"SlowConv": None,
        "OptTS": None,
        #"NoSym": None,
    }, use_last_hess=True)

    # Strip only the last extension so the stage suffixes accumulate.
    stem = parquet_path.rsplit('.', 1)[0]
    df.to_parquet(parquet_dir / f"{stem}.optts.parquet")

    return df


def run_freq(
    parquet_path: str,
    *,
    n_cores: int = 8,
    mem_gb: int = 40,
    debug: bool = False,
    out_dir: str | None = None,
    work_dir: str | None = None,
):
    """Run the ORCA frequency step on a stage parquet file.

    Reads ``parquet_path`` from the run directory, runs ``Stepper.orca`` with
    the ``XTB2``, ``TightSCF`` and ``Freq`` options, and writes the result next
    to the input as ``<stem>.freq.parquet``.

    Args:
        parquet_path: file name of the input parquet, relative to the run
            directory.
        n_cores: forwarded to ``Stepper``.
        mem_gb: forwarded to ``Stepper`` as ``memory_gb``.
        debug: forwarded to ``Stepper``.
        out_dir: ``Stepper`` output base and run directory holding the parquet
            files. When ``None`` the module-level ``name`` is used for the
            parquet files, as before.
        work_dir: forwarded to ``Stepper``.

    Returns:
        The DataFrame returned by the ORCA step.
    """
    # Parquet files live in the same directory run_init wrote to (out_dir);
    # the previous hard-wired global `name` is kept as the fallback.
    parquet_dir = Path(out_dir if out_dir is not None else name)

    df = pd.read_parquet(parquet_dir / parquet_path)

    # De-duplicate the SMILES while preserving their first-seen order.
    ligand_smiles = list(dict.fromkeys(df["smiles"].tolist()))
    step = Stepper(
        ligand_smiles,
        n_cores=n_cores,
        memory_gb=mem_gb,
        debug=debug,
        output_base=out_dir,
        save_calc_dirs=True,
        save_output_dir=True,
        work_dir=work_dir,
    )

    # NOTE(review): the step label is "DFT-OptTS", the same as in run_OptTS,
    # although this step requests "Freq". It looks like a copy-paste leftover,
    # but it is left unchanged because the label may feed into result column
    # names that downstream code relies on — confirm before renaming.
    df = step.orca(df, name="DFT-OptTS", options={
        "XTB2": None,
        #BASISSET: None,
        "TightSCF": None,
        #"SlowConv": None,
        "Freq": None,
        #"NoSym": None,
    })

    # Strip only the last extension so the stage suffixes accumulate.
    stem = parquet_path.rsplit('.', 1)[0]
    df.to_parquet(parquet_dir / f"{stem}.freq.parquet")

    return df


def run_solv(
    parquet_path: str,
    *,
    n_cores: int = 8,
    mem_gb: int = 40,
    debug: bool = False,
    out_dir: str | None = None,
    work_dir: str | None = None,
):
    """Run the final ORCA single-point ("DFT-solv") step on a stage parquet file.

    Reads ``parquet_path`` from the run directory, runs ``Stepper.orca`` with
    the ``XTB2``, ``TightSCF`` and ``SP`` options, and writes the result next
    to the input as ``<stem>.solv.parquet``.

    Args:
        parquet_path: file name of the input parquet, relative to the run
            directory.
        n_cores: forwarded to ``Stepper``.
        mem_gb: forwarded to ``Stepper`` as ``memory_gb``.
        debug: forwarded to ``Stepper``.
        out_dir: ``Stepper`` output base and run directory holding the parquet
            files. When ``None`` the module-level ``name`` is used for the
            parquet files, as before.
        work_dir: forwarded to ``Stepper``.

    Returns:
        The DataFrame returned by the ORCA step.
    """
    # Parquet files live in the same directory run_init wrote to (out_dir);
    # the previous hard-wired global `name` is kept as the fallback.
    parquet_dir = Path(out_dir if out_dir is not None else name)

    df = pd.read_parquet(parquet_dir / parquet_path)

    # De-duplicate the SMILES while preserving their first-seen order.
    ligand_smiles = list(dict.fromkeys(df["smiles"].tolist()))
    step = Stepper(
        ligand_smiles,
        n_cores=n_cores,
        memory_gb=mem_gb,
        debug=debug,
        output_base=out_dir,
        save_calc_dirs=True,
        save_output_dir=True,
        work_dir=work_dir,
    )

    # NOTE(review): despite the "DFT-solv" label, the options below request
    # XTB2 and contain no solvent keyword, and BASISSET_SOLV is not used.
    # Presumably a fast test configuration — confirm before production runs.
    df = step.orca(df, name="DFT-solv", options={
        "XTB2": None,
        #BASISSET: None,
        "TightSCF": None,
        #"SlowConv": None,
        "SP": None,
        #"NoSym": None,
    })

    # Strip only the last extension so the stage suffixes accumulate.
    stem = parquet_path.rsplit('.', 1)[0]
    df.to_parquet(parquet_dir / f"{stem}.solv.parquet")

    return df

def run_cleanup(out_dir):
    """Delete intermediate ``.parquet`` files that a later stage has superseded.

    Every pipeline stage writes ``<previous stem>.<stage>.parquet``, so an
    earlier file's stem is a dotted prefix of the later ones
    (``init`` -> ``init.hess`` -> ``init.hess.optts`` ...). A file directly
    inside ``out_dir`` is removed only when another parquet file there extends
    its stem in this way. Files with no successor are kept, including ones
    that belong to an unrelated chain.

    Args:
        out_dir: directory to clean; only ``*.parquet`` files directly inside
            it are considered.

    Returns:
        None. Progress is reported on stdout.
    """
    print("[INFO]: Cleanup initiated...")

    parquet_files = sorted(Path(out_dir).glob("*.parquet"))
    if not parquet_files:
        print("No parquet files found.")
        return

    if len(parquet_files) < 2:
        print("Passing parquet cleanup, only 1 parquet file found.")
        return

    stems = [f.stem for f in parquet_files]

    # A file is residual when some other stem starts with "<its stem>.".
    # A stem never starts with itself plus a dot, so no self-match is possible.
    residual = [
        f for f in parquet_files
        if any(other.startswith(f.stem + ".") for other in stems)
    ]
    # Remove the earliest stages first (fewest dotted parts) for a readable log.
    residual.sort(key=lambda f: (len(f.name.split(".")), f.name))

    for f in residual:
        print(f"[INFO]: Removing residual parquet file {f}")
        f.unlink(missing_ok=True)

    print("[INFO]: Cleanup done!")

In [111]:
# Run the pipeline stages in order. Every stage after run_init is given the
# parquet file name written by the stage before it (each stage appends its own
# suffix to the stem), so these calls must stay in this sequence.
# Return values are discarded; the data is carried between stages on disk.
_ = run_init(job_inputs, n_confs=1, out_dir=name)
_ = run_hess("init.parquet", out_dir=name, n_cores=10)
_ = run_OptTS("init.hess.parquet", out_dir=name, n_cores=10)
_ = run_freq("init.hess.optts.parquet", out_dir=name, n_cores=10)
_ = run_solv("init.hess.optts.freq.parquet", out_dir=name, n_cores=10)

Embedded 1 conformers on atom 44
2025-11-04 15:19:24 INFO  frust.stepper: Working dir: .
2025-11-04 15:19:24 INFO  frust.stepper: [xtb-gfnff-opt] row 0 (TS1(1-methylpyrrole_rpos(2)))…
2025-11-04 15:19:25 INFO  frust.stepper: Working dir: .
2025-11-04 15:19:25 INFO  frust.stepper: [Hess-XTB2-Freq-NoSym] row 0 (TS1(1-methylpyrrole_rpos(2)))…
2025-11-04 15:19:29 INFO  frust.stepper: Working dir: .
2025-11-04 15:19:29 INFO  frust.stepper: [DFT-OptTS-XTB2-TightSCF-OptTS] row 0 (TS1(1-methylpyrrole_rpos(2)))…
2025-11-04 15:19:35 INFO  frust.stepper: Working dir: .
2025-11-04 15:19:35 INFO  frust.stepper: [DFT-OptTS-XTB2-TightSCF-Freq] row 0 (TS1(1-methylpyrrole_rpos(2)))…
2025-11-04 15:19:40 INFO  frust.stepper: Working dir: .
2025-11-04 15:19:40 INFO  frust.stepper: [DFT-solv-XTB2-TightSCF] row 0 (TS1(1-methylpyrrole_rpos(2)))…


In [113]:
# Remove the intermediate stage parquet files from the run directory once the
# whole pipeline has finished.
run_cleanup(out_dir=name)

[INFO]: Cleanup initiated...
[INFO]: Removing residual parquet file run_ts_per_rpos/init.parquet
[INFO]: Removing residual parquet file run_ts_per_rpos/init.hess.parquet
[INFO]: Removing residual parquet file run_ts_per_rpos/init.hess.optts.parquet
[INFO]: Removing residual parquet file run_ts_per_rpos/init.hess.optts.freq.parquet
[INFO]: Cleanup done!
