In [1]:
import submitit
import inspect
import os
import pandas as pd

from frust.stepper import Stepper
import frust.vis as vis
from pathlib import Path
from tooltoad.chemutils import xyz2mol

from rdkit import Chem

In [2]:
def _nuse():
    try:
        if "SLURM_JOB_ID" in os.environ:
            from nuse import start_monitoring
            start_monitoring(filter_cgroup=True)
    except ImportError:
        pass

In [16]:
# Directory that holds every parquet file of this run; the step functions and
# the submission cell all receive it as their save directory.
COMMON_DIR = Path("run_ts1_test")
COMMON_DIR.mkdir(exist_ok=True)

# Input structure; its stem ("phenol") is used as the molecule's name.
f = Path("../structures/phenol_test/phenol.xyz")
file_name = f.stem

with open(f, "r") as file:
    xyz_block = file.read()
mol = xyz2mol(xyz_block)

# build_initial_df receives {name: (mol, [0])}; the meaning of the second
# tuple element is defined by frust -- TODO confirm.
mols = {file_name: (mol, [0])}

smile = Chem.MolToSmiles(mol)
step = Stepper(smile, step_type="mol", save_output_dir=False)

df = step.build_initial_df(mols)
# The run_* steps read the "smiles" column back from the parquet file, so it
# is filled in explicitly on the first row before saving.
df.loc[0, "smiles"] = smile
df.to_parquet(f"{COMMON_DIR}/init.parquet")

# Last expression of the cell: display the input structure.
vis.MolTo3DGrid(f)

2025-11-07 10:23:26 INFO  frust.stepper: Working dir: .


In [21]:
# Optional embedding: run the input molecule through frust's embedder and
# inspect the resulting frame. This cell only displays the frame; it does not
# write it to a parquet file.
from frust.embedder import embed_mols

mols = {file_name: mol}
embedded = embed_mols(mols, n_cores=10, n_confs=None)

df = step.build_initial_df(embedded)
df

Unnamed: 0,custom_name,ligand_name,rpos,constraint_atoms,cid,smiles,atoms,coords_embedded,energy_uff
0,phenol,phenol,,,0,,"[C, C, O, H, H, C, C, H, C, C, H, H, H]","[(-0.8969549834011031, -0.5479744830140846, -1...",
1,phenol,phenol,,,1,,"[C, C, O, H, H, C, C, H, C, C, H, H, H]","[(-0.720112342197233, -1.3121935216068947, -0....",
2,phenol,phenol,,,2,,"[C, C, O, H, H, C, C, H, C, C, H, H, H]","[(0.8647773641906533, -1.2439324151139615, -0....",
3,phenol,phenol,,,3,,"[C, C, O, H, H, C, C, H, C, C, H, H, H]","[(1.7151276350869658, -0.16475482298904387, -0...",
4,phenol,phenol,,,4,,"[C, C, O, H, H, C, C, H, C, C, H, H, H]","[(0.10645602074829888, 1.4399270085440563, -0....",
5,phenol,phenol,,,5,,"[C, C, O, H, H, C, C, H, C, C, H, H, H]","[(-1.2081015353877338, 1.0880869471521857, -0....",
6,phenol,phenol,,,6,,"[C, C, O, H, H, C, C, H, C, C, H, H, H]","[(1.6011307251060294, -0.16187447640738897, 0....",
7,phenol,phenol,,,7,,"[C, C, O, H, H, C, C, H, C, C, H, H, H]","[(1.5321769825047598, 0.0802863230988054, -0.0...",
8,phenol,phenol,,,8,,"[C, C, O, H, H, C, C, H, C, C, H, H, H]","[(1.2578494850855038, -1.0324466630632163, 0.2...",
9,phenol,phenol,,,9,,"[C, C, O, H, H, C, C, H, C, C, H, H, H]","[(-0.6788811123182622, 1.0406202705045944, 0.3...",


In [22]:
def run_hess(
        parquet_path: str,
        n_cores: int = 1,
        mem_gb: int = 1,
        save_dir: str | None = None,
        work_dir: str | None = None):
    """Run the ORCA job named "hess" on the structures stored in a parquet file.

    Reads ``<save_dir>/<parquet_path>``, runs ``Stepper.orca`` with the
    wB97X-D3 / 6-31G** / Freq keywords, and writes the resulting frame next
    to the input as ``<stem>.hess.parquet`` (``init.parquet`` ->
    ``init.hess.parquet``).

    Args:
        parquet_path: File name of the input parquet, relative to ``save_dir``.
        n_cores: Forwarded to ``Stepper`` as ``n_cores``.
        mem_gb: Forwarded to ``Stepper`` as ``memory_gb``.
        save_dir: Directory holding the input and output parquet files. It is
            also forwarded unchanged to ``Stepper`` as ``output_base``.
            ``None`` means the current working directory for the parquet
            files.
        work_dir: Forwarded unchanged to ``Stepper``.
    """
    _nuse()

    # save_dir=None must not be interpolated into the path: it would turn
    # into a directory literally named "None".
    base_dir = Path(".") if save_dir is None else Path(save_dir)
    df = pd.read_parquet(base_dir / parquet_path)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    ligand_smiles = list(dict.fromkeys(df["smiles"].tolist()))
    step = Stepper(
        ligand_smiles,
        n_cores=n_cores,
        memory_gb=mem_gb,
        output_base=save_dir,
        save_calc_dirs=False,
        save_output_dir=True,
        work_dir=work_dir,
    )

    # read_files is forwarded to frust as-is; presumably the files to collect
    # from the calculation -- TODO confirm against frust.
    df = step.orca(df, name="hess", options={
        "wB97X-D3": None,
        "6-31G**": None,
        "Freq": None,
    }, read_files=["input.hess"])

    # Explicit step suffix instead of deriving it from the function's own
    # name via frame introspection; later steps look for "<stem>.hess.parquet".
    step_suffix = "hess"
    stem = parquet_path.rsplit(".", 1)[0]
    df.to_parquet(base_dir / f"{stem}.{step_suffix}.parquet")


#run_hess("init.parquet", save_dir=COMMON_DIR)

In [23]:
def run_optts(
        parquet_path: str,
        n_cores: int = 1,
        mem_gb: int = 1,
        save_dir: str | None = None,
        work_dir: str | None = None):
    """Run the ORCA job named "OptTS" on the structures stored in a parquet file.

    Reads ``<save_dir>/<parquet_path>``, runs ``Stepper.orca`` with the
    wB97X-D3 / 6-31G** / TightSCF / SlowConv / OptTS / NoSym keywords, and
    writes the resulting frame next to the input as ``<stem>.optts.parquet``
    (``init.hess.parquet`` -> ``init.hess.optts.parquet``).

    Args:
        parquet_path: File name of the input parquet, relative to ``save_dir``.
        n_cores: Forwarded to ``Stepper`` as ``n_cores``.
        mem_gb: Forwarded to ``Stepper`` as ``memory_gb``.
        save_dir: Directory holding the input and output parquet files. It is
            also forwarded unchanged to ``Stepper`` as ``output_base``.
            ``None`` means the current working directory for the parquet
            files.
        work_dir: Forwarded unchanged to ``Stepper``.
    """
    _nuse()

    # save_dir=None must not be interpolated into the path: it would turn
    # into a directory literally named "None".
    base_dir = Path(".") if save_dir is None else Path(save_dir)
    df = pd.read_parquet(base_dir / parquet_path)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    ligand_smiles = list(dict.fromkeys(df["smiles"].tolist()))
    step = Stepper(
        ligand_smiles,
        n_cores=n_cores,
        memory_gb=mem_gb,
        output_base=save_dir,
        save_calc_dirs=False,
        save_output_dir=True,
        work_dir=work_dir,
    )

    # use_last_hess is forwarded to frust as-is; presumably it reuses the
    # Hessian from the preceding step -- TODO confirm against frust.
    df = step.orca(df, name="OptTS", options={
        "wB97X-D3": None,
        "6-31G**": None,
        "TightSCF": None,
        "SlowConv": None,
        "OptTS": None,
        "NoSym": None,
    }, use_last_hess=True)

    # Explicit step suffix instead of deriving it from the function's own
    # name via frame introspection; later steps look for "<stem>.optts.parquet".
    step_suffix = "optts"
    stem = parquet_path.rsplit(".", 1)[0]
    df.to_parquet(base_dir / f"{stem}.{step_suffix}.parquet")

#run_optts("init.hess.parquet", save_dir=COMMON_DIR)

In [24]:
def run_freq(
        parquet_path: str,
        n_cores: int = 1,
        mem_gb: int = 1,
        save_dir: str | None = None,
        work_dir: str | None = None):
    """Run the ORCA job named "freq" on the structures stored in a parquet file.

    Reads ``<save_dir>/<parquet_path>``, runs ``Stepper.orca`` with the
    wB97X-D3 / 6-31G** / Freq keywords, and writes the resulting frame next
    to the input as ``<stem>.freq.parquet`` (``init.hess.optts.parquet`` ->
    ``init.hess.optts.freq.parquet``).

    Args:
        parquet_path: File name of the input parquet, relative to ``save_dir``.
        n_cores: Forwarded to ``Stepper`` as ``n_cores``.
        mem_gb: Forwarded to ``Stepper`` as ``memory_gb``.
        save_dir: Directory holding the input and output parquet files. It is
            also forwarded unchanged to ``Stepper`` as ``output_base``.
            ``None`` means the current working directory for the parquet
            files.
        work_dir: Forwarded unchanged to ``Stepper``.
    """
    _nuse()

    # save_dir=None must not be interpolated into the path: it would turn
    # into a directory literally named "None".
    base_dir = Path(".") if save_dir is None else Path(save_dir)
    df = pd.read_parquet(base_dir / parquet_path)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    ligand_smiles = list(dict.fromkeys(df["smiles"].tolist()))
    step = Stepper(
        ligand_smiles,
        n_cores=n_cores,
        memory_gb=mem_gb,
        output_base=save_dir,
        save_calc_dirs=False,
        save_output_dir=True,
        work_dir=work_dir,
    )

    df = step.orca(df, name="freq", options={
        "wB97X-D3": None,
        "6-31G**": None,
        "Freq": None,
    })

    # Explicit step suffix instead of deriving it from the function's own
    # name via frame introspection.
    step_suffix = "freq"
    stem = parquet_path.rsplit(".", 1)[0]
    df.to_parquet(base_dir / f"{stem}.{step_suffix}.parquet")

#run_freq("init.hess.optts.parquet", save_dir=COMMON_DIR)

In [25]:
def run_cleanup(save_dir):
    """Delete intermediate parquet files, keeping only the most-derived one(s).

    The run_* steps in this notebook append their step name to the parquet
    file name (``init.parquet`` -> ``init.hess.parquet`` -> ...), so the
    number of dot-separated parts in a file name grows with every step.
    Every ``*.parquet`` file directly inside ``save_dir`` with fewer parts
    than the maximum found is removed; files tied for the maximum are kept.

    Nothing is deleted when the directory holds fewer than two parquet files.

    Args:
        save_dir: Directory to clean (str or Path). Sub-directories are not
            searched.
    """
    print("[INFO]: Cleanup initiated...")

    parquet_files = list(Path(save_dir).glob("*.parquet"))
    if not parquet_files:
        print("No parquet files found.")
        return

    if len(parquet_files) < 2:
        print("Passing parquet cleanup, only 1 parquet file found.")
        return

    def n_parts(path):
        # "init.hess.parquet" -> 3
        return len(path.name.split("."))

    keep_parts = max(n_parts(path) for path in parquet_files)

    # Shortest names first, so removals are reported from the earliest
    # pipeline stage onwards (sorted() is stable, ties keep glob order).
    for path in sorted(parquet_files, key=n_parts):
        if n_parts(path) < keep_parts:
            print(f"[INFO]: Removing residual parquet file {path}")
            path.unlink()

    print("[INFO]: Cleanup done!")

#run_cleanup(COMMON_DIR)

In [None]:
# Per-step SLURM resources, in submission order. Keys are the names of the
# step functions defined in this notebook. `pre_step` is the stem of the
# parquet file the step reads (None for cleanup, which only takes the
# directory). `time` is passed to submitit as timeout_min.
RES = {
    "run_hess":   dict(cpus=8, mem=62, time=7200, pre_step="init",),
    "run_optts":   dict(cpus=24, mem=62, time=7200, pre_step="init.hess",),
    "run_freq":  dict(cpus=8, mem=62, time=7200, pre_step="init.hess.optts",),
    "run_cleanup":  dict(cpus=1, mem=1, time=100, pre_step=None),
}

# Nodes that no job of this chain should be scheduled on.
EXCLUDED_NODES = "node236,node237,node238,node239"

executor = submitit.AutoExecutor(f"logs/{COMMON_DIR}")

last_job, jobs = None, []
for fn_name, res in RES.items():

    fn = globals()[fn_name]

    # The node exclusion applies to every job, including the first one, which
    # has no dependency.
    extra = {"exclude": EXCLUDED_NODES}
    if last_job is not None:
        # Chain the steps: start only after the previous step succeeded.
        extra["dependency"] = f"afterok:{last_job.job_id}"

    executor.update_parameters(
        slurm_job_name=fn_name,
        slurm_partition="kemi1",
        cpus_per_task=res["cpus"],
        mem_gb=res["mem"],
        timeout_min=res["time"],
        slurm_additional_parameters=extra,
    )

    if fn_name != "run_cleanup":
        job = executor.submit(
            fn,
            f"{res['pre_step']}.parquet",
            res["cpus"],
            res["mem"],
            COMMON_DIR,
            None,  # work dir
        )
        # Only calculation steps become the dependency of the next job.
        last_job = job
    else:
        job = executor.submit(fn, COMMON_DIR)  # clean up

    # Keep a handle on every submitted job for later inspection.
    jobs.append(job)
    print(f"Submitted: {job.job_id}")

Submitted: 55031833
Submitted: 55031835
Submitted: 55031836
Submitted: 55031837
