In [1]:
import submitit
from frust.stepper import Stepper
import frust.vis as vis
from pathlib import Path
from tooltoad.chemutils import xyz2mol, xyz2ac
import pandas as pd

In [2]:
# Read the HH test geometry from disk as one XYZ text block.
f = Path("../structures/misc/HH.xyz")
with f.open("r") as file:
    xyz_block = file.read()

# Parse the same XYZ text twice: once into a molecule object, once into
# separate atom/coordinate lists (the latter are reused by a later cell).
mol = xyz2mol(xyz_block)
atoms, coords = xyz2ac(xyz_block)

# Stepper input: one entry keyed by the file stem.
# NOTE(review): the meaning of the `[0]` paired with the molecule is defined
# by frust's build_initial_df -- confirm against that API.
mols = {f.stem: (mol, [0])}

step = Stepper(list(mols), step_type="mol", save_output_dir=False)
df0 = step.build_initial_df(mols)
#vis.MolTo3DGrid(f)



2025-11-05 11:11:40 INFO  frust.stepper: Working dir: .


In [3]:
# Display the initial frame built from the HH geometry.
df0

Unnamed: 0,custom_name,ligand_name,rpos,constraint_atoms,cid,smiles,atoms,coords_embedded,energy_uff
0,HH,HH,,,0,,"[H, H]","[(-3.9213066101, 2.4630429745, 0.0), (-4.44494...",


In [4]:
def run_orca_calc(
    df,
    results_dir,
    debug=False,
    n_cores=1,
    mem_gb=5,
):
    """Run the ORCA "DFT" step (XTB2 + SP options) on ``df`` and save it.

    Can be called directly in the kernel or handed to ``executor.submit``.

    Args:
        df: DataFrame with a ``custom_name`` column. The value in the first
            row names the Stepper job and the output parquet file.
        results_dir: Directory used as the Stepper ``output_base`` and as the
            destination of ``results_<name>.parquet``. It is created,
            including any missing parent directories, if it does not exist.
        debug: Forwarded to ``Stepper(debug=...)``.
        n_cores: Forwarded to ``Stepper(n_cores=...)``.
        mem_gb: Forwarded to ``Stepper(memory_gb=...)``.

    Raises:
        ValueError: If ``df`` has no rows.
    """
    # Fail fast, before any monitoring or directories are set up, instead of
    # surfacing an opaque IndexError from `.iloc[0]` further down.
    if len(df) == 0:
        raise ValueError("run_orca_calc: df is empty; nothing to calculate")

    # Imported here so the dependency is only needed where the job runs.
    # NOTE(review): presumably resource-usage monitoring -- confirm with nuse.
    from nuse import start_monitoring
    start_monitoring(filter_cgroup=True)

    import os
    # os.cpu_count() reports the machine's logical CPUs, which is not
    # necessarily the number of CPUs allocated to this job.
    print(os.cpu_count())

    name = df["custom_name"].iloc[0]

    results_dir = Path(results_dir)
    # parents=True so nested targets such as "runs/today" work on first use.
    results_dir.mkdir(parents=True, exist_ok=True)

    step = Stepper(
        [name],
        step_type="none",
        debug=debug,
        save_output_dir=True,
        live=True,
        output_base=str(results_dir),
        n_cores=n_cores,
        memory_gb=mem_gb,
    )

    df = step.orca(df, "DFT", {
        "XTB2"  : None,
        "SP"    : None,
    })

    df.to_parquet(results_dir / f"results_{name}.parquet")

# import signal

# class TimeoutError_(RuntimeError):
#     pass

# def _timeout_handler(signum, frame):
#     # Raise a Python exception (not os._exit) so your `finally` runs
#     raise TimeoutError_("Forced 30s timeout for testing")

# def run_orca_calc(
#     df,
#     results_dir,
#     debug=False,
#     n_cores=1,
#     mem_gb=5,
# ):
#     from nuse import start_monitoring
#     start_monitoring(filter_cgroup=True)

#     import os
#     from pathlib import Path
#     print(os.cpu_count())

#     name = df["custom_name"].iloc[0]

#     results_dir = Path(results_dir)
#     results_dir.mkdir(exist_ok=True)

#     step = Stepper(
#         [name],
#         step_type="none",
#         debug=debug,
#         save_output_dir=True,
#         live=True,
#         output_base=str(results_dir),
#         n_cores=n_cores,
#         memory_gb=mem_gb,
#     )

#     # Arm a 30s alarm; it fires while ORCA is running
#     signal.signal(signal.SIGALRM, _timeout_handler)
#     signal.alarm(30)
#     try:
#         df = step.orca(df, "DFT", {"HF": None, "Opt": None}, save_step=True)
#     finally:
#         # Always disarm the alarm afterwards
#         signal.alarm(0)

#     df.to_parquet(f"{results_dir}/results_{name}.parquet")

In [7]:
# Run the calculation directly in this kernel; output goes under "orca_local".
run_orca_calc(df0, "orca_local")

16
Attempt 1/5: Encountered error: 'PUGREST.BadRequest: error: '
2025-11-05 11:14:54 INFO  frust.stepper: Working dir: .
2025-11-05 11:14:54 INFO  frust.stepper: [DFT-XTB2-SP] row 0 (HH)…


In [None]:
# Settings shared by the SLURM request and the job itself, so the resources
# asked for and the resources the calculation is told to use cannot drift.
DEBUG           = False
N_CORES         = 1
MEM_GB          = 1
TIMEOUT_MIN     = 1440
RESULTS_DIR     = "test"

executor = submitit.AutoExecutor(f"logs/{RESULTS_DIR}")
executor.update_parameters(
    slurm_partition="kemi1",
    cpus_per_task=N_CORES,
    mem_gb=MEM_GB,
    timeout_min=TIMEOUT_MIN,
    slurm_additional_parameters={
        "hint": "nomultithread", # disables multithreading
        # Plain comma-separated host list (no embedded spaces) so the value
        # does not depend on how the sbatch directive is quoted.
        # NOTE(review): --nodelist requests *all* listed hosts for the job;
        # confirm that is intended rather than "any one of these nodes".
        "nodelist": "node236,node237,node238,node239",
    }
)

name = df0["custom_name"].iloc[0]
executor.update_parameters(slurm_job_name=name)

# Keep the job handle so its state and result can be inspected later.
orca_job = executor.submit(
    run_orca_calc,
    df0,
    RESULTS_DIR,
    debug=DEBUG,
    n_cores=N_CORES,
    mem_gb=MEM_GB,
)
print(f"Submitted: {name}")

Submitted: HH


In [None]:
from tooltoad.orca import orca_calculate

def tt_orca(atoms, coords):
    """Run tooltoad's ``orca_calculate`` with the XTB2 option and dump the result.

    Args:
        atoms: Atom list passed straight to ``orca_calculate``.
        coords: Coordinates passed straight to ``orca_calculate``.

    The result is written as text to ``tt_res`` in the current working
    directory; nothing is returned.
    """
    res = orca_calculate(atoms, coords, options={"XTB2": None})

    # file.write() only accepts str. NOTE(review): orca_calculate may return a
    # non-string results object (confirm); str() handles that and leaves a
    # string result unchanged.
    with open("tt_res", "w") as f:
        f.write(str(res))
    
# Submit the tooltoad test on the executor created in an earlier cell.
# The job name lives in its own variable so the confirmation message reports
# this job rather than the `name` left over from a previous submission.
tt_job_name = "tooltoad_test"
executor.update_parameters(slurm_job_name=tt_job_name, cpus_per_task=2, mem_gb=1, slurm_partition="kemi1")
# Keep the job handle so its state and result can be inspected later.
tt_job = executor.submit(tt_orca, atoms, coords)
print(f"Submitted: {tt_job_name}")

Submitted: HH


# UMA

In [2]:
from frust.pipes import run_mols_UMA
import pandas as pd
import os

In [3]:
# Input locations for the UMA section.
CSV_PATH = "../datasets/1m.csv"
TS_XYZ = "../structures/ts1.xyz"

# Load the dataset table from CSV.
df = pd.read_csv(CSV_PATH)

# Unique SMILES in first-occurrence order (dict keys keep insertion order).
smi_list = [*dict.fromkeys(df["smiles"])]

In [4]:
# out_dir = "UMA_test"

# df = run_mols_UMA(
#     smi_list,
#     n_confs=1,
#     n_cores=10,
#     mem_gb=15,
#     out_dir=out_dir,
#     output_parquet=f"{out_dir}.parquet",
#     DFT=False,
#     save_output_dir=False,
#     select_mols=["HH"]
# )

In [6]:
from frust.pipes import run_mols_UMA

# Single source of truth for the UMA job: the SLURM request and the pipeline
# arguments are derived from the same values so they cannot disagree.
UMA_N_CORES = 10
UMA_MEM_GB = 20
UMA_OUT_DIR = "UMA_dir"

executor = submitit.AutoExecutor("logs/UMA_results")
executor.update_parameters(
    slurm_partition="kemi1",
    cpus_per_task=UMA_N_CORES,
    mem_gb=UMA_MEM_GB,
    timeout_min=100,
)

name = "UMA-test"
executor.update_parameters(slurm_job_name=name)

# Keep the job handle so its state and result can be inspected later.
uma_job = executor.submit(
    run_mols_UMA,
    ligand_smiles_list=smi_list,
    n_confs=1,
    n_cores=UMA_N_CORES,
    mem_gb=UMA_MEM_GB,
    debug=False,
    top_n=1,
    out_dir=UMA_OUT_DIR,
    output_parquet=f"{UMA_OUT_DIR}/UMA.parquet",
    save_output_dir=True,
    DFT=False,
    select_mols=["HH"])

# Last expression of the cell: shows the submitted job as before.
uma_job

SlurmJob<job_id=55004823, task_id=0, state="UNKNOWN">