In [28]:
import submitit
import inspect
import os
import pandas as pd

from frust.stepper import Stepper
from frust.embedder import embed_mols
import frust.vis as vis
from pathlib import Path
from tooltoad.chemutils import xyz2mol

from rdkit import Chem

In [29]:
def run_mols(
    mols,
    n_confs: int = 5,
    n_cores: int = 4,
    mem_gb: int = 20,    
    debug: bool = False,
    top_n: int = 5,
    out_dir: str | None = None,
    output_parquet: str | None = None,
    save_output_dir: bool = True,
    DFT: bool = False,
    select_mols: str | list[str] = "all",  # "all", "uniques", "generics", or specific names
):
    # 1) build generic-cycle molecules (with optional selection)

    # 2) embed
    embedded = embed_mols(mols, n_confs=n_confs, n_cores=n_cores)

    # 3) xTB cascade
    step = Stepper(
        "smiles",
        n_cores=n_cores,
        memory_gb=mem_gb,
        debug=debug,
        output_base=out_dir,
        save_output_dir=save_output_dir,
        save_calc_dirs=False,
    )
    df = step.build_initial_df(embedded)
    df = step.xtb(df, options={"gfnff": None, "opt": None})
    df = step.xtb(df, options={"gfn": 2})
    df = step.xtb(df, options={"gfn": 2, "opt": None}, lowest=top_n)

    functional      = "wB97X-D3" # wB97X-D3, wB97M-V
    basisset        = "6-31G**" # 6-31G**, def2-TZVPD
    basisset_solv   = "6-31+G**" # 6-31+G**, def2-TZVPD
    freq            = "Freq" # Freq, NumFreq

    df = step.orca(df, name="DFT-pre-SP", options={
        functional  : None,
        basisset    : None,
        "TightSCF"  : None,
        "SP"        : None,
        "NoSym"     : None,
    }, lowest=1)

    # 4) if no DFT requested, save/return
    if not DFT:

        if output_parquet:
            df.to_parquet(output_parquet)
        return df

    # ↓↓↓↓↓↓↓↓ DFT branch ↓↓↓↓↓↓↓↓
    df = step.orca(df, "DFT-Opt", options={
        functional  : None,
        basisset    : None,
        "TightSCF"  : None,
        "SlowConv"  : None,
        "Opt"       : None,
        freq        : None,
        "NoSym"     : None,
    }, lowest=1)

    df = step.orca(df, options={
        functional      : None,
        basisset_solv   : None,
        "TightSCF"      : None,
        "SP"            : None,
        "NoSym"         : None,
    }, xtra_inp_str="""%CPCM\nSMD TRUE\nSMDSOLVENT "chloroform"\nend""")

    if output_parquet:
        df.to_parquet(output_parquet)
    return df

In [30]:
# Load the int2 geometry, patch its formal charges by hand, and preview it.
f = Path("../structures/phenol_test/int2.xyz")
file_name = f.stem
mols = {}

# Read the raw xyz text first; the molecule is built once the file is closed.
with open(f, "r") as file:
    xyz_block = file.read()

mol = xyz2mol(xyz_block)

# Hard-coded atom indices: assumes atom 9 is the nitrogen and atom 16 the
# boron in this file's atom ordering -- TODO confirm if the xyz is regenerated.
N, B = mol.GetAtomWithIdx(9), mol.GetAtomWithIdx(16)
N.SetFormalCharge(+1)
B.SetFormalCharge(-1)

mols[file_name] = mol
smile = Chem.MolToSmiles(mol)

# Last expression of the cell, so the 3D grid is rendered as the cell output.
vis.MolTo3DGrid(mols[file_name], show_charges=True)

In [31]:
# Full cascade (xTB + DFT branch) for int2; the result is also written to int2.parquet.
df = run_mols(
    mols,
    n_confs=None,
    n_cores=10,
    mem_gb=30,
    DFT=True,
    output_parquet="int2.parquet",
)

2025-11-09 21:52:18 INFO  frust.stepper: Working dir: .


2025-11-09 21:52:23 INFO  frust.stepper: [xtb-gfnff-opt] row 0 (int2)…
2025-11-09 21:52:24 INFO  frust.stepper: [xtb-gfnff-opt] row 1 (int2)…
2025-11-09 21:52:24 INFO  frust.stepper: [xtb-gfnff-opt] row 2 (int2)…
2025-11-09 21:52:24 INFO  frust.stepper: [xtb-gfnff-opt] row 3 (int2)…
2025-11-09 21:52:24 INFO  frust.stepper: [xtb-gfnff-opt] row 4 (int2)…
2025-11-09 21:52:24 INFO  frust.stepper: [xtb-gfnff-opt] row 5 (int2)…
2025-11-09 21:52:25 INFO  frust.stepper: [xtb-gfnff-opt] row 6 (int2)…
2025-11-09 21:52:25 INFO  frust.stepper: [xtb-gfnff-opt] row 7 (int2)…
2025-11-09 21:52:25 INFO  frust.stepper: [xtb-gfnff-opt] row 8 (int2)…
2025-11-09 21:52:25 INFO  frust.stepper: [xtb-gfnff-opt] row 9 (int2)…
2025-11-09 21:52:25 INFO  frust.stepper: [xtb-gfnff-opt] row 10 (int2)…
2025-11-09 21:52:25 INFO  frust.stepper: [xtb-gfnff-opt] row 11 (int2)…
2025-11-09 21:52:26 INFO  frust.stepper: [xtb-gfnff-opt] row 12 (int2)…
2025-11-09 21:52

In [32]:
# Load the mol2 geometry (no manual charge edits here) and preview it.
f = Path("../structures/phenol_test/mol2.xyz")
file_name = f.stem
mols = {}

# Read the raw xyz text first; the molecule is built once the file is closed.
with open(f, "r") as file:
    xyz_block = file.read()

mol = xyz2mol(xyz_block)
mols[file_name] = mol
smile = Chem.MolToSmiles(mol)

# NOTE(review): the xyz Path is passed here, whereas the int2 cell passes the
# RDKit mol -- confirm that vis.MolTo3DGrid accepts a path.
vis.MolTo3DGrid(f)

In [33]:
# Full cascade (xTB + DFT branch) for mol2; the result is also written to mol2.parquet.
# Note: this rebinds `df`, replacing the int2 result held under the same name.
df = run_mols(
    mols,
    n_confs=None,
    n_cores=10,
    mem_gb=30,
    DFT=True,
    output_parquet="mol2.parquet",
)

2025-11-09 22:23:10 INFO  frust.stepper: Working dir: .


2025-11-09 22:23:15 INFO  frust.stepper: [xtb-gfnff-opt] row 0 (mol2)…
2025-11-09 22:23:16 INFO  frust.stepper: [xtb-gfnff-opt] row 1 (mol2)…
2025-11-09 22:23:16 INFO  frust.stepper: [xtb-gfnff-opt] row 2 (mol2)…
2025-11-09 22:23:16 INFO  frust.stepper: [xtb-gfnff-opt] row 3 (mol2)…
2025-11-09 22:23:16 INFO  frust.stepper: [xtb-gfnff-opt] row 4 (mol2)…
2025-11-09 22:23:17 INFO  frust.stepper: [xtb-gfnff-opt] row 5 (mol2)…
2025-11-09 22:23:17 INFO  frust.stepper: [xtb-gfnff-opt] row 6 (mol2)…
2025-11-09 22:23:17 INFO  frust.stepper: [xtb-gfnff-opt] row 7 (mol2)…
2025-11-09 22:23:17 INFO  frust.stepper: [xtb-gfnff-opt] row 8 (mol2)…
2025-11-09 22:23:17 INFO  frust.stepper: [xtb-gfnff-opt] row 9 (mol2)…
2025-11-09 22:23:18 INFO  frust.stepper: [xtb-gfnff-opt] row 10 (mol2)…
2025-11-09 22:23:18 INFO  frust.stepper: [xtb-gfnff-opt] row 11 (mol2)…
2025-11-09 22:23:18 INFO  frust.stepper: [xtb-gfnff-opt] row 12 (mol2)…
2025-11-09 22:23