In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from copy import copy

from tooltoad.vis import MolTo3DGrid
from tooltoad.chemutils import ac2mol

from rdkit import Chem
from rdkit.Chem import rdDetermineBonds, rdMolAlign
from rdkit.Chem.AllChem import ETKDGv3, EmbedMolecule
from rdkit.Chem.rdchem import RWMol

from frust.utils.mols import fix_pin_frag, get_molecule_name
from frust.embedder import embed_ts

In [3]:
def transformer_int3(
    ligand_smiles="C1=CC=CO1",
    ts_guess_struct="../structures/int3.xyz",
    bonds_to_remove=[(10, 19)],
    pre_name="INT3",
    embed_ready=True,
):
    """Build INT3 guess structures by swapping the template's placeholder ring for a ligand.

    The XYZ template is read and its connectivity perceived. The ring matched
    by ``S1CCCC1`` (together with the hydrogens attached to it) is treated as
    the placeholder ligand and removed. For every aromatic C-H position of the
    ligand that is unique under canonical ranking (ties not broken), the ligand
    is embedded, aligned onto the first three matched ring atoms, stripped of
    the hydrogen on the reacting carbon, and bonded to both the catalyst boron
    and the boron of the pinacol ring.

    Args:
        ligand_smiles: SMILES of the aromatic ligand to insert.
        ts_guess_struct: Path to the XYZ file holding the template structure.
        bonds_to_remove: Atom-index pairs whose perceived bonds are deleted
            from the template before any matching. The argument is only read,
            never mutated, so the shared list default is safe.
        pre_name: Prefix used in the keys of the returned dictionary.
        embed_ready: Currently unused; kept so existing callers keep working.

    Returns:
        dict: Maps ``"{pre_name}({name}_rpos({idx}))"`` to a tuple
        ``(mol, atom_indices_to_keep, ligand_smiles)``. ``mol`` is the
        sanitized combined ``RWMol`` and ``atom_indices_to_keep`` is
        ``[catalyst B, catalyst N, second H on catalyst B, pin B,
        third neighbour of pin B, reactive C]``. Entries are ordered by
        ascending reactive-atom index.

    Raises:
        OSError: If the template file cannot be read (a message is printed
            and the original exception is re-raised).
        ValueError: If the XYZ block or the SMILES cannot be parsed, the
            placeholder ring is not found, embedding fails, or the template
            does not split into exactly two fragments.
    """

    # --- Read TS Guess Structure --- #
    try:
        with open(ts_guess_struct, 'r') as file:
            xyz_block = file.read()
    except FileNotFoundError:
        print(f"Error: Transition state structure file not found: {ts_guess_struct}")
        raise
    except PermissionError:
        print(f"Error: Permission denied when accessing file: {ts_guess_struct}")
        raise
    except IOError as e:
        print(f"Error: Failed to read transition state structure file {ts_guess_struct}: {e}")
        raise
    except Exception as e:
        print(f"Unexpected error loading transition state structure from {ts_guess_struct}: {e}")
        raise

    # --- Determine Connectivity --- #
    from rdkit import Chem
    ts = Chem.MolFromXYZBlock(xyz_block)
    if ts is None:
        raise ValueError(f"Could not parse an XYZ structure from {ts_guess_struct}")
    rdDetermineBonds.DetermineConnectivity(ts, useVdw=True)
    ts_rw = RWMol(ts)

    for bond in bonds_to_remove:
        ts_rw.RemoveBond(bond[0], bond[1])
    ts_rw_origin = copy(ts_rw)

    # --- Find ligand in guess ts structure --- #
    ts_ligand_pattern = Chem.MolFromSmarts("S1CCCC1")
    old_ring_match = ts_rw.GetSubstructMatch(ts_ligand_pattern)  # e.g. (5,6,7,8,9)
    if not old_ring_match:
        raise ValueError(
            f"Placeholder ring 'S1CCCC1' not found in template {ts_guess_struct}"
        )
    # The first three matched ring atoms are the alignment targets.
    old_active_site = old_ring_match[0:3]

    # --- Find unique positions and check that they are valid cH --- #
    lig_template = Chem.MolFromSmiles(ligand_smiles)
    if lig_template is None:
        raise ValueError(f"Could not parse ligand SMILES: {ligand_smiles!r}")
    lig_template = Chem.AddHs(lig_template)

    cH_patt = Chem.MolFromSmarts('[cH]')
    cH_atoms = {match[0] for match in lig_template.GetSubstructMatches(cH_patt)}

    # Atoms sharing a rank are equivalent; keep the first atom of every rank.
    atom_rank = list(Chem.CanonicalRankAtoms(lig_template, breakTies=False))
    seen_ranks = set()
    unique_atoms = []
    for atom_idx, rank in enumerate(atom_rank):
        if rank not in seen_ranks:
            seen_ranks.add(rank)
            unique_atoms.append(atom_idx)

    # Sorted so the order of the returned entries does not depend on set
    # iteration order.
    unique_cH = sorted(cH_atoms.intersection(unique_atoms))

    # --- Create aligned maps --- #
    atom_maps = []
    for carbon_idx in unique_cH:
        carbon = lig_template.GetAtomWithIdx(carbon_idx)
        heavy_nbs = [
            nb.GetIdx() for nb in carbon.GetNeighbors() if nb.GetAtomicNum() != 1
        ]
        # Put the reacting carbon between its heavy neighbours so it pairs
        # with the middle atom of the active site.
        heavy_nbs.insert(1, carbon_idx)
        atom_maps.append(list(zip(heavy_nbs, old_active_site)))

    # --- Loop through each map a.k.a reactive position and create the molecule --- #
    params = ETKDGv3()
    params.randomSeed = 0xF00D  # fixed seed keeps the embedding reproducible
    ts_mols = {}

    # Loop-invariant queries, built once.
    old_ligand_query = Chem.MolFromSmiles("C1CCCS1")
    pin_pat = Chem.MolFromSmarts('[B]1OC(C(O1)(C)C)(C)C')
    cat_pat = Chem.MolFromSmarts('[B]-c1ccccc1-[N]')
    het_ring = Chem.MolFromSmarts("[!#6;R]")
    het_matches = lig_template.GetSubstructMatches(het_ring)

    for atom_map in atom_maps:
        # Work on a fresh copy so the hydrogen-complete template is never
        # embedded or aligned in place.
        lig_mol = Chem.Mol(lig_template)
        rpos = atom_map[1][0]
        if EmbedMolecule(lig_mol, params) < 0:
            raise ValueError(
                f"Could not embed ligand {ligand_smiles!r} for reactive position {rpos}"
            )
        ts_rw = Chem.RWMol(ts_rw_origin)
        rdMolAlign.AlignMol(lig_mol, ts_rw, atomMap=atom_map)

        # --- remove hydrogen from the reacting carbon --- #
        # Hydrogens added by AddHs follow the heavy atoms, so removing one
        # leaves the heavy-atom indices (including rpos) unchanged.
        for nb in lig_mol.GetAtomWithIdx(rpos).GetNeighbors():
            if nb.GetAtomicNum() == 1:  # hydrogen
                lig_mol_rw = RWMol(lig_mol)
                lig_mol_rw.RemoveAtom(nb.GetIdx())
                lig_mol = lig_mol_rw.GetMol()
                break

        # --- Remove old ligand and determine bond order (to get aromaticity correct for the catalyst) --- #
        atoms_to_remove = set()
        for idx in ts_rw.GetSubstructMatch(old_ligand_query):
            atoms_to_remove.add(idx)
            for neighbor in ts_rw.GetAtomWithIdx(idx).GetNeighbors():
                if neighbor.GetAtomicNum() == 1:  # hydrogen on the old ring
                    atoms_to_remove.add(neighbor.GetIdx())

        # Delete from the highest index down so pending indices stay valid.
        for idx in sorted(atoms_to_remove, reverse=True):
            ts_rw.RemoveAtom(idx)

        # Cut the bond between the pin boron and its first neighbour so the
        # template separates into two fragments.
        B_pin_idx = ts_rw.GetSubstructMatches(pin_pat)[0][0]
        pin_partner_idx = ts_rw.GetAtomWithIdx(B_pin_idx).GetNeighbors()[0].GetIdx()
        ts_rw.RemoveBond(B_pin_idx, pin_partner_idx)

        frags = Chem.GetMolFrags(ts_rw, asMols=True)
        if len(frags) != 2:
            raise ValueError(
                f"Expected 2 fragments after splitting the template, got {len(frags)}"
            )
        frag0, frag1 = frags

        frag1 = fix_pin_frag(frag1)
        rdDetermineBonds.DetermineBonds(frag0)

        ts_rw = RWMol(Chem.CombineMols(frag0, frag1))

        # NOTE(review): 11 and 22 are hard-coded for the default int3.xyz
        # template; they appear to be the bridging hydrogen and the pin boron
        # after recombination. Verify before using a different template.
        ts_rw.AddBond(11, 22, Chem.BondType.SINGLE)

        # --- Combine ligand and catalyst, add temporary bonds, and set temporary formal charges ---
        ts_rw_combined = RWMol(Chem.CombineMols(ts_rw, lig_mol))

        # Ligand atoms are appended after the catalyst atoms.
        offset = ts_rw.GetNumAtoms()
        reactive_C = rpos + offset

        # Pattern atom 0 is the boron and atom 7 the nitrogen.
        cat_match = ts_rw_combined.GetSubstructMatches(cat_pat)[0]
        B_cat_idx = cat_match[0]
        N_cat_idx = cat_match[7]
        Hs_on_B = [
            nb.GetIdx()
            for nb in ts_rw_combined.GetAtomWithIdx(B_cat_idx).GetNeighbors()
            if nb.GetAtomicNum() == 1
        ]

        ts_rw_combined.AddBond(reactive_C, B_cat_idx, Chem.BondType.SINGLE)

        # The pin-boron neighbour is picked by position ([2]) and must be read
        # before the reactive carbon is bonded to the pin boron below.
        B_pin_idx = ts_rw_combined.GetSubstructMatches(pin_pat)[0][0]
        H_pin_idx = ts_rw_combined.GetAtomWithIdx(B_pin_idx).GetNeighbors()[2].GetIdx()

        ts_rw_combined.AddBond(reactive_C, B_pin_idx, Chem.BondType.SINGLE)
        atom_indices_to_keep = [
            B_cat_idx,
            N_cat_idx,
            Hs_on_B[1],
            B_pin_idx,
            H_pin_idx,
            reactive_C,
        ]

        ts_rw_combined.GetAtomWithIdx(reactive_C).SetFormalCharge(0)
        ts_rw_combined.GetAtomWithIdx(B_cat_idx).SetFormalCharge(-1)
        ts_rw_combined.GetAtomWithIdx(B_pin_idx).SetFormalCharge(-1)
        ts_rw_combined.GetAtomWithIdx(H_pin_idx).SetFormalCharge(1)

        # First non-carbon ring atom of the ligand, shifted into the combined
        # molecule's numbering.
        hetero_atom_idx = offset + het_matches[0][0]
        ts_rw_combined.GetAtomWithIdx(hetero_atom_idx).SetFormalCharge(1)

        Chem.SanitizeMol(ts_rw_combined)

        mol_name = get_molecule_name(ligand_smiles)
        ts_mols[f'{pre_name}({mol_name}_rpos({rpos}))'] = (ts_rw_combined, atom_indices_to_keep, ligand_smiles)

    return ts_mols


# Build the INT3 guess structures using the function's default ligand and template.
ts_mols = transformer_int3()
# Disabled visual check. The original call passed `ts_rw_combined`, which is
# local to transformer_int3; take the molecules from ts_mols instead, e.g.
# MolTo3DGrid([entry[0] for entry in ts_mols.values()], cell_size=(600,600))

In [4]:
# Embed conformers (n_confs=3) for every structure in ts_mols.
embeds = embed_ts(ts_mols, ts_type="INT3", n_confs=3, optimize=True)
# Collect the first element of each embeds value for display.
embed_mols = [x[0] for x in embeds.values()]
# Last expression of the cell: renders the 3D grid viewer.
MolTo3DGrid(embed_mols, show_charges=False)

Embedded 3 conformers on atom 43
Embedded 3 conformers on atom 44


In [5]:
from frust.stepper import Stepper

# Create a Stepper for the furan SMILES (the same ligand transformer_int3 uses
# by default) and build the initial DataFrame from the embedded conformers.
# NOTE(review): save_output_dir=False presumably disables writing results to
# disk; confirm against the Stepper implementation.
step = Stepper(["C1=CC=CO1"], step_type="INT3", save_output_dir=False)
df0 = step.build_initial_df(embeds)
# Bare name as the last expression so the notebook displays the frame.
df0

2025-07-17 10:16:01 INFO  frust.stepper: Working dir: .


Unnamed: 0,custom_name,ligand_name,rpos,constraint_atoms,cid,smiles,atoms,coords_embedded,energy_uff
0,INT3(furan_rpos(0)),INT3(furan,0,"[10, 12, 13, 22, 11, 43]",0,C1=CC=CO1,"[C, C, C, C, C, C, H, H, H, H, B, H, N, H, C, ...","[(-2.7761533052514054, 2.8276054292498096, -1....",296.115564
1,INT3(furan_rpos(0)),INT3(furan,0,"[10, 12, 13, 22, 11, 43]",1,C1=CC=CO1,"[C, C, C, C, C, C, H, H, H, H, B, H, N, H, C, ...","[(2.3215526287595485, -0.5415402580305316, -3....",356.257137
2,INT3(furan_rpos(0)),INT3(furan,0,"[10, 12, 13, 22, 11, 43]",2,C1=CC=CO1,"[C, C, C, C, C, C, H, H, H, H, B, H, N, H, C, ...","[(2.130345214697063, 3.4366443303582987, 0.677...",313.744075
3,INT3(furan_rpos(1)),INT3(furan,1,"[10, 12, 13, 22, 11, 44]",0,C1=CC=CO1,"[C, C, C, C, C, C, H, H, H, H, B, H, N, H, C, ...","[(-2.5715928659293352, 2.9680513922882024, -1....",297.438759
4,INT3(furan_rpos(1)),INT3(furan,1,"[10, 12, 13, 22, 11, 44]",1,C1=CC=CO1,"[C, C, C, C, C, C, H, H, H, H, B, H, N, H, C, ...","[(2.5098497980466643, 0.2334900575441476, -3.4...",300.633169
5,INT3(furan_rpos(1)),INT3(furan,1,"[10, 12, 13, 22, 11, 44]",2,C1=CC=CO1,"[C, C, C, C, C, C, H, H, H, H, B, H, N, H, C, ...","[(2.3543409783698013, 3.530828390074292, 0.077...",356.658704


In [6]:
# Run the xtb step on every row of df0 with the "gfnff" and "opt" options and
# constraint=True; the log lines label the step "xtb-gfnff-opt".
# NOTE(review): presumably a GFN-FF geometry optimisation with the
# constraint atoms held; confirm against Stepper.xtb.
df1 = step.xtb(df0, options={"gfnff": None, "opt": None}, constraint=True)

2025-07-17 10:16:01 INFO  frust.stepper: [xtb-gfnff-opt] row 0 (INT3(furan_rpos(0)))…
2025-07-17 10:16:01 INFO  frust.stepper: [xtb-gfnff-opt] row 1 (INT3(furan_rpos(0)))…
2025-07-17 10:16:01 INFO  frust.stepper: [xtb-gfnff-opt] row 2 (INT3(furan_rpos(0)))…
2025-07-17 10:16:01 INFO  frust.stepper: [xtb-gfnff-opt] row 3 (INT3(furan_rpos(1)))…
2025-07-17 10:16:01 INFO  frust.stepper: [xtb-gfnff-opt] row 4 (INT3(furan_rpos(1)))…
2025-07-17 10:16:01 INFO  frust.stepper: [xtb-gfnff-opt] row 5 (INT3(furan_rpos(1)))…


In [7]:
# Compare the embedded and the xtb-optimised geometry of one row of df1.
idx = 0  # positional row to inspect
atoms = df1["atoms"].iloc[idx]
coords1 = df1["coords_embedded"].iloc[idx]  # coordinates from the embedding step
coords2 = df1["xtb-gfnff-opt-opt_coords"].iloc[idx]  # coordinates written by the xtb step
all_coords = [coords1, coords2]
# Build one molecule per coordinate set from the same atom list.
all_mols = [ac2mol(atoms, c) for c in all_coords]
# Last expression of the cell: renders both structures with legends.
MolTo3DGrid(all_mols, legends=['embed', 'xtb-opt'])

In [8]:
from tooltoad.chemutils import xyz2mol

# Load the same XYZ template that transformer_int3 reads by default.
with open("../structures/int3.xyz") as file:
    xyzblock = file.read()

# Convert the raw XYZ text to a molecule for reference.
mol = xyz2mol(xyzblock)

# Last expression of the cell: renders the unmodified template.
MolTo3DGrid(mol)