In [170]:
# helper imports
from tooltoad.vis import MolTo3DGrid

from copy import copy
from rdkit import Chem
from rdkit.Chem import rdDetermineBonds, rdMolAlign, rdmolops
from rdkit.Chem.AllChem import ETKDGv3, EmbedMolecule, UFFGetMoleculeForceField, EmbedMultipleConfs
from rdkit.Chem.rdchem import RWMol

from frust.transformer_utils import rotated_maps
from frust.utils import get_molecule_name

# --- Run configuration --- #
# Alternative ligand, kept for reference:
# ligand_smiles="CC([Si](N1C=CC=C1)(C(C)C)C(C)C)C"
ligand_smiles = "CCN(CC)C(=O)c1cc2ccccc2[nH]1"  # SMILES the new ligand is built from
ts_guess_struct = "../structures/ts1_guess.xyz"  # XYZ file with the TS guess geometry
# Atom-index pairs (TS guess numbering) whose bonds are deleted again after
# connectivity has been perceived.
bonds_to_remove = [
    (10, 41),
    (10, 12),
    (11, 41),
]
pre_name = "TS"
n_confs = 10
n_cores = 10
# Gates the temporary bonds / formal charges added when ligand and catalyst are combined.
embed_ready = True

In [171]:
# --- Read TS Guess Structure --- #
# Every handler prints a short diagnosis and re-raises, so a failed read still
# stops the cell with the original traceback.
try:
    with open(ts_guess_struct, 'r') as xyz_file:
        xyz_block = xyz_file.read()
except FileNotFoundError:
    print(f"Error: Transition state structure file not found: {ts_guess_struct}")
    raise
except PermissionError:
    print(f"Error: Permission denied when accessing file: {ts_guess_struct}")
    raise
except OSError as err:  # IOError is an alias of OSError in Python 3
    print(f"Error: Failed to read transition state structure file {ts_guess_struct}: {err}")
    raise
except Exception as err:
    print(f"Unexpected error loading transition state structure from {ts_guess_struct}: {err}")
    raise

# --- Determine Connectivity --- #
# MolFromXYZBlock hands back None when the block cannot be parsed; stop here
# with a clear message instead of an opaque failure in DetermineConnectivity.
ts = Chem.MolFromXYZBlock(xyz_block)
if ts is None:
    raise ValueError(f"Could not parse an XYZ structure from {ts_guess_struct}")
rdDetermineBonds.DetermineConnectivity(ts, useVdw=True)
ts_rw = RWMol(ts)

# --- Remove Bonds --- #
# Delete the bonds listed in the configuration cell from the perceived connectivity.
# Finding these bonds might need to be automated.
for begin_idx, end_idx in bonds_to_remove:
    ts_rw.RemoveBond(begin_idx, end_idx)
# Snapshot of the edited TS; a later cell rebuilds ts_rw from this copy.
ts_rw_origin = copy(ts_rw)

# --- Find ligand in guess ts structure --- #
ts_ligand_pattern = Chem.MolFromSmarts("N1CCCC1")
old_ring_match = ts_rw.GetSubstructMatch(ts_ligand_pattern)  # e.g. (5,6,7,8,9)
# GetSubstructMatch returns an empty tuple when nothing matches. Without this
# check the failure only shows up later as empty atom maps or an IndexError.
if not old_ring_match:
    raise ValueError(
        "No N1CCCC1 ring found in the TS guess structure; cannot locate the old ligand"
    )

# --- Find unique positions and check that they are valid cH --- #
lig_mol = Chem.MolFromSmiles(ligand_smiles)
if lig_mol is None:
    raise ValueError(f"Could not parse ligand SMILES: {ligand_smiles}")
lig_mol = Chem.AddHs(lig_mol)

# Aromatic carbons that carry a hydrogen.
cH_patt = Chem.MolFromSmarts('[cH]')
matches = lig_mol.GetSubstructMatches(cH_patt)
cH_atoms = [match[0] for match in matches]

# breakTies=False: atoms that tie in the canonical ranking keep the same rank.
atom_rank = list(Chem.CanonicalRankAtoms(lig_mol, breakTies=False))

def find_unique_atoms(lst):
    """Return the positions at which each distinct value of ``lst`` first appears.

    Args:
        lst: Iterable of hashable values (here: one rank per atom).

    Returns:
        List of indices into ``lst``, in increasing order, one per distinct value.
    """
    first_position = {}
    for position, value in enumerate(lst):
        # setdefault only stores the position the first time a value is seen.
        first_position.setdefault(value, position)
    return list(first_position.values())

# First atom index of every distinct canonical rank.
unique_atoms = find_unique_atoms(atom_rank)
# Keep only the aromatic C-H positions. The result is sorted because a later
# cell picks a candidate by its position in this sequence; relying on set
# iteration order would make that choice depend on an implementation detail.
unique_cH = tuple(sorted(set(unique_atoms).intersection(cH_atoms)))

MolTo3DGrid(ts_rw)

In [172]:
# --- Read indices of the old ligand and new ligand, to get them ready for alignment --- #
# Embed the new ligand in 3D using ETKDGv3 with a fixed random seed.
params = ETKDGv3()
params.randomSeed = 0xF00D
EmbedMolecule(lig_mol, params)

# Five-membered aromatic ring containing an n/o/s atom, located in the new ligand.
ligand_pattern = Chem.MolFromSmarts("[n,o,s]1cccc1")
lig_match = lig_mol.GetSubstructMatch(ligand_pattern)

# --- Create aligned maps --- #
# maps = rotated_maps(lig_match, old_ring_match, 41, unique_cH)

In [173]:
# print(maps[0])
# print(maps[1])

In [174]:
# Show the embedded ligand with tooltoad's MolTo3DGrid as a visual check.
MolTo3DGrid(lig_mol)

In [175]:
# Anchor atoms on the old ligand: the first three indices of the ring match
# found in the TS guess.
old_active_site = old_ring_match[0:3]

# One atom map per candidate C-H position. Each map pairs
# (heavy neighbour, C-H carbon, heavy neighbour) of the new ligand with the
# three anchor atoms of the old ligand, in that order.
maps = []
for carbon_idx in unique_cH:
    carbon = lig_mol.GetAtomWithIdx(carbon_idx)
    # Hydrogens are skipped; only heavy-atom neighbours take part in the map.
    ligand_triplet = [
        nbr.GetIdx() for nbr in carbon.GetNeighbors() if nbr.GetAtomicNum() != 1
    ]
    # The carbon goes in the middle so it is paired with old_active_site[1].
    ligand_triplet.insert(1, carbon_idx)

    # Deliberately not called `map`: that name would shadow the builtin for
    # every cell executed afterwards.
    atom_map = list(zip(ligand_triplet, old_active_site))
    maps.append(atom_map)

In [176]:
# Display the atom maps: one list of (ligand atom index, TS atom index) pairs
# per candidate C-H position.
maps

[[(7, 49), (8, 41), (9, 42)],
 [(9, 49), (10, 41), (11, 42)],
 [(10, 49), (11, 41), (12, 42)],
 [(11, 49), (12, 41), (13, 42)],
 [(12, 49), (13, 41), (14, 42)]]

In [177]:
# Inspect the first candidate C-H carbon and the atoms bonded to it.
chosen_C = lig_mol.GetAtomWithIdx(unique_cH[0])

print("Chosen C", chosen_C.GetIdx())

for neighbor in chosen_C.GetNeighbors():
    # One line per neighbour: atomic number, then atom index.
    print("Atom no.", neighbor.GetAtomicNum(), "Atom id.", neighbor.GetIdx())

Chosen C 8
Atom no. 6 Atom id. 7
Atom no. 6 Atom id. 9
Atom no. 1 Atom id. 26


In [178]:
# Position in `maps` of the candidate that gets aligned.
tm = 3

selected_map = maps[tm]
# Ligand-side index of the middle pair of the map, i.e. the C-H carbon.
rpos = selected_map[1][0]
# Restart from the stored TS copy and re-embed the ligand with the same parameters.
ts_rw = Chem.RWMol(ts_rw_origin)
EmbedMolecule(lig_mol, params)
# Superimpose the ligand on the TS guess; the value returned by AlignMol is the cell output.
rdMolAlign.AlignMol(lig_mol, ts_rw, atomMap=selected_map)

0.10205132086408637

In [179]:
#MolTo3DGrid(lig_mol)

In [180]:
# --- remove hydrogen from the reacting carbon --- #
# Second atom of the old ring match and its position inside that match.
reactive_old_C = old_ring_match[1]
reactive_atom_pos = old_ring_match.index(reactive_old_C)
chosen_carbon = lig_mol.GetAtomWithIdx(rpos)

# Index of the first hydrogen bonded to the chosen carbon, or None if it has none.
hydrogen_idx = next(
    (nbr.GetIdx() for nbr in chosen_carbon.GetNeighbors() if nbr.GetAtomicNum() == 1),
    None,
)
if hydrogen_idx is not None:
    # Edit an RWMol copy and store the result back in lig_mol.
    lig_mol_rw = RWMol(lig_mol)
    lig_mol_rw.RemoveAtom(hydrogen_idx)
    lig_mol = lig_mol_rw.GetMol()

#MolTo3DGrid(lig_mol, kekulize=False)

In [181]:
# --- Remove old ligand and determine bond order (to get aromaticity correct for the catalyst) --- #
n_pattern_full = Chem.MolFromSmiles("CN1CCCC1")
n_old_indices = ts_rw.GetSubstructMatch(n_pattern_full)

# Matched heavy atoms plus every hydrogen bonded to one of them.
atoms_to_remove = set(n_old_indices)
for heavy_idx in n_old_indices:
    heavy_atom = ts_rw.GetAtomWithIdx(heavy_idx)
    atoms_to_remove.update(
        nbr.GetIdx() for nbr in heavy_atom.GetNeighbors() if nbr.GetAtomicNum() == 1
    )

# Highest index first, so the indices still waiting to be removed stay valid.
for doomed_idx in sorted(atoms_to_remove, reverse=True):
    ts_rw.RemoveAtom(doomed_idx)

# Atom 10 is taken out before bond perception; its element symbol and
# coordinates are kept so it can be re-inserted afterwards.
atom_idx_to_remove = 10
atom_to_remove = ts_rw.GetAtomWithIdx(atom_idx_to_remove)
atom_symbol = atom_to_remove.GetSymbol()
atom_coords = ts_rw.GetConformer().GetAtomPosition(atom_idx_to_remove)

ts_rw.RemoveAtom(atom_idx_to_remove)

rdDetermineBonds.DetermineBonds(ts_rw)

#MolTo3DGrid(ts_rw)

In [182]:
#  --- Add reactive H back --- #
# Re-insert the atom that was removed before bond perception, at its stored position.
new_atom_idx = ts_rw.AddAtom(Chem.Atom(atom_symbol))
ts_rw.GetConformer().SetAtomPosition(new_atom_idx, atom_coords)

# --- Combine ligand and catalyst, add temporary bonds, and set temporary formal charges ---
# CombineMols puts the ts_rw atoms first, so ligand indices are shifted by `offset`.
ts_combined = Chem.CombineMols(ts_rw, lig_mol)
ts_rw_combined = RWMol(ts_combined)

offset = ts_rw.GetNumAtoms()
reactive_H = offset - 1 # the reactive H is the offset - 1, because it was the last atom added to the mol.
reactive_C = rpos + offset
# atom_indices_to_keep = [10, 11, 39, 40, 41, reactive_C]
atom_indices_to_keep = [10, 11, 39, 40, reactive_H, reactive_C]

# `embed_ready` is set in the configuration cell. It is deliberately not
# reassigned here: a hard-coded True in this cell made that setting ineffective.
if embed_ready:
    # Zero-order bond between atom 10 and the reacting carbon, plus a single
    # bond between atom 10 and the re-added hydrogen.
    ts_rw_combined.AddBond(10, reactive_C, Chem.BondType.ZERO)
    ts_rw_combined.AddBond(10, reactive_H, Chem.BondType.SINGLE)
    # NOTE(review): atom 10 is presumably the boron centre (going by the name b_atom) - confirm.
    b_atom = ts_rw_combined.GetAtomWithIdx(10)
    b_atom.SetFormalCharge(2)
    c_atom = ts_rw_combined.GetAtomWithIdx(reactive_C)
    c_atom.SetFormalCharge(-1)

#rpos = lig_match.index(rpos) # reset the index for rpos.
mol_name = get_molecule_name(ligand_smiles)

MolTo3DGrid(ts_rw_combined, cell_size=(600,600))