## Catalyst structure generation

The following cells automatically generate the catalyst structures, varying the identity of the phosphine ligand while keeping the amine, aryl, and carboxylic ligands constant. Each phosphine is first attached to the catalyst template at a long distance from the metal center and is then brought into its fully ligated position using a multistep relaxed scan with xTB, which avoids atomic clashes.

In [1]:
import molli
from molli.external import rdkit as mrd

# Render molli visualizations on a black background.
molli.visual.configure(bgcolor="black")

# Catalyst template that every phosphine ligand is joined onto further below.
template_path = "data/dft/qsar/benzylamine/submission/benzyl-template.mol2"
catalyst = molli.load(template_path)

In [2]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.rdchem import Mol


def add_atom_to_phosphorus(mol: Mol, atom_symbol: str = "S") -> Mol:
    """Attach one extra atom to every phosphorus atom in the molecule.

    Each new atom is connected to its phosphorus atom through a double bond.
    The edits are applied to an ``RWMol`` built from ``mol``; the result is
    returned as a new molecule. If ``mol`` contains no phosphorus, the
    returned molecule has no added atoms.

    Parameters:
        mol (Mol): The input RDKit molecule.
        atom_symbol (str): The atomic symbol of the atom to add (default is 'S' for sulfur).

    Returns:
        Mol: The modified RDKit molecule with one new atom double-bonded to each phosphorus atom.
    """
    editable_mol = Chem.RWMol(mol)

    # Collect the phosphorus indices from the input molecule before editing,
    # so the atoms appended below are never part of the iteration.
    phosphorus_indices = [
        atom.GetIdx() for atom in mol.GetAtoms() if atom.GetSymbol() == "P"
    ]

    for phosphorus_idx in phosphorus_indices:
        new_atom_idx = editable_mol.AddAtom(Chem.Atom(atom_symbol))
        editable_mol.AddBond(phosphorus_idx, new_atom_idx, Chem.rdchem.BondType.DOUBLE)

    return editable_mol.GetMol()


def smiles_to_molli(smiles: str, name: str) -> molli.Molecule:
    """Convert a ligand SMILES string to a 3D Molli molecule with an attachment point.

    A dummy sulfur atom is bonded to the phosphorus of the ligand, hydrogens
    are added, 3D coordinates are embedded with RDKit, and the result is
    converted to a Molli molecule. The dummy sulfur is labelled "AP1" and its
    bond is set to a single bond.

    Only one attachment point is labelled; if the ligand contains several
    phosphorus atoms, the remaining dummy atoms are left unlabelled.

    Args:
        smiles (str): The SMILES representation of the molecule.
        name (str): Identifier of the ligand. It is used in error messages
            only; it is not written onto the returned molecule.

    Returns:
        molli.Molecule: The Molli molecule created from the SMILES string.

    Raises:
        ValueError: If the SMILES cannot be parsed, the 3D embedding fails, or
            no sulfur atom bonded to phosphorus is found to serve as the
            attachment point.
    """
    # Parsing is done without sanitization, as in the original workflow.
    mol = Chem.MolFromSmiles(smiles, sanitize=False)

    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles}")

    # Create a dummy atom to create the attachment point
    mol = add_atom_to_phosphorus(mol)

    # Update valences and add hydrogens
    mol.UpdatePropertyCache(strict=False)
    mol = Chem.AddHs(mol)

    # Generate 3D coordinates; a non-zero flag is treated as failure.
    flag = AllChem.EmbedMolecule(mol)
    if flag != 0:
        raise ValueError(f"3D embedding failed for {name}")

    # Convert 3D coordinates to Molli molecule
    mol = mrd.from_rdmol(mol)

    # The dummy atom is the sulfur bonded to phosphorus. Requiring the
    # phosphorus neighbor keeps sulfur atoms that belong to the ligand itself
    # (which come earlier in the atom list) from being picked by mistake.
    attachment_candidates = [
        atom
        for atom in mol.atoms
        if atom.element == molli.Element.S
        and any(
            neighbor.element == molli.Element.P
            for neighbor in mol.connected_atoms(atom)
        )
    ]
    if not attachment_candidates:
        raise ValueError(
            f"No attachment point found for {name}: "
            "the ligand has no sulfur atom bonded to phosphorus"
        )

    # The label needs to be set for the attachment point
    ap1 = attachment_candidates[0]
    ap1.label = "AP1"

    # Correct the bond type to single (the dummy atom was added with a double bond)
    bond = next(iter(mol.bonds_with_atom(ap1)))
    bond.btype = molli.chem.BondType.Single

    return mol

In [3]:
import pandas as pd

# Load the ligand table and keep only the rows flagged with buchwald-type == 1.
ligands = pd.read_csv("data/ligand-qsar/alkylamine-hte-ligand-data.tsv", sep="\t")
ligands = ligands[ligands["buchwald-type"] == 1]

# Library that receives the 3D ligand structures; opened with overwrite=True.
ligands_mlib = molli.MoleculeLibrary(
    "data/dft/qsar/benzylamine/submission/ligands.mlib", readonly=False, overwrite=True
)

# Switch for the structure-generation loops in this and the following cells.
run = True

# Keep the column arrays under their own names so the loop variables below do
# not shadow the sequences being iterated.
ligand_smiles = ligands["ligand_1_smiles"].values
ligand_names = ligands["ligand_1_name"].values

if run:
    with ligands_mlib.writing():
        for smiles, name in zip(ligand_smiles, ligand_names):
            # Best effort: a ligand that cannot be converted is reported and
            # skipped so the remaining ligands are still written.
            try:
                mol = smiles_to_molli(smiles, name)
                ligands_mlib[name] = mol

            except Exception as e:
                print(f"Failed to add {name}: {e}")

Failed to add L-012: 3D embedding failed for L-012
Failed to add L-067: 3D embedding failed for L-067
Failed to add L-095: 3D embedding failed for L-095


In [4]:
from molli.external import openbabel as mob

# Ligand structures supplied as a CDXML file; each entry is added to the same
# ligand library under its CDXML key.
ligands_cdxml = molli.CDXMLFile("data/dft/qsar/benzylamine/submission/ligands.cdxml")

if run:
    with ligands_mlib.writing():
        for key in ligands_cdxml.keys():
            # Best effort: report an entry that fails and move on to the next.
            try:
                cdxml_ligand = ligands_cdxml[key]

                # Add the implicit hydrogens, then optimize the geometry in
                # place with Open Babel (at most 5000 steps).
                cdxml_ligand.add_implicit_hydrogens()
                mob.obabel_optimize(cdxml_ligand, inplace=True, max_steps=5000)

                ligands_mlib[key] = cdxml_ligand
            except Exception as e:
                print(f"Failed to add {key}: {e}")


Failed to add L-054: "Key b'L-054' already exists."


KeyboardInterrupt: 

In [5]:
# Library that receives one assembled catalyst per ligand, keyed by ligand name.
catalyst_mlib = molli.MoleculeLibrary(
    "data/dft/qsar/benzylamine/submission/catalysts.mlib",
    readonly=False,
    overwrite=True,
)

if run:
    with catalyst_mlib.writing(), ligands_mlib.reading():
        for ligand_name in ligands_mlib.keys():
            # Best effort: a ligand that cannot be joined is reported and skipped.
            try:
                ligand = ligands_mlib[ligand_name]

                # Join template and ligand at their "AP1" attachment points.
                mol = molli.Molecule.join(
                    catalyst,
                    ligand,
                    "AP1",
                    "AP1",
                    dist=20,
                    optimize_rotation=True,
                    name=ligand_name,
                )

                # Mark every bond to the palladium atom (element 46) as a
                # ligand-type bond.
                palladium_atoms = [atom for atom in mol.atoms if atom.element == 46]
                metal_center = palladium_atoms[0]
                for bond in list(mol.bonds_with_atom(metal_center)):
                    bond.btype = molli.chem.BondType.Ligand

                catalyst_mlib[ligand_name] = mol
            except Exception as e:
                print(f"Error processing ligand {ligand_name}: {e}")


In [6]:
from pathlib import Path

ROOT = Path("data/dft/qsar/benzylamine/struc_gen")


def _scan_input(
    metal_idx: int,
    phosphine_idx: int,
    indexes: list,
    start: float,
    stop: float,
    steps: int,
    force_constant: float = 0.5,
) -> str:
    """Build the text of one xtb input file for a constrained scan.

    All atom indices passed in are 0-based and are written 1-based. The
    ``$constrain`` block lists the metal-phosphorus distance first, followed
    by the distances from the metal to the first three atoms in ``indexes``,
    and an ``atoms:`` entry for the metal and those three atoms. The ``$scan``
    block scans constraint 1 (the first one listed, i.e. the metal-phosphorus
    distance) from ``start`` to ``stop`` in ``steps`` steps.
    """
    metal = metal_idx + 1
    first, second, third = (index + 1 for index in indexes[:3])
    lines = [
        "$constrain",
        f"  force constant={force_constant}",
        "  all bonds=true",
        f"  distance: {metal}, {phosphine_idx + 1}, auto",
        f"  distance: {metal}, {first}, auto",
        f"  distance: {metal}, {second}, auto",
        f"  distance: {metal}, {third}, auto",
        f"  atoms: {metal}, {first}, {second}, {third}",
        "$scan",
        "  mode=concerted",
        f"  1: {start},{stop},{steps}",
        "$end",
    ]
    return "\n".join(lines) + "\n\n"


# (file name, (scan start, scan stop, number of steps)) for each scan stage.
# The first stage starts at 20, matching the join distance used when the
# catalysts were assembled; the second continues from 5 down to 2.32.
SCAN_STAGES = (
    ("step1.input.inp", (20, 5, 5)),
    ("step2.input.inp", (5, 2.32, 20)),
)

with catalyst_mlib.reading():
    for name in list(catalyst_mlib.keys()):
        # One working directory per catalyst, holding the geometry and inputs.
        workdir = ROOT / name
        workdir.mkdir(parents=True, exist_ok=True)

        mol = catalyst_mlib[name]
        with open(workdir / "geom.xyz", "w") as f:
            mol.dump_xyz(f)

        metal_center = mol.get_atom(molli.Element.Pd)
        metal_idx = mol.get_atom_index(metal_center)

        # Atoms bonded to palladium, excluding phosphorus. Named `neighbors`
        # so the `ligands` table loaded earlier is not overwritten.
        neighbors = mol.connected_atoms(metal_center)
        indexes = [
            x
            for x in mol.get_atom_indices(*neighbors)
            if mol.get_atom(x).element != molli.Element.P
        ]
        if len(indexes) < 3:
            raise ValueError(
                f"Catalyst {name} has {len(indexes)} non-phosphorus atoms bonded "
                "to Pd; 3 are required to write the scan constraints"
            )

        phosphine = mol.get_atom(molli.Element.P)
        phosphine_idx = mol.get_atom_index(phosphine)

        for filename, scan_range in SCAN_STAGES:
            inp_file = _scan_input(metal_idx, phosphine_idx, indexes, *scan_range)
            with open(workdir / filename, "w") as f:
                f.write(inp_file)