In [27]:
from __future__ import annotations
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
import numpy.typing as npt
import csv
from tkinter import Tk
from tkinter import filedialog
from os import PathLike
from pathlib import Path

In [28]:
# Shared geometric constants used by the structure builders

# Half-turn (180 degrees) about the Z axis: negates X and Y, keeps Z
Z_ROTATION_180 = np.diag([-1.0, -1.0, 1.0])

# Candidate distances between the two parent atoms
DISTANCE_MULTIPLIERS = [2.7, 2.8, 2.9, 3.0, 3.1, 3.2]

# Unit vector along +Z
Z_UNIT_VECTOR = np.array([0.0, 0.0, 1.0])

# Reflection through the XY plane: keeps X and Y, negates Z
Z_REFLECTOR = np.diag([1.0, 1.0, -1.0])

# 3x3 identity, used wherever "no rotation" is required
IDENTITY_MATRIX = np.eye(3, dtype=float)

In [29]:
class LinearAlgebra:
    """Namespace of stateless vector and rotation helpers."""

    @staticmethod
    def normalize(vector: npt.ArrayLike) -> npt.NDArray:
        """Return `vector` scaled to unit length.

        Any array-like is accepted and converted to a float array.  A vector
        of zero length has no direction and is returned unscaled.
        """

        vector = np.asarray(vector, dtype=float)
        norm = np.linalg.norm(vector)

        if norm == 0:
            return vector

        return vector / norm


    @staticmethod
    def quaternion_builder(
        vector: npt.ArrayLike,
        angle: float,
    ) -> npt.NDArray:
        """Build the 3x3 rotation matrix of a unit quaternion.

        Parameters
        ----------
        vector : ArrayLike
            The vector to rotate around (can be list or NDArray).  It does
            not need to be normalized.
        angle : float
            The angle to rotate by (in radians)

        Returns
        -------
        quaternion_matrix : NDArray
            A 3x3 matrix that rotates by `angle` around `vector`.  A
            zero-length `vector` yields the identity matrix.
        """

        # Normalize the axis BEFORE building the quaternion.  Normalizing the
        # finished quaternion instead would rescale the sine part relative to
        # the cosine part and silently change the rotation angle whenever
        # `vector` is not already a unit vector.
        axis = LinearAlgebra.normalize(np.asarray(vector, dtype=float)[:3])
        half_angle = angle / 2

        # With a unit axis the quaternion is already unit length; this last
        # normalization only removes floating point drift.
        w, x, y, z = LinearAlgebra.normalize(
            [np.cos(half_angle), *(np.sin(half_angle) * axis)]
        )

        quaternion_matrix = np.array(
            [
                [
                    1 - 2 * (y ** 2 + z ** 2),
                    2 * (x * y - w * z),
                    2 * (x * z + w * y),
                ],
                [
                    2 * (x * y + w * z),
                    1 - 2 * (x ** 2 + z ** 2),
                    2 * (y * z - w * x),
                ],
                [
                    2 * (x * z - w * y),
                    2 * (y * z + w * x),
                    1 - 2 * (x ** 2 + y ** 2),
                ],
            ]
        )

        return quaternion_matrix


    @staticmethod
    def vector_angle(vector_one: npt.ArrayLike, vector_two: npt.ArrayLike) -> np.float64:
        """Calculate the angle (in radians) between two vectors."""

        cosine = np.dot(
            LinearAlgebra.normalize(vector_one),
            LinearAlgebra.normalize(vector_two),
        )

        # Clip so rounding error cannot push the cosine outside arccos' domain
        return np.arccos(np.clip(cosine, -1.0, 1.0))


    @staticmethod
    def distance_calculator(
        vector_one: npt.ArrayLike, vector_two: npt.ArrayLike
    ) -> np.float64:
        """Calculate the Euclidean distance between two 3D points.

        Only the first three components of each input are used.
        """

        delta = (
            np.asarray(vector_one, dtype=float)[:3]
            - np.asarray(vector_two, dtype=float)[:3]
        )

        return np.linalg.norm(delta)


    @staticmethod
    def nudge_matrix_generator(stage: str) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]:
        """Creates the nudge matrices and vector for avoiding local minima during optimization.

        Parameters
        ----------
        stage : {"R", "P", "T"}
            Stage of the reaction ("R"=Reactant, "P"=Product, "T"=Transition)

        Returns
        -------
        nudge_rotate_x : NDArray
            An array that rotates around the x-axis by +15 ("R") or -15 ("P") degrees
        nudge_rotate_y : NDArray
            An array that rotates around the y-axis by +20 ("R") or -20 ("P") degrees
        nudge_translate : NDArray
            A vector that nudges the molecule by a set amount (currently zero)

        Notes
        -----
        Any stage other than "R" or "P" gets identity matrices, i.e. no nudge,
        instead of uninitialized memory.
        """

        nudge_translate = np.zeros(3)

        # "R" and "P" are nudged by the same angles in opposite directions
        direction = {"R": 1.0, "P": -1.0}.get(stage)
        if direction is None:
            return np.eye(3), np.eye(3), nudge_translate

        x_angle = direction * (np.pi / 12)  # 15 degrees
        y_angle = direction * (np.pi / 9)   # 20 degrees

        nudge_rotate_x = np.array(
            [
                [1, 0, 0],
                [0, np.cos(x_angle), -np.sin(x_angle)],
                [0, np.sin(x_angle), np.cos(x_angle)],
            ],
            dtype=float,
        )

        nudge_rotate_y = np.array(
            [
                [np.cos(y_angle), 0, np.sin(y_angle)],
                [0, 1, 0],
                [-np.sin(y_angle), 0, np.cos(y_angle)],
            ],
            dtype=float,
        )

        return nudge_rotate_x, nudge_rotate_y, nudge_translate


    @staticmethod
    def _fallback_axis(vector: npt.ArrayLike) -> npt.NDArray:
        """Return some vector perpendicular to a non-zero 3D `vector`."""

        vector = np.asarray(vector, dtype=float)
        # Crossing with the coordinate axis least aligned with `vector`
        # guarantees a non-zero result.
        basis = np.zeros(3)
        basis[np.argmin(np.abs(vector))] = 1.0

        return np.cross(vector, basis)


    @staticmethod
    def gen_alignment_matrix(
        vector_one: npt.NDArray,
        vector_two: npt.NDArray = None,
        alignment_angle: float = None,
        alignment_vector: npt.NDArray = None,
    ) -> npt.NDArray:
        """Build a rotation matrix from two vectors and/or an explicit axis and angle.

        Parameters
        ----------
        vector_one : NDArray
            The vector to rotate from.
        vector_two : NDArray, optional
            The vector to rotate towards.  Needed whenever the angle or the
            axis is not given explicitly.
        alignment_angle : float, optional
            Rotation angle in radians.  Defaults to the angle between
            `vector_one` and `vector_two`.
        alignment_vector : NDArray, optional
            Rotation axis.  Defaults to the normalized cross product of
            `vector_one` and `vector_two`.

        Returns
        -------
        alignment_quaternion : NDArray
            The 3x3 rotation matrix.

        Raises
        ------
        ValueError
            If neither `vector_two` nor both `alignment_angle` and
            `alignment_vector` are supplied.
        """

        if vector_two is None and (alignment_angle is None or alignment_vector is None):
            raise ValueError("Must provide either a second vector or an alignment vector/angle!")

        if alignment_angle is None:
            alignment_angle = LinearAlgebra.vector_angle(vector_one, vector_two)

        if alignment_vector is None:
            axis = np.cross(vector_one, vector_two)
            scale = np.linalg.norm(vector_one) * np.linalg.norm(vector_two)
            # Parallel or anti-parallel inputs have a vanishing cross product,
            # which would turn e.g. a required 180 degree flip into the
            # identity.  Any perpendicular axis is a valid choice there.
            if scale > 0 and np.linalg.norm(axis) <= 1e-12 * scale:
                axis = LinearAlgebra._fallback_axis(vector_one)
            alignment_vector = LinearAlgebra.normalize(axis)

        alignment_quaternion = LinearAlgebra.quaternion_builder(alignment_vector, alignment_angle)

        return alignment_quaternion

In [30]:
def substructure_match(smiles_string: str) -> tuple[int, list[int], int]:
    """Generate an RDKit molecule and search the structure for protonated atoms.

    Only the molecular graph (with explicit hydrogens) is searched, so no 3D
    embedding is performed here.

    Parameters
    ----------
    smiles_string : str
        Any valid SMILES string.

    Returns
    -------
    protonated_atom_index : int
        The line number of the atom with an extra proton (the parent charge site).
    proton_position_indices : list[int]
        A list containing any protons attached to the parent charge site.
    near_neighbor_index : int
        The line number of an atom directly connected to the parent charge site.

    Raises
    ------
    ValueError
        If the SMILES string cannot be parsed, or if the molecule contains no
        protonated nitrogen/oxygen site matching the search patterns.
    """

    parsed = Chem.MolFromSmiles(smiles_string)
    if parsed is None:
        raise ValueError(f"Could not parse SMILES string: {smiles_string!r}")

    molecule = Chem.AddHs(parsed)

    # Atom primitives describing the supported protonated N and O sites
    charge_sites = ("#7H+", "#7H2+", "#7H3+", "#8H+", "#8H2+")

    charged_atom = Chem.MolFromSmarts("[" + ",".join(charge_sites) + "]")

    # Hydrogens bonded to one of the charge sites
    proton_atom = Chem.MolFromSmarts(
        "[" + ",".join(f"$([#1][{site}])" for site in charge_sites) + "]"
    )

    # Any atom bonded to one of the charge sites
    neighbor_atom = Chem.MolFromSmarts(
        "[" + ",".join(f"$([*][{site}])" for site in charge_sites) + "]"
    )

    parent_matches = molecule.GetSubstructMatches(charged_atom)
    proton_matches = molecule.GetSubstructMatches(proton_atom)
    neighbor_matches = molecule.GetSubstructMatches(neighbor_atom)

    if not parent_matches or not neighbor_matches:
        raise ValueError(
            f"No protonated nitrogen or oxygen site was found in {smiles_string!r}"
        )

    # The first match is used when several charge sites / neighbors exist
    protonated_atom_index: int = parent_matches[0][0]
    proton_position_indices: list[int] = [match[0] for match in proton_matches]
    near_neighbor_index: int = neighbor_matches[0][0]

    return protonated_atom_index, proton_position_indices, near_neighbor_index

In [31]:
def multiple_input(
    get_mmd: bool = True,
    get_xyz_path: bool = True,
) -> tuple[list[list[str]], PathLike] | list[list[str]] | PathLike:
    """Read CSV files with multiple molecules and/or specify folder containing XYZ files

    Parameters
    ----------
    get_mmd : bool, default=True
        Option to read a CSV file with data for multiple molecules.
    get_xyz_path : bool, default=True
        Option to select a folder path for XYZ files (to put in or to take out).

    Returns
    -------
    multiple_molecule_data : list[list[str]]
        A list of lists each containing data about a molecule.  The first
        (header) row of the CSV file is dropped.
    xyz_file_path : PathLike
        The path to a directory where XYZ files are or should be placed.

    Raises
    ------
    ValueError
        If both `get_mmd` and `get_xyz_path` are False (nothing to ask for).
    FileNotFoundError
        If the user cancels one of the selection dialogs.

    Notes
    -----
    Selecting only one of the two parameters will return only that parameter.
    """

    if not (get_mmd or get_xyz_path):
        raise ValueError("At least one of get_mmd or get_xyz_path must be True.")

    # Hidden root window that only serves as the parent of the dialogs
    root = Tk()
    root.wm_attributes("-topmost", 1)
    root.withdraw()

    try:
        if get_mmd:
            file_name = filedialog.askopenfilename(
                parent=root,
                initialdir="",
                title="Select a CSV",
                filetypes=(("CSV Files", "*.csv"), ("All files", "*")),
            )

            # A cancelled dialog returns an empty value
            if not file_name:
                raise FileNotFoundError("No CSV file was selected.")

            with open(file_name, newline="") as csvfile:
                multiple_molecule_data = list(csv.reader(csvfile))

        if get_xyz_path:
            directory = filedialog.askdirectory(
                parent=root,
                initialdir="",
                title="Select XYZ File Parent Directory",
            )

            # Without this check a cancelled dialog would silently turn into
            # the filesystem root ("" + "/").
            if not directory:
                raise FileNotFoundError("No XYZ directory was selected.")

            xyz_file_path = Path(directory)
    finally:
        # Always release the Tk root, even when a dialog was cancelled
        root.destroy()

    if get_mmd and get_xyz_path:
        return multiple_molecule_data[1:], xyz_file_path
    if get_mmd:
        return multiple_molecule_data[1:]
    return xyz_file_path
    

In [32]:
class Atom:
    """A single atom: an element symbol together with its XYZ coordinate."""

    def __init__(
        self,
        element: str = None,
        xyz: npt.NDArray = None,
    ):
        # Both pieces of information are mandatory
        if element is None or xyz is None:
            raise RuntimeError("You must supply the element and XYZ coordinate!")

        self.element = element
        self.xyz = xyz


    def __repr__(self):
        header = f"{'Element':12}{'X':11}{'Y':11}{'Z':11}\n"
        x, y, z = self.xyz[0], self.xyz[1], self.xyz[2]
        row = f"{self.element:9}{x:11.6f}{y:11.6f}{z:11.6f}\n"
        return header + row

In [33]:
class Geometry:
    """An ordered list of `Atom` objects together with a net charge.

    Attributes
    ----------
    atoms : list[Atom]
        The atoms of the structure, in file order.
    charge : int
        The net charge of the structure.
    """

    def __init__(
        self,
        atoms: list[Atom] = None,
        charge: int = 0,
    ):
        if atoms is None:
            raise RuntimeError("You must supply the atomic symbols and coordinates!")

        self.atoms = atoms
        self.charge = charge


    def get_num_atoms(self) -> int:
        """Return the number of atoms in the structure."""
        return len(self.atoms)


    def get_coords(self) -> npt.NDArray:
        """Return the coordinates of all atoms as one array (one row per atom)."""
        return np.array([atom.xyz for atom in self.atoms])


    def get_elements(self) -> list[str]:
        """Return the element symbols of all atoms, in order."""
        return [atom.element for atom in self.atoms]


    def new_xyz(self, new_xyzs: npt.NDArray):
        """Overwrite the coordinate of every atom with the matching row of `new_xyzs`."""
        for index, atom in enumerate(self.atoms):
            atom.xyz = new_xyzs[index]


    def remove_atom(self, index: int):
        """Delete the atom at `index`."""
        self.atoms.pop(index)


    def pop_atom(self, index: int) -> Atom:
        """Delete the atom at `index` and return it."""
        return self.atoms.pop(index)


    def add_atom(self, atom: Atom, index: int = None):
        """Insert `atom` at `index`, or append it when no index is given."""
        if index is None:
            self.atoms.append(atom)
        else:
            self.atoms.insert(index, atom)


    @classmethod
    def from_xyz(cls, file_path: str, charge: int = 0):
        """Read in XYZ file format and return atomic symbols and coordinates

        The first non-blank line is taken as the atom count and the line right
        after it as the comment line; both are skipped.  The comment line may
        be blank (as written by `to_xyz`) and the file does not need to end
        with a newline.

        Parameters
        ----------
        file_path : str
            Full path to an XYZ file.
        charge : int, default=0
            The net charge to assign to the structure.

        Returns
        -------
        Geometry object

        Raises
        ------
        ValueError
            If an atom line holds fewer than four fields or a coordinate is
            not a number.
        """

        with open(file_path) as xyz_file:
            lines = xyz_file.read().splitlines()

        # Index of the atom-count line; the comment line directly follows it
        count_index = next(
            (index for index, line in enumerate(lines) if line.strip()),
            len(lines),
        )
        first_atom_index = count_index + 2

        atoms = []

        for line_number, line in enumerate(lines[first_atom_index:], start=first_atom_index + 1):
            fields = line.split()
            if not fields:
                # Blank lines between or after the atoms carry no data
                continue
            if len(fields) < 4:
                raise ValueError(
                    f"Line {line_number} of {file_path} is not a valid XYZ atom line: {line!r}"
                )
            coordinate = np.array([float(value) for value in fields[1:4]])
            atoms.append(Atom(str(fields[0]), coordinate))

        return cls(atoms, charge)


    @classmethod
    def from_smiles(cls, smiles_string: str):
        """Convert a SMILES string to a 3D molecule.

        Parameters
        ----------
        smiles_string : str
            Any valid SMILES string (for example, "N#Cc1nn[nH]c(C#N)1")

        Returns
        -------
        Geometry object
            The charge is set to the formal charge of the molecule.

        Raises
        ------
        ValueError
            If the SMILES string cannot be parsed or no 3D conformer could
            be generated for it.
        """

        parsed = Chem.MolFromSmiles(smiles_string)
        if parsed is None:
            raise ValueError(f"Could not parse SMILES string: {smiles_string!r}")

        molecule = Chem.AddHs(parsed)

        # EmbedMolecule reports failure through a negative conformer id
        if AllChem.EmbedMolecule(molecule, AllChem.ETKDGv3()) < 0:
            raise ValueError(f"Could not generate 3D coordinates for {smiles_string!r}")

        charge = Chem.rdmolops.GetFormalCharge(molecule)

        # The XYZ block starts with the atom count and a comment line
        xyz_lines = Chem.rdmolfiles.MolToXYZBlock(molecule).split("\n")[2:-1]

        atoms = []

        for line in xyz_lines:
            fields = line.split()
            coordinate = np.array([float(value) for value in fields[1:]])
            atoms.append(Atom(str(fields[0]), coordinate))

        return cls(atoms, charge)


    @classmethod
    def from_list(cls, elements: list[str], xyzs: npt.NDArray, charge: int = 0):
        """Build a structure from parallel sequences of symbols and coordinates.

        Raises
        ------
        ValueError
            If `elements` and `xyzs` differ in length.
        """
        if len(elements) != len(xyzs):
            raise ValueError("The list of elements and coordinates must be of the same size!")

        atoms = [Atom(element, xyz) for element, xyz in zip(elements, xyzs)]

        return cls(atoms, charge)


    def to_xyz(self, name: str, xyz_dir: PathLike = Path("./")):
        """Write the structure to `<xyz_dir>/<name>.xyz` with an empty comment line."""

        with open(Path(xyz_dir) / f"{name}.xyz", "w", newline="") as xyz_file:
            xyz_file.write(f"{self.get_num_atoms()}\n\n")
            xyz_file.writelines(
                f"{atom.element:3}{atom.xyz[0]:12.6f}{atom.xyz[1]:12.6f}{atom.xyz[2]:12.6f}\n"
                for atom in self.atoms
            )


    def __repr__(self):
        rows = [f"{'Element':12}{'X':11}{'Y':11}{'Z':11}\n\n"]
        rows.extend(
            f"{atom.element:9}{atom.xyz[0]:11.6f}{atom.xyz[1]:11.6f}{atom.xyz[2]:11.6f}\n"
            for atom in self.atoms
        )
        return "".join(rows)


    def __add__(self, structure: Geometry):
        # The new structure gets its own atom list, but the Atom objects
        # themselves are shared with both operands.  Charges are summed.
        return Geometry(self.atoms + structure.atoms, self.charge + structure.charge)


    def __iter__(self):
        yield from self.atoms

    #def write_orca_input(self, name: str, input_dir: PathLike = Path("./"), **kwargs):

In [34]:
class Molecule:
    """Class that contains all molecule data.

    Attributes
    ----------
    smiles_string : str
        Any valid SMILES string.
    structure : Geometry
        The structure of the molecule.  Built from `smiles_string` when not
        supplied.  For positively charged structures it is translated in place
        so that the protonated atom sits at the origin.
    molecule_name : str
        The molecule name that will be used for naming output XYZ and INP files
        (defaults to "bean").
    protonated_atom_index : int or None
        The line index of the atom that carries an extra proton (the parent charge site).
    neighboring_atom_index : int or None
        The line index of an atom attached to the parent charge site.
    proton_position_indices : list of int or None
        A list of the line indexes for each proton attached to the protonated atom.

    Notes
    -----
    Indices that are not supplied are looked up from the SMILES string with
    `substructure_match`, unless the structure is neutral, in which case they
    stay as given (possibly None).

    Methods
    -------
    get_proton_index(attempt_number)
        Return the line index of the proton of interest.
    get_proton_position(attempt_number)
        Return the XYZ coordinates of the proton of interest.
    get_protonated_atom_position()
        Return the XYZ coordinates of the parent charge site.
    get_num_atoms()
        Return the number of atoms in the molecule.
    """

    def __init__(
        self,
        smiles_string: str,
        structure: Geometry = None,
        molecule_name: str = None,
        protonated_atom_index: int = None,
        neighboring_atom_index: int = None,
        proton_position_indices: list[int] = None,
    ):
        self.smiles_string = smiles_string
        self.structure = (
            structure
            if structure is not None
            else Geometry.from_smiles(self.smiles_string)
        )
        self.molecule_name = molecule_name if molecule_name is not None else "bean"

        self.protonated_atom_index = protonated_atom_index
        self.neighboring_atom_index = neighboring_atom_index
        self.proton_position_indices = proton_position_indices

        supplied = (protonated_atom_index, neighboring_atom_index, proton_position_indices)
        missing_index = any(value is None for value in supplied)

        # Run the substructure search once and fill in only what is missing
        if self.structure.charge != 0 and missing_index:
            charge_site, protons, neighbor = substructure_match(self.smiles_string)
            if self.protonated_atom_index is None:
                self.protonated_atom_index = charge_site
            if self.neighboring_atom_index is None:
                self.neighboring_atom_index = neighbor
            if self.proton_position_indices is None:
                self.proton_position_indices = protons

        if self.structure.charge > 0:
            # Shift the whole molecule so the charge site becomes the origin
            coords = self.structure.get_coords()
            self.structure.new_xyz(coords - coords[self.protonated_atom_index])


    def get_proton_index(self, attempt_number: int) -> int:
        """Get the line index of the proton used for the given attempt."""
        return self.proton_position_indices[attempt_number]


    def get_num_atoms(self) -> int:
        """Get the number of atoms in the molecule."""
        return self.structure.get_num_atoms()


    def get_coords(self) -> npt.NDArray:
        """Get the coordinates of every atom in the molecule."""
        return self.structure.get_coords()


    def get_elements(self) -> list[str]:
        """Get the element symbol of every atom in the molecule."""
        return self.structure.get_elements()


    def get_charge(self) -> int:
        """Get the charge stored on the underlying structure."""
        return self.structure.charge


    def remove_atom(self, index: int):
        """Remove the atom at `index` from the underlying structure."""
        self.structure.remove_atom(index)


    def pop_atom(self, index: int):
        """Remove the atom at `index` from the underlying structure and return it."""
        return self.structure.pop_atom(index)


    def add_atom(self, atom: Atom, index: int = None):
        """Add `atom` to the underlying structure (appended when `index` is None)."""
        self.structure.add_atom(atom, index)


    def get_proton_position(self, attempt_number: int) -> npt.NDArray:
        """Get the coordinates for the proton requested.

        Parameters
        ----------
        attempt_number : {0, 1, 2}
            The current attempt number.

        Returns
        -------
        proton_position: NDArray
            The coordinates of the requested proton.
        """
        return self.get_coords()[self.get_proton_index(attempt_number)]


    def get_protonated_atom_position(self) -> npt.NDArray:
        """Get the XYZ coordinates of the parent charge site."""
        return self.get_coords()[self.protonated_atom_index]


    @staticmethod
    def calculate_charge(smiles: str):
        """Return the formal charge of the molecule described by `smiles`.

        Raises
        ------
        ValueError
            If the SMILES string cannot be parsed.
        """
        parsed = Chem.MolFromSmiles(smiles)
        if parsed is None:
            raise ValueError(f"Could not parse SMILES string: {smiles!r}")
        return Chem.rdmolops.GetFormalCharge(parsed)

In [35]:
def input_generator(
    molecule_name: str,
    stage: str,
    final_structure: list[list[str]],
    save_directory: str = "",
    charge: int = 1,
    dft_method: str = "wB97X-D3BJ",
    basis_set: str = "def2-TZVP",
    nprocs: int = 20,
    optional_input_params: str = "",
    multiplicity: int = 1,
):
    """Generate an ORCA input file.

    The file is written to `<save_directory>/<molecule_name><stage>.inp`.

    Parameters
    ----------
    molecule_name : str
        The name of the molecule.
    stage : str
        Either 'R' or 'P' for 'Reactant' or 'Product'.
    final_structure : list[list[str]]
        The structure of the final assembled molecule.  Each row is written
        as one space-separated line of the coordinate block.
    save_directory : str, default=""
        The directory to which the input file will be saved.  A trailing
        path separator is not required; an empty string means the current
        working directory.
    charge : int, default=1
        The total charge of the system.
    dft_method : str, default="wB97X-D3BJ"
        The chosen density functional.
    basis_set : str, default="def2-TZVP"
        The chosen basis set.
    nprocs : int, default=20
        The number of processes used in the calculation.
    optional_input_params : str, optional
        Any additional input parameters to pass to the keyword line.
    multiplicity : int, default=1
        The second number written on the `* xyz` line, after the charge.
    """

    input_specs = (
        f"!Opt {dft_method} {basis_set} LargePrint {optional_input_params}\n"
        "%geom\n"
        "\tMaxIter 200\n"
        "end\n"
        "%pal\n"
        f"\tnprocs {nprocs!s}\n"
        "end\n"
        f"* xyz {charge!s} {multiplicity!s}"
    )

    end_line = "*\n"

    # Join with pathlib so a directory given without a trailing separator
    # does not get fused with the file name.
    input_path = Path(save_directory) / f"{molecule_name}{stage}.inp"

    with open(input_path, "w", newline="\n") as input_file:
        np.savetxt(
            input_file,
            final_structure,
            fmt="%s",
            header=input_specs,
            footer=end_line,
            comments="",
        )

In [36]:
def structure_one_generator(
    molecule_alignment_quaternion: npt.NDArray,
    mol: Molecule,
) -> Geometry:
    """Apply the alignment matrix to every atom of `mol` and return the result.

    Parameters
    ----------
    molecule_alignment_quaternion : NDArray
        The matrix each atomic position is left-multiplied by.
    mol : Molecule
        The molecule whose coordinates are transformed (it is not modified).

    Returns
    -------
    structure_one : Geometry
        A new structure with the same elements and the transformed
        coordinates.  No charge is passed on, so it carries the default
        charge of `Geometry.from_list`.
    """

    rotated_positions = [
        np.dot(molecule_alignment_quaternion, position)
        for position in mol.get_coords()
    ]

    return Geometry.from_list(mol.get_elements(), rotated_positions)

In [37]:
def structure_two_generator(
    stage: str,
    z_rotation_offset: npt.NDArray,
    structure_one: Geometry,
    distance_multiplier: float = 2.7,
    z_flipper: npt.NDArray = Z_REFLECTOR,
    disable_nudge: bool = False,
) -> Geometry:
    """Generate the second structure that the proton will be transferred to.

    Every atom of `structure_one` is flipped with `z_flipper`, moved along +Z
    and rotated with `z_rotation_offset`.  For the "R" and "P" stages the
    nudge rotations and translation are applied afterwards.

    Parameters
    ----------
    stage : {"R", "P", "T"}
        Stage of the reaction ("R"=Reactant, "P"=Product, "T"=Transition)
    z_rotation_offset : NDArray
        A matrix that rotates around the Z-axis.
    structure_one : Geometry
        The first structure generated from `structure_one_generator()`.
    distance_multiplier : float, default=2.7
        The distance between the parent charge sites in Angstroms.  The "T"
        stage uses this value reduced by 0.1.
    z_flipper : NDArray, default=Z_REFLECTOR
        The matrix used to flip `structure_one` before it is moved.
    disable_nudge : bool, default=False
        If True, no nudge is applied for the "R" and "P" stages.

    Returns
    -------
    structure_two : Geometry
        The second structure to which the proton will be transferred.

    Raises
    ------
    ValueError
        If `stage` is not one of "R", "P" or "T".
    """

    if stage not in ("R", "P", "T"):
        raise ValueError(f"Unknown stage {stage!r}; expected 'R', 'P' or 'T'.")

    new_coords = []

    if stage == "T":
        # The transition stage is never nudged
        shift = (distance_multiplier - 0.1) * Z_UNIT_VECTOR

        for atom_position in structure_one.get_coords():
            placed = np.dot(z_flipper, atom_position) + shift
            new_coords.append(np.dot(z_rotation_offset, placed))
    else:
        if disable_nudge:
            nudge_rotate_x = IDENTITY_MATRIX
            nudge_rotate_y = IDENTITY_MATRIX
            nudge_translate = np.array([0., 0., 0.])
        else:
            nudge_rotate_x, nudge_rotate_y, nudge_translate = LinearAlgebra.nudge_matrix_generator(stage)

        shift = distance_multiplier * Z_UNIT_VECTOR
        # Reactant and product are translated in opposite directions
        translation = nudge_translate if stage == "R" else (-1) * nudge_translate

        for atom_position in structure_one.get_coords():
            placed = np.dot(z_flipper, atom_position) + shift
            # NOTE(review): here the position is on the LEFT of the product,
            # which applies the transpose of `z_rotation_offset`, while the
            # "T" stage above applies the matrix itself.  The two only agree
            # for symmetric matrices (identity, 180 degrees).  Kept as is to
            # preserve existing output - confirm which form is intended.
            spun = np.dot(placed, z_rotation_offset)
            nudged = np.dot(nudge_rotate_x, np.dot(nudge_rotate_y, spun))
            new_coords.append(nudged + translation)

    structure_two = Geometry.from_list(structure_one.get_elements(), new_coords)

    return structure_two

In [38]:
def structure_checker(
    structure_one: Geometry,
    structure_two: Geometry,
    mol: Molecule,
    attempt_number: int,
    overlap_threshold: float = 1.5,
) -> bool:
    """Check for overlap between the two molecules

    The proton involved in the transfer is ignored in BOTH structures.  The
    two structures share the same atom ordering, so that proton has the same
    index in each of them.

    Parameters
    ----------
    structure_one : Geometry
        The structure generated from `structure_one_generator()`.
    structure_two : Geometry
        The structure generated from `structure_two_generator()`.
    mol : Molecule
        A Molecule object
    attempt_number : int
        The attempt number for this run, typical maximum is 2 (3 possible tries)
    overlap_threshold : float, default=1.5
        Two atoms closer than this distance (in Angstroms) count as overlapping.

    Returns
    -------
    bool
        True indicates that the structures have some overlap
        (atoms within `overlap_threshold` of each other).
        False indicates there is no detected overlap.
    """

    proton_index = mol.get_proton_index(attempt_number)

    def positions_without_proton(structure):
        # Coordinates of every atom except the transferring proton
        return np.array(
            [
                np.asarray(atom.xyz, dtype=float)[:3]
                for index, atom in enumerate(structure)
                if index != proton_index
            ],
            dtype=float,
        )

    coords_one = positions_without_proton(structure_one)
    coords_two = positions_without_proton(structure_two)

    # With nothing left to compare there cannot be an overlap
    if coords_one.size == 0 or coords_two.size == 0:
        return False

    # All pairwise distances between the two structures at once
    separations = np.linalg.norm(
        coords_one[:, None, :] - coords_two[None, :, :], axis=-1
    )

    return bool((separations < overlap_threshold).any())

In [39]:
def final_structure_generator(
    structure_one: Geometry,
    structure_two: Geometry,
    stage: str,
    mol: Molecule,
    attempt_number: int,
    distance_multiplier: float = 2.7,
) -> Geometry:
    """Generate the complete structure. Should only be used after checking for/handling overlap.

    The transferring proton is taken out of both structures, the remaining
    atoms are joined (structure one first) and a single proton is appended
    at the end.  Neither input structure is modified.

    Parameters
    ----------
    structure_one : Geometry
        The structure generated from `structure_one_generator()`.
    structure_two : Geometry
        The structure generated from `structure_two_generator()`.
    stage : {"R", "P", "T"}
        Stage of the reaction ("R"=Reactant, "P"=Product, "T"=Transition).
        "R" keeps the proton of `structure_one`, "P" the proton of
        `structure_two`, and "T" places a new hydrogen on the Z axis.
    mol : Molecule
        The current Molecule object.
    attempt_number : {0, 1, 2}
        The attempt number.
    distance_multiplier : float, default=2.7
        The distance between the parent charge sites in Angstroms.

    Returns
    -------
    final_structure : Geometry
        The final assembled structure.

    Raises
    ------
    ValueError
        If `stage` is not one of "R", "P" or "T".
    """

    if stage not in ("R", "P", "T"):
        raise ValueError(f"Unknown stage {stage!r}; expected 'R', 'P' or 'T'.")

    proton_index = mol.proton_position_indices[attempt_number]

    # Resolve the index against each structure (supports negative indices,
    # raises IndexError when out of range)
    size_one = len(structure_one.atoms)
    index_one = range(size_one)[proton_index]
    index_two = range(len(structure_two.atoms))[proton_index]

    # Adding creates a structure with its own atom list, so removing the
    # protons from it leaves both inputs untouched.
    final_structure = structure_one + structure_two

    # Remove the later entry first so the earlier index stays valid
    product_proton = final_structure.pop_atom(size_one + index_two)
    reactant_proton = final_structure.pop_atom(index_one)

    if stage == "R":
        final_structure.add_atom(reactant_proton)
    elif stage == "P":
        final_structure.add_atom(product_proton)
    else:
        # NOTE(review): the offset here is 0.01 while structure_two_generator
        # shifts the "T" stage by 0.1 - confirm the two are meant to differ.
        transition_proton = Atom("H", np.array([0., 0., (distance_multiplier - 0.01) / 2]))
        final_structure.add_atom(transition_proton)

    return final_structure

In [40]:
def overlap_handler(
    structure_one: Geometry,
    structure_two: Geometry,
    stage: str,
    mol: Molecule,
    z_rotation_offset: npt.NDArray,
    distance_multiplier: float = 2.7,
) -> tuple[Geometry, Geometry, int]:
    """Fix molecule overlap issues

    For each proton position in turn, the molecule is re-aligned on that
    proton and the second structure is rebuilt (without nudge), first with a
    180 degree rotation around the Z-axis and then with rotations in 45
    degree steps.  The first combination without overlap is returned.

    Parameters
    ----------
    structure_one : Geometry
        The structure generated from `structure_one_generator()`.
    structure_two : Geometry
        The structure generated from `structure_two_generator()`.
    stage : {"R", "P", "T"}
        Stage of the reaction ("R"=Reactant, "P"=Product, "T"=Transition).
    mol : Molecule
        The current Molecule object.
    z_rotation_offset : NDArray
        A matrix that rotates around the Z-axis.  Kept for interface
        compatibility; the handler chooses its own rotations.
    distance_multiplier : float, (default=2.7)
        The distance between the parent charge sites in Angstroms.

    Returns
    -------
    structure_one : Geometry
        A corrected version of `structure_one`.
    structure_two : Geometry
        A corrected version of `structure_two`.
    attempt_number : {0, 1, 2}
        Analogous to the proton position used.  If no arrangement without
        overlap was found, the last structures tried are returned together
        with the last valid proton position.
    """

    proton_position_attempts = len(mol.proton_position_indices)

    # Nothing to repair if the incoming structures do not overlap
    if proton_position_attempts and not structure_checker(
        structure_one, structure_two, mol, 0
    ):
        return structure_one, structure_two, 0

    for attempt_number in range(proton_position_attempts):
        proton_position = mol.get_proton_position(attempt_number)

        alignment_matrix = LinearAlgebra.gen_alignment_matrix(proton_position, Z_UNIT_VECTOR)

        structure_one = structure_one_generator(alignment_matrix, mol)

        # The flip only depends on structure one, so build it once per attempt
        aligned_coords = structure_one.get_coords()
        z_flipper = LinearAlgebra.gen_alignment_matrix(
            vector_one=aligned_coords[mol.get_proton_index(attempt_number)],
            vector_two=aligned_coords[mol.neighboring_atom_index],
            alignment_angle=np.pi,
        )

        # Rotations to try, each paired with its success message
        candidates = [
            (
                Z_ROTATION_180,
                "Geometry was fixed with proton position #" + str(attempt_number),
            )
        ]
        for step in range(8):
            # 45 degree increments
            rotation_angle = step * (np.pi / 4)
            rotation = np.array(
                [
                    [np.cos(rotation_angle), -np.sin(rotation_angle), 0],
                    [np.sin(rotation_angle), np.cos(rotation_angle), 0],
                    [0, 0, 1],
                ],
                dtype=float,
            )
            candidates.append(
                (
                    rotation,
                    f"Geometry was fixed with proton position #{attempt_number} with a rotation of {step * 45} degrees.",
                )
            )

        for rotation, success_message in candidates:
            structure_two = structure_two_generator(
                stage,
                rotation,
                structure_one,
                distance_multiplier,
                z_flipper,
                disable_nudge=True,
            )

            # If the structure works, then return it and exit the function
            if not structure_checker(structure_one, structure_two, mol, attempt_number):
                print(success_message)
                return structure_one, structure_two, attempt_number

    print("Geometry could not be fixed.")
    # Hand back an index that is still valid for the proton list so callers
    # can keep working with the last structures tried.
    return structure_one, structure_two, max(proton_position_attempts - 1, 0)

In [41]:
class Data:
    """Batch driver that builds XYZ structures for a list of molecules.

    Attributes
    ----------
    xyz_file_path : PathLike
        Directory that XYZ files are read from and written to.  Asked for
        with a dialog when not supplied.
    stage : str
        The stage passed on to the NEB structure builders (default "R").
    names : list[str]
        The molecule names, used for file names.
    smiles_strings : list[str]
        The SMILES string of each molecule, in the same order as `names`.

    Notes
    -----
    When `names` or `smiles_strings` is missing, a CSV file is asked for with
    a dialog; its first column is read as the SMILES string and its second
    column as the name.
    """

    def __init__(
        self,
        xyz_file_path: PathLike = None,
        smiles_strings: list[str] = None,
        names: list[str] = None,
        stage: str = "R",
    ):
        self.xyz_file_path = (
            Path(xyz_file_path)
            if xyz_file_path is not None
            else multiple_input(get_mmd=False)
        )
        self.stage = stage

        if names is None or smiles_strings is None:
            multiple_molecule_data = multiple_input(get_xyz_path=False)

        self.names: list[str] = (
            names
            if names is not None
            else [row[1] for row in multiple_molecule_data]
        )
        self.smiles_strings: list[str] = (
            smiles_strings
            if smiles_strings is not None
            else [row[0] for row in multiple_molecule_data]
        )


    def __repr__(self):
        lines = [f"{'':6}{'Name':12}{'SMILES':11}\n\n"]
        for i, smiles in enumerate(self.smiles_strings):
            lines.append(f"{str(i + 1) + '.':6}{self.names[i]:12}{smiles:20}\n")
        return "".join(lines)


    def build_neb(
        self,
        index: int,
        has_xyz_data: bool = False,
        disable_nudge: bool = False,
    ):
        """Build the two-molecule structure for one entry and write it to an XYZ file.

        Parameters
        ----------
        index : int
            Position of the molecule in `names` / `smiles_strings`.
        has_xyz_data : bool, default=False
            If True, the starting structure is read from `<name>.xyz` in
            `xyz_file_path`; otherwise it is generated from the SMILES string.
        disable_nudge : bool, default=False
            If True, the second structure is built without the nudge.

        Raises
        ------
        RuntimeError
            If the molecule has no net charge.
        """
        distance_multiplier = DISTANCE_MULTIPLIERS[0]

        if has_xyz_data:
            structure = Geometry.from_xyz(
                self.xyz_file_path/Path(f"{self.names[index]}.xyz"),
                charge=Molecule.calculate_charge(self.smiles_strings[index])
            )
        else:
            structure = Geometry.from_smiles(self.smiles_strings[index])

        if structure.charge == 0:
            raise RuntimeError("Molecule must be protonated for NEB structure generation!")

        # The first proton position is tried first
        attempt_number = 0

        mol = Molecule(
            self.smiles_strings[index],
            structure,
            self.names[index],
        )

        z_rotation_offset = IDENTITY_MATRIX

        proton_position = mol.get_proton_position(attempt_number)

        # Rotate the molecule so that the proton lies along the Z axis
        alignment_matrix = LinearAlgebra.gen_alignment_matrix(proton_position, Z_UNIT_VECTOR)

        structure_one = structure_one_generator(alignment_matrix, mol)

        aligned_coords = structure_one.get_coords()
        z_flipper = LinearAlgebra.gen_alignment_matrix(
            vector_one=aligned_coords[mol.get_proton_index(attempt_number)],
            vector_two=aligned_coords[mol.neighboring_atom_index],
            alignment_angle=np.pi
        )

        structure_two = structure_two_generator(
            self.stage,
            z_rotation_offset,
            structure_one,
            distance_multiplier,
            z_flipper,
            disable_nudge,
        )

        print(mol.molecule_name)

        if structure_checker(structure_one, structure_two, mol, attempt_number):
            print("Atomic Overlap Detected, attempting to fix...")
            structure_one, structure_two, attempt_number = overlap_handler(
                structure_one,
                structure_two,
                self.stage,
                mol,
                z_rotation_offset,
                distance_multiplier,
            )

        final_structure = final_structure_generator(
            structure_one,
            structure_two,
            self.stage,
            mol,
            attempt_number,
            distance_multiplier,
        )

        final_structure.to_xyz(
            name=f"{mol.molecule_name}-{self.stage}",
            xyz_dir=self.xyz_file_path
        )


    def build_single(self, index: int):
        """Generate one molecule from its SMILES string and write it to an XYZ file.

        The file name ends in "-N" for a neutral molecule and in "-C" for a
        positively charged one; a negatively charged molecule gets the
        current `stage` as suffix.  `self.stage` itself is left unchanged so
        later `build_neb` calls keep working.
        """
        structure = Geometry.from_smiles(self.smiles_strings[index])

        if structure.charge == 0:
            suffix = "N"
        elif structure.charge > 0:
            suffix = "C"
        else:
            suffix = self.stage

        structure.to_xyz(
            name=f"{self.names[index]}-{suffix}",
            xyz_dir=self.xyz_file_path
        )


    def build_all_neb(self, has_xyz_data: bool = False, disable_nudge: bool = False):
        """Run `build_neb` for every molecule."""
        for i in range(len(self.names)):
            self.build_neb(index=i, has_xyz_data=has_xyz_data, disable_nudge=disable_nudge)


    def build_all_single(self):
        """Run `build_single` for every molecule."""
        for i in range(len(self.names)):
            self.build_single(index=i)