In [None]:
from __future__ import annotations
import platform
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
import numpy.typing as npt
import csv
from tkinter import Tk
from tkinter import filedialog
from os import PathLike
from pathlib import Path

# On Windows, mark the process as DPI-aware via user32.SetProcessDPIAware()
# (presumably so the tkinter dialogs used in this notebook are not rendered
# blurry on high-DPI displays -- TODO confirm).
if platform.system() == "Windows":
    # ctypes.windll only exists on Windows, hence the guarded import.
    from ctypes import windll
    windll.user32.SetProcessDPIAware()

In [None]:
# Shared geometric constants used by the structure-building helpers.

# Rotation by 180 degrees about the Z-axis: negates x and y, leaves z alone.
Z_ROTATION_180 = np.diag([-1.0, -1.0, 1.0])

# Candidate distances between the parent atoms that are tried in turn.
DISTANCE_MULTIPLIERS = [2.7, 2.8, 2.9, 3.0, 3.1, 3.2]

# Unit vector pointing along +Z.
Z_UNIT_VECTOR = np.array([0.0, 0.0, 1.0])

# Mirror through the XY-plane: flips the sign of z only.
Z_REFLECTOR = np.diag([1.0, 1.0, -1.0])

# 3x3 identity, used wherever "no rotation" is required.
IDENTITY_MATRIX = np.eye(3, dtype=float)

In [None]:
class LinearAlgebra:
    """Static linear-algebra helpers for rotating, aligning and measuring coordinates."""

    @staticmethod
    def normalize(vector: npt.ArrayLike) -> npt.NDArray:
        """Return the corresponding unit vector for a given vector.

        Parameters
        ----------
        vector : ArrayLike
            The vector (or matrix) to normalize. Lists are converted with `np.asarray`.

        Returns
        -------
        NDArray
            `vector` divided by its norm. When the norm is zero the input is
            returned unscaled (as an array) so no division by zero occurs.

        Notes
        -----
        The normalization of a vector in Euclidean geometry to its
        corresponding unit vector is accomplished by dividing the
        vector by its Euclidean norm, given as the square root of the sum
        of its components squared.

        The normalization of a matrix to a unit matrix is accomplished by
        dividing the matrix by its Frobenius norm, a generalization of the
        Euclidean norm to an N by M matrix. It can be calculated by taking the
        square root of the trace of the conjugate transpose of the matrix
        times the matrix itself.
        """

        array = np.asarray(vector)
        norm = np.linalg.norm(array)

        if norm == 0:
            return array

        return array / norm


    @staticmethod
    def quaternion_builder(
        vector: npt.ArrayLike,
        angle: float,
    ) -> npt.NDArray:
        """Build the 3x3 rotation matrix of a unit quaternion.

        Parameters
        ----------
        vector : ArrayLike
            The vector to rotate around (can be list or NDArray). Only the
            first three components are used; it does not need to be unit length.
        angle : float
            The angle to rotate by (in radians)

        Returns
        -------
        quaternion_matrix : NDArray
            A quaternion matrix that rotates by `angle` around `vector`.
            A zero `vector` yields the identity matrix.
        """

        # Normalize the axis *before* building the quaternion. Normalizing only
        # the assembled quaternion would silently change the rotation angle
        # whenever the axis is not already a unit vector.
        axis = LinearAlgebra.normalize(np.asarray(vector, dtype=float)[:3])
        half_angle = angle / 2

        quaternion = np.array(
            [
                np.cos(half_angle),
                axis[0] * np.sin(half_angle),
                axis[1] * np.sin(half_angle),
                axis[2] * np.sin(half_angle),
            ]
        )

        # Re-normalizing removes rounding error and maps the degenerate
        # zero-axis case onto the identity rotation.
        w, x, y, z = LinearAlgebra.normalize(quaternion)

        quaternion_matrix = np.array(
            [
                [
                    1 - 2 * (y ** 2 + z ** 2),
                    2 * (x * y - w * z),
                    2 * (x * z + w * y),
                ],
                [
                    2 * (x * y + w * z),
                    1 - 2 * (x ** 2 + z ** 2),
                    2 * (y * z - w * x),
                ],
                [
                    2 * (x * z - w * y),
                    2 * (y * z + w * x),
                    1 - 2 * (x ** 2 + y ** 2),
                ],
            ]
        )

        return quaternion_matrix


    @staticmethod
    def vector_angle(vector_one: npt.ArrayLike, vector_two: npt.ArrayLike) -> float:
        """Calculate the angle (in radians) between two vectors."""

        unit_vector_one = LinearAlgebra.normalize(vector_one)
        unit_vector_two = LinearAlgebra.normalize(vector_two)

        # Clip guards arccos against dot products that drift just outside
        # [-1, 1] through floating-point rounding.
        cosine = np.clip(np.dot(unit_vector_one, unit_vector_two), -1.0, 1.0)

        return float(np.arccos(cosine))


    @staticmethod
    def distance_calculator(
        vector_one: npt.ArrayLike, vector_two: npt.ArrayLike
    ) -> np.float64:
        """Calculate the Euclidean distance between two vectors.

        Only the first three components of each vector are used.
        """

        squared_distance = sum(
            (float(vector_one[axis]) - float(vector_two[axis])) ** 2
            for axis in range(3)
        )

        return np.sqrt(squared_distance)


    @staticmethod
    def nudge_matrix_generator(stage: str) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]:
        """Creates the nudge matrices and vector for avoiding local minima during optimization.

        Parameters
        ----------
        stage : {"Reactant", "Product", "Transition"}
            Stage of the reaction.

        Returns
        -------
        nudge_rotate_x : NDArray
            An array that rotates around the x-axis by +/- 15 degrees
        nudge_rotate_y : NDArray
            An array that rotates around the y-axis by +/- 20 degrees
        nudge_translate : NDArray
            The translation nudge; currently always the zero vector.

        Raises
        ------
        ValueError
            If `stage` is not one of the three supported stages.

        Notes
        -----
        Calling this for `stage="Reactant"` yields rotation matrices that rotate by +15 degrees
        in the X-axis and +20 degrees in the Y-axis. The opposite is true for `stage="Product"`.
        Calling this for `stage="Transition"` returns the identity matrix for rotations.
        Fresh arrays are returned on every call so callers cannot accidentally
        modify a shared module-level matrix.
        """

        nudge_translate = np.array([0, 0, 0])

        if stage == "Transition":
            return np.eye(3, dtype=float), np.eye(3, dtype=float), nudge_translate

        directions = {"Reactant": 1.0, "Product": -1.0}
        if stage not in directions:
            raise ValueError(
                f'Invalid stage {stage!r}! Please select from "Reactant", "Product", "Transition"'
            )

        # 15 degrees about X and 20 degrees about Y, signed by the stage.
        x_angle = directions[stage] * (np.pi / 12)
        y_angle = directions[stage] * (np.pi / 9)

        nudge_rotate_x = np.array(
            [
                [1, 0, 0],
                [0, np.cos(x_angle), -np.sin(x_angle)],
                [0, np.sin(x_angle), np.cos(x_angle)],
            ],
            dtype=float,
        )

        nudge_rotate_y = np.array(
            [
                [np.cos(y_angle), 0, np.sin(y_angle)],
                [0, 1, 0],
                [-np.sin(y_angle), 0, np.cos(y_angle)],
            ],
            dtype=float,
        )

        return nudge_rotate_x, nudge_rotate_y, nudge_translate


    @staticmethod
    def _perpendicular_axis(vector: npt.ArrayLike) -> npt.NDArray:
        """Return a unit vector perpendicular to `vector` (zero vector if `vector` is zero)."""

        array = np.asarray(vector, dtype=float)
        # Crossing with the basis vector along the smallest component avoids
        # picking a basis vector that is parallel to `vector`.
        basis = np.eye(3)[int(np.argmin(np.abs(array)))]

        return LinearAlgebra.normalize(np.cross(array, basis))


    @staticmethod
    def gen_alignment_matrix(
        vector_one: npt.ArrayLike,
        vector_two: npt.ArrayLike | None = None,
        alignment_angle: float | None = None,
    ) -> npt.NDArray:
        """Generate a matrix to align two vectors.

        Parameters
        ----------
        vector_one : ArrayLike
            The vector that will be aligned.
        vector_two : ArrayLike | None, default=None
            The vector to which `vector_one` will be aligned. Default aligns to the Z-axis.
        alignment_angle : float | None, default=None
            Rotation angle in radians. Default uses the angle between the two vectors.

        Returns
        -------
        NDArray
            A 3x3 rotation matrix about the axis `vector_one x vector_two`.

        Notes
        -----
        This method calls `LinearAlgebra.vector_angle(vector_one, vector_two)`,
        then normalizes `np.cross(vector_one, vector_two)`, and feeds the resulting
        alignment vector and alignment angle to `LinearAlgebra.quaternion_builder()`.

        When the two vectors are parallel or antiparallel the cross product is
        zero and does not define an axis. In that case any axis perpendicular to
        `vector_one` is used, so antiparallel vectors are correctly rotated by
        180 degrees instead of being left untouched.
        """

        if vector_two is None:
            vector_two = Z_UNIT_VECTOR

        if alignment_angle is None:
            alignment_angle = LinearAlgebra.vector_angle(vector_one, vector_two)

        cross_product = np.cross(vector_one, vector_two)

        if np.linalg.norm(cross_product) == 0:
            alignment_vector = LinearAlgebra._perpendicular_axis(vector_one)
        else:
            alignment_vector = LinearAlgebra.normalize(cross_product)

        return LinearAlgebra.quaternion_builder(alignment_vector, alignment_angle)

In [None]:
def open_folder(
    initial_dir: PathLike | str | None = None,
    must_exist: bool = False,
    title: str = "Select Directory"
) -> Path:
    """Open a file browser dialog to select a folder.

    Parameters
    ----------
    initial_dir : PathLike | str | None, default=None
        The initial directory for the file dialog. Leaving this blank makes the file path the current working directory.
    must_exist : bool, default=False
        Require the directory to already exist.
    title : str, default="Select Directory"
        The title of the file dialog, should help the user understand what the dialog is for.

    Returns
    -------
    dir_path : Path
        The path to the directory that was selected, wrapped in `pathlib.Path`.
        NOTE(review): if the dialog is cancelled tkinter is expected to return
        an empty string, which `Path` turns into the relative path "." --
        callers that need to detect cancellation should confirm and check for it.
    """

    if initial_dir is None:
        initial_dir = Path.cwd()

    # A hidden, always-on-top root window so that only the dialog is shown.
    root = Tk()
    try:
        root.wm_attributes("-topmost", 1)
        root.withdraw()

        dir_path = filedialog.askdirectory(
            initialdir=initial_dir,
            mustexist=must_exist,
            parent=root,
            title=title,
        )
    finally:
        # Always tear the hidden root down, even if the dialog raised,
        # so no orphaned Tk interpreter is left behind.
        root.destroy()

    return Path(dir_path)

In [None]:
def open_file(
    default_extension: str | None = "",
    file_types: list[list[str] | str] | None = None,
    initial_dir: PathLike | None = None,
    initial_file: PathLike | None = None,
    title: str = "Select a file.",
) -> Path:
    """Open a file browser dialog to select a file.

    Parameters
    ----------
    default_extension : str | None = ""
        The default extension that is appended to file names if they do not have an extension specified.
    file_types : list[list[str]] | None = None
        The file types that are allowed to be selected. Format is `[[name1, ext1], [name2, ext2], ...]`.
    initial_dir : PathLike | str | None, default=None
        The initial directory for the file dialog. Leaving this blank makes the file path the current working directory.
    initial_file : PathLike | None, default=None
        An initial filename that is displayed in the dialog.
    title : str, default="Select a file."
        The title of the file dialog, should help the user understand what the dialog is for.

    Returns
    -------
    file_path : Path
        The path to the file that was selected, wrapped in `pathlib.Path`.
        NOTE(review): if the dialog is cancelled tkinter is expected to return
        an empty string, which `Path` turns into the relative path "." --
        callers that need to detect cancellation should confirm and check for it.
    """

    if initial_dir is None:
        initial_dir = Path.cwd()

    # tkinter expects a sequence of (label, pattern) tuples.
    if file_types is not None:
        file_types = tuple((entry[0], entry[1]) for entry in file_types)
    else:
        file_types = ()

    # A hidden, always-on-top root window so that only the dialog is shown.
    root = Tk()
    try:
        root.wm_attributes("-topmost", 1)
        root.withdraw()

        file_path = filedialog.askopenfilename(
            defaultextension=default_extension,
            filetypes=file_types,
            initialdir=initial_dir,
            initialfile=initial_file,
            parent=root,
            title=title,
        )
    finally:
        # Always tear the hidden root down, even if the dialog raised,
        # so no orphaned Tk interpreter is left behind.
        root.destroy()

    return Path(file_path)

In [None]:
class Atom:
    """Class containing the information of a single atom.

    Attributes
    ----------
    element : str
        The atomic symbol of the element.
    xyz : NDArray
        The x-, y-, and z-coordinates of the atom, stored as a float64 array
        copied from the values passed to the constructor.
    """

    def __init__(
        self,
        element: str,
        xyz: npt.ArrayLike,
    ):
        self.element = element
        # np.array copies, so later changes to the caller's data do not leak in.
        self.xyz = np.array(xyz, dtype=np.float64)


    def __repr__(self) -> str:
        """Return a two-line table: a column header and this atom's row."""
        # Single quotes inside the f-strings keep this valid on Python < 3.12,
        # where reusing the enclosing quote character is a syntax error.
        header = f"{'Element':12}{'X':11}{'Y':11}{'Z':11}\n"
        row = (
            f"{self.element:9}"
            f"{self.xyz[0]:11.6f}{self.xyz[1]:11.6f}{self.xyz[2]:11.6f}\n"
        )
        return header + row

In [None]:
class Geometry:
    """Class storing the geometry of a single molecule.

    Attributes
    ----------
    atoms : list[Atom]
        A list of the atoms in the molecule.
    charge : int, default=0
        The total formal charge of the molecule.
    """

    def __init__(
        self,
        atoms: list[Atom],
        charge: int = 0,
    ):
        self.atoms = atoms
        self.charge = charge


    def get_num_atoms(self) -> int:
        """Return the number of atoms in the geometry."""
        return len(self.atoms)


    def get_coords(self) -> npt.NDArray:
        """Return the coordinates of every atom as one array (one row per atom)."""
        return np.array([atom.xyz for atom in self.atoms])


    def get_elements(self) -> list[str]:
        """Return the element symbol of every atom, in atom order."""
        return [atom.element for atom in self.atoms]


    def new_xyz(self, new_xyzs: npt.NDArray):
        """Overwrite the coordinates of every atom with the rows of `new_xyzs`.

        Each row is copied into its own float64 array so the atoms never share
        memory with the array that was passed in.
        """
        for index, atom in enumerate(self.atoms):
            atom.xyz = np.array(new_xyzs[index], dtype=np.float64)


    def remove_atom(self, index: int):
        """Delete the atom at `index`."""
        del self.atoms[index]


    def pop_atom(self, index: int) -> Atom:
        """Remove the atom at `index` and return it."""
        return self.atoms.pop(index)


    def add_atom(self, atom: Atom, index: int | None = None):
        """Insert `atom` at `index`, or append it when `index` is None."""
        if index is not None:
            self.atoms.insert(index, atom)
        else:
            self.atoms.append(atom)


    @staticmethod
    def _parse_xyz_atom_lines(lines: list[str]) -> list[tuple[str, list[float]]]:
        """Parse `element x y z` lines into (element, [x, y, z]) pairs, skipping blank lines."""
        parsed = []
        for line in lines:
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 4:
                raise ValueError(f"Malformed XYZ atom line: {line!r}")
            # Only the three coordinate columns are read; extra columns are ignored.
            parsed.append((fields[0], [float(value) for value in fields[1:4]]))
        return parsed


    @classmethod
    def from_xyz(cls, file_path: str | PathLike, charge: int = 0):
        """Read in XYZ file format and return atomic symbols and coordinates

        Parameters
        ----------
        file_path : str
            Full path to an XYZ file.
        charge : int, default=0
            The total formal charge to assign to the geometry.

        Returns
        -------
        Geometry object

        Raises
        ------
        ValueError
            If an atom line has fewer than four columns.

        Notes
        -----
        The first non-blank line (the atom count) and the line after it (the
        comment line) are skipped by position. The comment line may be blank,
        which is exactly what `to_xyz` writes, so it must not be located by
        discarding blank lines first or the first atom would be lost.
        """

        with open(file_path) as xyz_file:
            # splitlines() drops line endings without eating the last character
            # of a file that has no trailing newline.
            lines = xyz_file.read().splitlines()

        count_line = next(
            (index for index, line in enumerate(lines) if line.strip()),
            len(lines),
        )

        atoms = [
            Atom(element, xyz)
            for element, xyz in cls._parse_xyz_atom_lines(lines[count_line + 2:])
        ]

        return cls(atoms, charge)


    @classmethod
    def from_smiles(cls, smiles_string: str):
        """Convert a SMILES string to a 3D molecule.

        Parameters
        ----------
        smiles_string : str
            Any valid SMILES string (for example, "N#Cc1nn[nH]c(C#N)1")

        Returns
        -------
        Geometry object

        Raises
        ------
        ValueError
            If RDKit cannot parse the SMILES string or cannot embed 3D coordinates.
        """

        parsed = Chem.MolFromSmiles(smiles_string)
        if parsed is None:
            raise ValueError(f"Could not parse SMILES string: {smiles_string!r}")

        molecule = Chem.AddHs(parsed)

        # EmbedMolecule reports failure with a negative conformer id; without
        # this check an empty geometry would be returned silently.
        if AllChem.EmbedMolecule(molecule, AllChem.ETKDGv3()) < 0:
            raise ValueError(f"Could not embed 3D coordinates for SMILES: {smiles_string!r}")

        charge = Chem.rdmolops.GetFormalCharge(molecule)

        # The XYZ block starts with the atom-count and comment lines.
        xyz_lines = Chem.rdmolfiles.MolToXYZBlock(molecule).splitlines()[2:]

        atoms = [
            Atom(element, xyz)
            for element, xyz in cls._parse_xyz_atom_lines(xyz_lines)
        ]

        return cls(atoms, charge)


    @classmethod
    def from_list(cls, elements: list[str], xyzs: npt.NDArray, charge: int = 0):
        """Build a geometry from parallel sequences of element symbols and coordinates.

        Raises
        ------
        ValueError
            If `elements` and `xyzs` differ in length.
        """
        if len(elements) != len(xyzs):
            raise ValueError("The list of elements and coordinates must be of the same size!")

        atoms = [Atom(element, xyz) for element, xyz in zip(elements, xyzs)]

        return cls(atoms, charge)


    def to_xyz(self, name: str, xyz_dir: PathLike | None = None):
        """Write the geometry to `<xyz_dir>/<name>.xyz`.

        Parameters
        ----------
        name : str
            File name without the `.xyz` extension.
        xyz_dir : PathLike | None, default=None
            Target directory. Default is the current working directory.

        Notes
        -----
        The comment line (second line of the file) is left blank.
        """
        target_dir = Path.cwd() if xyz_dir is None else Path(xyz_dir)

        lines = [f"{self.get_num_atoms()}\n\n"]
        lines.extend(
            f"{atom.element:3}{atom.xyz[0]:12.6f}{atom.xyz[1]:12.6f}{atom.xyz[2]:12.6f}\n"
            for atom in self.atoms
        )

        with open(target_dir / f"{name}.xyz", "w", newline="") as xyz_file:
            xyz_file.writelines(lines)


    def __repr__(self) -> str:
        """Return a table with a header row followed by one row per atom."""
        # Single quotes inside the f-string keep this valid on Python < 3.12.
        rows = [f"{'Element':12}{'X':11}{'Y':11}{'Z':11}\n\n"]
        rows.extend(
            f"{atom.element:9}{atom.xyz[0]:11.6f}{atom.xyz[1]:11.6f}{atom.xyz[2]:11.6f}\n"
            for atom in self.atoms
        )
        return "".join(rows)


    def __add__(self, structure: Geometry):
        """Concatenate two geometries; atom lists are joined and charges are summed.

        The new geometry has its own list but shares the Atom objects with
        both operands.
        """
        return Geometry(self.atoms + structure.atoms, self.charge + structure.charge)


    def __iter__(self):
        """Iterate over the atoms in order."""
        yield from self.atoms

    #def write_orca_input(self, name: str, input_dir: PathLike = Path("./"), **kwargs):

In [None]:
class Molecule:
    """Class that contains all molecule data.

    Attributes
    ----------
    smiles_string : str
        Any valid SMILES string.
    structure : Geometry
        The structure of the molecule. Built from `smiles_string` when not supplied.
    molecule_name : str
        The molecule name that will be used for naming output XYZ and INP files.
        Defaults to "bean" when not supplied.
    protonated_atom_index : int | None
        The line index of the atom that carries an extra proton (the parent charge site).
    neighboring_atom_index : int | None
        The line index of an atom attached to the parent charge site.
    proton_position_indices : list of int | None
        A list of the line indexes for each proton attached to the protonated atom.

    Notes
    -----
    The three index attributes are taken from the constructor arguments when
    given. Otherwise they stay `None` for a structure whose charge is zero and
    are looked up with `substructure_match` for any other charge.

    When the structure's charge is positive, the coordinates are translated so
    that the protonated atom sits at the origin.

    Methods
    -------
    get_proton_index(attempt_number)
        Return the atom index of the proton of interest.
    get_proton_position(attempt_number)
        Return the XYZ coordinates of the proton of interest.
    get_protonated_atom_position()
        Return the XYZ coordinates of the parent charge site.
    calculate_charge(smiles)
        Calculate the charge of a molecule denoted by a SMILES string
    substructure_match(smiles_string, match_type)
        Locate the protonated atom, its protons, or one of its neighbors.
    """

    # SMARTS pattern used for each supported `match_type`.
    _MATCH_SMARTS = {
        "protons": "[$([#1][#7H+]),$([#1][#7H2+]),$([#1][#7H3+]),$([#1][#8H+]),$([#1][#8H2+])]",
        "protonated_atom": "[#7H+,#7H2+,#7H3+,#8H+,#8H2+]",
        "neighbor": "[$([*][#7H+]),$([*][#7H2+]),$([*][#7H3+]),$([*][#8H+]),$([*][#8H2+])]",
    }

    def __init__(
        self,
        smiles_string: str,
        structure: Geometry | None = None,
        molecule_name: str | None = None,
        protonated_atom_index: int | None = None,
        neighboring_atom_index: int | None = None,
        proton_position_indices: list[int] | None = None,
    ):
        self.smiles_string = smiles_string

        if structure is not None:
            self.structure = structure
        else:
            self.structure = Geometry.from_smiles(self.smiles_string)

        self.molecule_name = molecule_name if molecule_name is not None else "bean"

        self.protonated_atom_index = self._resolve_index(
            protonated_atom_index, "protonated_atom"
        )
        self.neighboring_atom_index = self._resolve_index(
            neighboring_atom_index, "neighbor"
        )
        self.proton_position_indices = self._resolve_index(
            proton_position_indices, "protons"
        )

        if self.structure.charge > 0:
            # Translate so the protonated atom sits at the origin.
            coords = self.structure.get_coords()
            self.structure.new_xyz(coords - coords[self.protonated_atom_index])


    def _resolve_index(self, supplied, match_type: str):
        """Return `supplied` if given or the structure is neutral, else look it up from the SMILES."""
        if supplied is not None or self.structure.charge == 0:
            return supplied
        return Molecule.substructure_match(self.smiles_string, match_type)


    def get_proton_index(self, attempt_number: int) -> int:
        """Return the atom index of the proton used for attempt `attempt_number`."""
        return self.proton_position_indices[attempt_number]


    def get_num_atoms(self) -> int:
        """Get the number of atoms in the molecule."""
        return self.structure.get_num_atoms()


    def get_coords(self) -> npt.NDArray:
        """Return the coordinates of every atom in the structure."""
        return self.structure.get_coords()


    def get_elements(self) -> list[str]:
        """Return the element symbol of every atom in the structure."""
        return self.structure.get_elements()


    def get_charge(self) -> int:
        """Return the charge stored on the structure."""
        return self.structure.charge


    def remove_atom(self, index: int):
        """Delete the atom at `index` from the structure.

        The stored atom indices on this object are not adjusted.
        """
        self.structure.remove_atom(index)


    def pop_atom(self, index: int):
        """Remove the atom at `index` from the structure and return it.

        The stored atom indices on this object are not adjusted.
        """
        return self.structure.pop_atom(index)


    def add_atom(self, atom: Atom, index: int | None = None):
        """Insert `atom` into the structure at `index`, or append it when `index` is None."""
        self.structure.add_atom(atom, index)


    def get_proton_position(self, attempt_number: int) -> npt.NDArray:
        """Get the coordinates for the proton requested.

        Parameters
        ----------
        attempt_number : {0, 1, 2}
            The current attempt number.

        Returns
        -------
        proton_position: NDArray
            The coordinates of the requested proton.
        """
        return self.get_coords()[self.get_proton_index(attempt_number)]


    def get_protonated_atom_position(self) -> npt.NDArray:
        """Get the XYZ coordinates of the parent charge site."""
        return self.get_coords()[self.protonated_atom_index]


    @staticmethod
    def calculate_charge(smiles: str):
        """Return the formal charge RDKit computes for the molecule in `smiles`."""
        return Chem.rdmolops.GetFormalCharge(Chem.MolFromSmiles(smiles))


    @staticmethod
    def substructure_match(smiles_string: str, match_type: str) -> int | list[int]:
        """Generate an RDKit molecule and search the structure for protonated atoms.

        Parameters
        ----------
        smiles_string : str
            Any valid SMILES string.
        match_type : {"protons", "protonated_atom", "neighbor"}
            Feature in the structure to match to.

        Returns
        -------
        int | list[int]
            Exactly one of the following, depending on `match_type`:

            * `"protonated_atom"` -- the index of the atom with an extra proton
              (the parent charge site).
            * `"protons"` -- a list containing the index of every proton
              attached to a protonated atom (possibly empty).
            * `"neighbor"` -- the index of an atom directly connected to a
              protonated atom.

        Raises
        ------
        ValueError
            If `match_type` is not one of the supported values.
        IndexError
            If `"protonated_atom"` or `"neighbor"` is requested and the
            molecule contains no match.

        Notes
        -----
        The features available for matching are `"protons"`, `"protonated_atom"`, and `"neighbor"`

        `"protons"` will search for any protons attached to a protonated atom, e.g. R-NH3+, R-OH2+, and return the
        line indexes of each proton found.

        `"protonated_atom"` will search for the protonated atom itself, and return its line number. If there are multiple
        protonated atoms, it will return the first one.

        `"neighbor"` will search for the neighboring atoms to the protonated atom and return the line index of
        the first match. NOTE(review): the pattern uses `*`, which presumably also matches the explicit
        hydrogens added by `AddHs`; a heavy atom is expected to come first only because hydrogens are
        indexed after the heavy atoms -- confirm before relying on it for species such as NH4+.

        Matching only needs the molecular graph, so no 3D embedding is performed here.
        """

        # Validate first so a bad argument fails fast, before any RDKit work.
        if match_type not in Molecule._MATCH_SMARTS:
            raise ValueError('Invalid Match Type! Please select from "protons", "protonated_atom", "neighbor"')

        molecule = Chem.AddHs(Chem.MolFromSmiles(smiles_string))
        pattern = Chem.MolFromSmarts(Molecule._MATCH_SMARTS[match_type])
        matches = molecule.GetSubstructMatches(pattern)

        if match_type == "protons":
            return [match[0] for match in matches]

        if not matches:
            # Same exception type as indexing an empty match tuple, but with a
            # message that explains what was being looked for.
            raise IndexError(
                f"No {match_type!r} match found for SMILES string {smiles_string!r}"
            )

        return matches[0][0]

In [None]:
def input_generator(
    molecule_name: str,
    stage: str,
    final_structure: list[list[str]],
    save_directory: str | PathLike = "",
    charge: int = 1,
    dft_method: str = "wB97X-D3BJ",
    basis_set: str = "def2-TZVP",
    nprocs: int = 20,
    optional_input_params: str = ""
):
    """Generate an ORCA input file.

    The file is written to `<save_directory>/<molecule_name><stage>.inp`.

    Parameters
    ----------
    molecule_name : str
        The name of the molecule.
    stage : str
        Either 'R' or 'P' for 'Reactant' or 'Product'. It is appended verbatim
        to `molecule_name` to form the file name.
    final_structure : list[list[str]]
        The structure of the final assembled molecule, one row per atom.
        Each row is written with its entries separated by single spaces.
    save_directory : str | PathLike, default=""
        The directory to which the input file will be saved. An empty string
        means the current working directory. A trailing path separator is
        not required.
    charge : int, default=1
        The total charge of the system.
    dft_method : str, default="wB97X-D3BJ"
        The chosen density functional.
    basis_set : str, default="def2-TZVP"
        The chosen basis set.
    nprocs : int, default=20
        The number of processes used in the calculation.
    optional_input_params : str, optional
        Any additional input parameters to pass to the keyword line.
    """

    # Everything that precedes the coordinate block; the multiplicity is fixed at 1.
    input_specs = (
        f"!Opt {dft_method} {basis_set} LargePrint {optional_input_params}\n"
        "%geom\n"
        "\tMaxIter 200\n"
        "end\n"
        "%pal\n"
        f"\tnprocs {nprocs!s}\n"
        "end\n"
        f"* xyz {charge!s} 1"
    )

    # Closes the coordinate block opened by the "* xyz" line.
    end_line = "*\n"

    # Join with pathlib so the directory and file name are always separated;
    # plain string concatenation would turn "out" + "molR.inp" into "outmolR.inp".
    input_path = Path(save_directory) / f"{molecule_name}{stage}.inp"

    with open(input_path, "w", newline="\n") as input_file:
        np.savetxt(
            input_file,
            final_structure,
            fmt="%s",
            header=input_specs,
            footer=end_line,
            comments="",
        )

In [None]:
def structure_one_generator(
    molecule_alignment_quaternion: npt.NDArray,
    mol: Molecule,
) -> Geometry:
    """Apply an alignment matrix to every atom of a molecule.

    Parameters
    ----------
    molecule_alignment_quaternion : NDArray
        The matrix that is multiplied onto each atom position.
    mol : Molecule
        The molecule whose coordinates and elements are used.

    Returns
    -------
    Geometry
        A new geometry with the transformed coordinates. It is built with
        `Geometry.from_list` without a charge argument.
    """

    rotated_positions = [
        np.dot(molecule_alignment_quaternion, position)
        for position in mol.get_coords()
    ]

    return Geometry.from_list(mol.get_elements(), rotated_positions)

In [None]:
def structure_two_generator(
    stage: str,
    z_rotation_offset: npt.NDArray,
    structure_one: Geometry,
    distance_multiplier: float = 2.7,
    z_flipper: npt.NDArray = Z_REFLECTOR,
    disable_nudge: bool = False,
) -> Geometry:
    """Generate the second structure that the proton will be transferred to.

    Parameters
    ----------
    stage : {"Reactant", "Product", "Transition"}
        Stage of the reaction.
    z_rotation_offset : NDArray
        A matrix that rotates around the Z-axis.
    structure_one : Geometry
        The first structure generated from `structure_one_generator()`.
    distance_multiplier : float, default=2.7
        The distance between the parent charge sites in Angstroms.
    z_flipper : NDArray, default=Z_REFLECTOR
        The matrix applied to every atom position before it is shifted along Z.
    disable_nudge : bool, default=False
        When True, identity rotations and a zero translation are used instead
        of the matrices from `LinearAlgebra.nudge_matrix_generator()`.

    Returns
    -------
    structure_two : Geometry
        The second structure to which the proton will be transferred. It is
        built with `Geometry.from_list` without a charge argument.

    Raises
    ------
    ValueError
        If `stage` is not one of the three supported stages.

    Notes
    -----
    For "Reactant" and "Product" each position is transformed by `z_flipper`,
    shifted by `distance_multiplier` along Z, multiplied with
    `z_rotation_offset`, and then nudged (the translation nudge is subtracted
    for "Product"). For "Transition" the shift is `distance_multiplier - 0.1`
    and no nudge is applied.
    """

    if stage not in ("Reactant", "Product", "Transition"):
        # Fail with a clear message instead of an unbound local further down.
        raise ValueError(
            f'Invalid stage {stage!r}! Please select from "Reactant", "Product", "Transition"'
        )

    if disable_nudge:
        nudge_rotate_x = IDENTITY_MATRIX
        nudge_rotate_y = IDENTITY_MATRIX
        nudge_translate = np.array([0., 0., 0.])

    else:
        nudge_rotate_x, nudge_rotate_y, nudge_translate = LinearAlgebra.nudge_matrix_generator(stage)

    # The Product stage is nudged in the opposite direction to the Reactant stage.
    if stage == "Product":
        stage_translation = (-1) * nudge_translate
    else:
        stage_translation = nudge_translate

    new_coords = []

    for atom_position in structure_one.get_coords():
        flipped = np.dot(z_flipper, atom_position)

        if stage == "Transition":
            new_coord = np.dot(
                z_rotation_offset,
                flipped + ((distance_multiplier - 0.1) * Z_UNIT_VECTOR),
            )
        else:
            shifted = flipped + (distance_multiplier * Z_UNIT_VECTOR)
            # NOTE(review): here the position is multiplied from the left
            # (vector . matrix) while the Transition branch multiplies from the
            # right (matrix . vector). The two agree for a symmetric matrix such
            # as a 180-degree rotation but differ otherwise -- confirm intent.
            rotated = np.dot(shifted, z_rotation_offset)
            new_coord = np.dot(
                nudge_rotate_x,
                np.dot(nudge_rotate_y, rotated),
            ) + stage_translation

        new_coords.append(new_coord)

    structure_two = Geometry.from_list(structure_one.get_elements(), new_coords)

    return structure_two

In [None]:
def structure_checker(
    structure_one: Geometry,
    structure_two: Geometry,
    mol: Molecule,
    attempt_number: int,
) -> bool:
    """Check for overlap between the two molecules

    Parameters
    ----------
    structure_one : Geometry
        The structure generated from `structure_one_generator()`.
    structure_two : Geometry
        The structure generated from `structure_two_generator()`.
    mol : Molecule
        A Molecule object
    attempt_number : int
        The attempt number for this run, typical maximum is 2 (3 possible tries)

    Returns
    -------
    bool
        True indicates that the structures have some overlap
        (atoms within 1.5 Angstroms of each other).
        False indicates there is no detected overlap.

    Notes
    -----
    The proton involved in the transfer is ignored in both structures. Both
    structures use the same atom ordering, so that proton has the same index
    `mol.get_proton_index(attempt_number)` in each of them.
    """

    proton_index = mol.get_proton_index(attempt_number)

    for i, atom_one in enumerate(structure_one):
        if i == proton_index:
            # The transferring proton of structure one is exempt.
            continue
        for j, atom_two in enumerate(structure_two):
            if j == proton_index:
                # The transferring proton of structure two is exempt as well.
                continue
            if LinearAlgebra.distance_calculator(atom_one.xyz, atom_two.xyz) < 1.5:
                return True

    # We checked all atoms, none overlapped
    return False

In [None]:
def final_structure_generator(
    structure_one: Geometry,
    structure_two: Geometry,
    stage: str,
    mol: Molecule,
    attempt_number: int,
    distance_multiplier: float = 2.7,
) -> Geometry:
    """Assemble both structures plus the single proton that belongs to `stage`.

    Should only be used after checking for/handling overlap.

    Parameters
    ----------
    structure_one : Geometry
        The structure generated from `structure_one_generator()`.
        Its transferring proton is removed in place.
    structure_two : Geometry
        The structure generated from `structure_two_generator()`.
        Its transferring proton is removed in place.
    stage : {"Reactant", "Product", "Transition"}
        Stage of the reaction. Any other value yields a structure without
        the transferring proton.
    mol : Molecule
        The current Molecule object.
    attempt_number : {0, 1, 2}
        The attempt number, used to pick the proton index.
    distance_multiplier : float, default=2.7
        The distance between the parent charge sites in Angstroms.

    Returns
    -------
    final_structure : Geometry
        The two proton-less structures concatenated, followed by the proton
        of structure one ("Reactant"), the proton of structure two
        ("Product"), or a new hydrogen on the Z-axis at
        `(distance_multiplier - 0.01) / 2` ("Transition").
    """

    proton_index = mol.proton_position_indices[attempt_number]

    # pop_atom mutates the geometries that were passed in.
    proton_from_one = structure_one.pop_atom(proton_index)
    proton_from_two = structure_two.pop_atom(proton_index)
    midway_proton = Atom("H", np.array([0., 0., (distance_multiplier - 0.01) / 2]))

    assembled = structure_one + structure_two

    stage_protons = (
        ("Reactant", proton_from_one),
        ("Product", proton_from_two),
        ("Transition", midway_proton),
    )

    for stage_name, proton in stage_protons:
        if stage == stage_name:
            assembled.add_atom(proton)
            break

    return assembled

In [None]:
def _rebuild_structure_two(
    structure_one: Geometry,
    stage: str,
    mol: Molecule,
    attempt_number: int,
    z_rotation_offset: npt.NDArray,
    distance_multiplier: float,
) -> Geometry:
    """Regenerate structure two from `structure_one` with the given Z rotation (nudge disabled)."""

    coords = structure_one.get_coords()
    z_flipper = LinearAlgebra.gen_alignment_matrix(
        vector_one=coords[mol.get_proton_index(attempt_number)],
        vector_two=coords[mol.neighboring_atom_index],
        alignment_angle=np.pi,
    )

    return structure_two_generator(
        stage,
        z_rotation_offset,
        structure_one,
        distance_multiplier,
        z_flipper,
        disable_nudge=True,
    )


def overlap_handler(
    structure_one: Geometry,
    structure_two: Geometry,
    stage: str,
    mol: Molecule,
    z_rotation_offset: npt.NDArray,
    distance_multiplier: float = 2.7,
) -> tuple[Geometry, Geometry, int]:
    """Fix molecule overlap issues.

    For each available proton position, both structures are rebuilt with a
    180 degree rotation about the Z-axis; if they still overlap, structure
    two is rebuilt at rotations of 0 to 315 degrees in 45 degree steps.
    Only when every rotation fails is the next proton position tried.

    Parameters
    ----------
    structure_one : Geometry
        The structure generated from `structure_one_generator()`.
    structure_two : Geometry
        The structure generated from `structure_two_generator()`.
    stage : {"Reactant", "Product", "Transition"}
        Stage of the reaction.
    mol : Molecule
        The current Molecule object.
    z_rotation_offset : NDArray
        A matrix that rotates around the Z-axis. Kept for interface
        compatibility only: the handler always supplies its own rotations,
        so the passed value is not used.
    distance_multiplier : float, default=2.7
        The distance between the parent charge sites in Angstroms.

    Returns
    -------
    structure_one : Geometry
        A corrected version of `structure_one`.
    structure_two : Geometry
        A corrected version of `structure_two`.
    attempt_number : int
        Index into `mol.proton_position_indices` of the proton position the
        returned structures were built with. Always a valid index (when any
        proton position exists), including when the overlap could not be
        fixed; in that case the last structures tried are returned.
    """

    proton_position_attempts = len(mol.proton_position_indices)

    # Nothing to repair: hand the inputs back untouched.
    if not structure_checker(structure_one, structure_two, mol, 0):
        return structure_one, structure_two, 0

    # Iterating over the valid indices guarantees `structure_checker` is never
    # called with an out-of-range attempt, and that every check is made on
    # structures built for the attempt being checked.
    for attempt_number in range(proton_position_attempts):
        proton_position = mol.get_proton_position(attempt_number)

        alignment_matrix = LinearAlgebra.gen_alignment_matrix(
            proton_position, Z_UNIT_VECTOR
        )

        structure_one = structure_one_generator(alignment_matrix, mol)

        # First try: plain 180 degree rotation about Z.
        structure_two = _rebuild_structure_two(
            structure_one,
            stage,
            mol,
            attempt_number,
            Z_ROTATION_180,
            distance_multiplier,
        )

        # If the structure works, then return it and exit the function
        if not structure_checker(structure_one, structure_two, mol, attempt_number):
            print(f"Geometry was fixed with proton position #{attempt_number}")
            return structure_one, structure_two, attempt_number

        # Still overlapping: sweep the full circle in 45 degree increments.
        for step in range(8):
            rotation_angle = step * (np.pi / 4)
            cos_angle = np.cos(rotation_angle)
            sin_angle = np.sin(rotation_angle)

            rotation_matrix = np.array(
                [
                    [cos_angle, -sin_angle, 0],
                    [sin_angle, cos_angle, 0],
                    [0, 0, 1],
                ],
                dtype=float,
            )

            structure_two = _rebuild_structure_two(
                structure_one,
                stage,
                mol,
                attempt_number,
                rotation_matrix,
                distance_multiplier,
            )

            if not structure_checker(
                structure_one, structure_two, mol, attempt_number
            ):
                print(
                    f"Geometry was fixed with proton position #{attempt_number} with a rotation of {step * 45} degrees."
                )
                return structure_one, structure_two, attempt_number

    print("Geometry could not be fixed.")
    # Report the last attempt actually built rather than one past the end,
    # which would index outside `mol.proton_position_indices` downstream.
    last_attempt = max(proton_position_attempts - 1, 0)
    return structure_one, structure_two, last_attempt

In [None]:
class Data:
    """Class to contain a list of SMILES and names for generating NEB or single-molecule structures.

    Attributes
    ----------
    smiles_strings : list[str]
        A list of the SMILES strings for the molecules.
    names : list[str]
        A list of the names of each molecule.

    Methods
    -------
    build_neb(index, stage, xyz_save_dir, xyz_data_dir, disable_nudge, distance_multiplier)
        Create a single-atom transfer NEB structure for a specified molecule in the list.
    build_single(index, xyz_save_dir)
        Create a single molecule structure from a SMILES string in the list.
    build_all_neb(stage, xyz_save_dir, xyz_data_dir, disable_nudge, distance_multiplier)
        Create single-atom transfer NEB structures for all molecules in the list.
    build_all_single(xyz_save_dir)
        Create a single molecule structure for all molecules in the list from their SMILES strings.
    multiple_input(file_path)
        Load a CSV containing molecule SMILES in the first column (with header) and molecule names in the second column (with header).

    Notes
    -----
    A CSV for this class should look like the following:

    ```
    smiles_header,names_header
    smiles1,name1
    smiles2,name2
    ...
    ```

    The header row is required; `multiple_input()` always discards the first row.
    """

    def __init__(
        self,
        smiles_strings: list[str],
        names: list[str],
    ):
        self.smiles_strings: list[str] = smiles_strings
        self.names: list[str] = names


    def __repr__(self):
        """Return a numbered, column-aligned table of names and SMILES."""

        # Inner literals use single quotes: reusing the outer quote character
        # inside an f-string is a syntax error before Python 3.12.
        rows = [f"{'':6}{'Name':12}{'SMILES':11}\n\n"]
        for position, smiles in enumerate(self.smiles_strings):
            label = f"{position + 1}."
            rows.append(f"{label:6}{self.names[position]:12}{smiles:20}\n")
        return "".join(rows)


    def build_neb(
        self,
        index: int,
        stage: str = "Reactant",
        xyz_save_dir: PathLike | None = None,
        xyz_data_dir: PathLike | None = None,
        disable_nudge: bool = False,
        distance_multiplier: float = 2.7,
    ):
        """Build structures for single-atom transfer NEB calculations and write
        the result to an XYZ file.

        Parameters
        ----------
        index : int
            The index of the molecule that should be used in the structure build.
        stage : {"Reactant", "Product", "Transition"}
            Stage of the reaction.
        xyz_save_dir : PathLike | None, default=None
            Path to the folder that the XYZ file(s) should be written to.
            Default `None` prompts user to select a folder.
        xyz_data_dir : PathLike | None, default=None
            Path to a folder containing pre-existing structures that will be used in generating the NEB structures.
            Default `None` generates structures based off of the SMILES strings.
        disable_nudge : bool, default=False
            Disables the part of the structure build that applies a slight
            nudge to the structure to avoid local minima in geometry optimizations
        distance_multiplier : float, default=2.7
            The distance between the protonated atoms in the final NEB structure (in Angstroms).

        Raises
        ------
        RuntimeError
            If the loaded structure has a charge of zero.

        Notes
        -----
        The `xyz_data_dir` should have a list of XYZ files with the same name as the supplied names (with a `.xyz` extension).

        The nudge referred to by `disable_nudge` is an applied rotation of +/- 15 degrees around the X-axis and +/- 20 degrees around
        the Y-axis used to avoid the molecules falling into a local minima during geometry optimizations.
        Sometimes this can cause structure overlap when no nudge would provide a clean structure, but generally has no problems.

        The parameter `distance_multiplier` is a set distance between the 2 protonated atoms BEFORE the nudge is applied.
        The default of 2.7 Angstroms is consistent with a large amount of optimized reactant and product geometries, however
        some systems may require a longer bond. It is not recommended to go below 2.5 Angstroms, as this can often cause atoms to
        overlap during structure building and make it harder for the overlap handler to fix the issue.

        If overlap is detected, `overlap_handler()` rebuilds the structures with the nudge disabled,
        regardless of `disable_nudge`.
        """

        if xyz_save_dir is None:
            xyz_save_dir = open_folder(title="Select a directory to save XYZ Files")

        smiles = self.smiles_strings[index]
        name = self.names[index]

        if xyz_data_dir is not None:
            structure = Geometry.from_xyz(
                Path(xyz_data_dir) / f"{name}.xyz",
                charge=Molecule.calculate_charge(smiles)
            )
        else:
            structure = Geometry.from_smiles(smiles)

        if structure.charge == 0:
            raise RuntimeError("Molecule must be protonated for NEB structure generation!")

        mol = Molecule(smiles, structure, name)

        print(f"Working on {mol.molecule_name}...")

        # First build always uses proton position 0 with no extra Z rotation.
        attempt_number = 0
        z_rotation_offset = IDENTITY_MATRIX

        proton_position = mol.get_proton_position(attempt_number)

        alignment_matrix = LinearAlgebra.gen_alignment_matrix(proton_position, Z_UNIT_VECTOR)

        structure_one = structure_one_generator(
            alignment_matrix, mol
        )

        z_flipper = LinearAlgebra.gen_alignment_matrix(
            vector_one=structure_one.get_coords()[mol.get_proton_index(attempt_number)],
            vector_two=structure_one.get_coords()[mol.neighboring_atom_index],
            alignment_angle=np.pi
        )

        structure_two = structure_two_generator(
            stage,
            z_rotation_offset,
            structure_one,
            distance_multiplier,
            z_flipper,
            disable_nudge,
        )

        if structure_checker(structure_one, structure_two, mol, attempt_number):
            print("Atomic Overlap Detected, attempting to fix...")
            # The handler reports which proton position it ended up using.
            structure_one, structure_two, attempt_number = overlap_handler(
                structure_one,
                structure_two,
                stage,
                mol,
                z_rotation_offset,
                distance_multiplier,
            )

        final_structure = final_structure_generator(
            structure_one,
            structure_two,
            stage,
            mol,
            attempt_number,
            distance_multiplier,
        )

        final_structure.to_xyz(
            name=f"{mol.molecule_name}-{stage}",
            xyz_dir=xyz_save_dir
        )

        print(f"Molecule {mol.molecule_name} complete!\n")


    def build_single(self, index: int, xyz_save_dir: PathLike | None = None):
        """Build a single molecule structure from the list of SMILES.

        Parameters
        ----------
        index : int
            The index of the molecule that should be used in the structure build.
        xyz_save_dir : PathLike | None, default=None
            Path to the directory that the XYZ file(s) should be written to.
            Default `None` prompts user to select a directory.

        Notes
        -----
        The stage for the molecules are automatically selected to be either `"C"` for charged or `"N"` for neutral
        depending on the charge calculated from the SMILES. These are appended to the end of the file name to delineate
        between the protonated and neutral forms of the molecules.
        """

        print(f"Working on {self.names[index]}...")

        if xyz_save_dir is None:
            xyz_save_dir = open_folder(title="Select a directory to save XYZ Files")

        structure = Geometry.from_smiles(self.smiles_strings[index])

        # Any non-zero charge counts as "charged"; previously a negative
        # charge left `stage` unassigned and crashed on the next statement.
        stage = "N" if structure.charge == 0 else "C"

        structure.to_xyz(
            name=f"{self.names[index]}-{stage}",
            xyz_dir=xyz_save_dir
        )

        print(f"Molecule {self.names[index]} complete!\n")


    def build_all_neb(
        self,
        stage: str = "Reactant",
        xyz_save_dir: PathLike | None = None,
        xyz_data_dir: PathLike | None = None,
        disable_nudge: bool = False,
        distance_multiplier: float = 2.7,
    ):
        """Build structures for single-atom transfer NEB calculations for every molecule
        in the list and write the result to an XYZ file.

        All parameters are passed to `Data.build_neb()`.
        """

        # Resolve the save directory once so the user is not prompted per molecule.
        if xyz_save_dir is None:
            xyz_save_dir = open_folder(title="Select a directory to save XYZ Files")

        for index in range(len(self.names)):
            self.build_neb(
                index=index,
                stage=stage,
                xyz_save_dir=xyz_save_dir,
                xyz_data_dir=xyz_data_dir,
                disable_nudge=disable_nudge,
                distance_multiplier=distance_multiplier,
            )


    def build_all_single(self, xyz_save_dir: PathLike | None = None):
        """Build single-molecule structures for all SMILES in the list.

        All parameters are passed to `Data.build_single()`.
        """

        # Resolve the save directory once so the user is not prompted per molecule.
        if xyz_save_dir is None:
            xyz_save_dir = open_folder(title="Select a directory to save XYZ Files")

        for index in range(len(self.names)):
            self.build_single(index=index, xyz_save_dir=xyz_save_dir)


    @classmethod
    def multiple_input(
        cls,
        file_path: PathLike | None = None,
    ) -> Data:
        """Read a CSV file listing multiple molecules.

        Parameters
        ----------
        file_path : PathLike | None, default=None
            Path to the CSV file. Default `None` prompts user to select a file.

        Returns
        -------
        Data
            A new instance holding the SMILES (first column) and names
            (second column) of every data row.

        Notes
        -----
        The first row is treated as the header described in the class
        documentation and is discarded. Completely blank rows are skipped.
        """

        if file_path is None:
            file_path = open_file(title="Select a CSV", file_types = [["CSV Files", "*.csv"], ["All files", "*"]])

        with open(file_path, newline="") as csvfile:
            reader = csv.reader(csvfile)
            # Drop the header row; otherwise it is loaded as the first molecule.
            next(reader, None)
            # `csv.reader` yields an empty list for a blank line.
            rows = [row for row in reader if row]

        smiles_strings = [row[0] for row in rows]
        names = [row[1] for row in rows]

        # `cls` rather than `Data` so subclasses get instances of themselves.
        return cls(smiles_strings, names)