In [128]:
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
import numpy.typing as npt
import csv
from tkinter import Tk
from tkinter import filedialog
from os import PathLike
from pathlib import Path

In [23]:
# Shared geometric constants used by the structure-building functions below

# Diagonal matrix that negates X and Y and leaves Z untouched
# (equivalent to a 180-degree rotation about the Z axis)
z_rotation_offset = np.diag([-1.0, -1.0, 1.0])

# Candidate distances between the two parent atoms, tried in this order
DISTANCE_MULTIPLIERS = [2.7, 2.8, 2.9, 3.0, 3.1, 3.2]

# Unit vector pointing along +Z
Z_UNIT_VECTOR = np.array((0.0, 0.0, 1.0))

# Diagonal matrix that mirrors a vector through the XY plane (negates Z)
Z_REFLECTOR = np.diag([1.0, 1.0, -1.0])

# 3x3 identity, used wherever a "do nothing" transform is required
IDENTITY_MATRIX = np.eye(3)

In [None]:
def from_smiles(smiles_string: str) -> tuple[list[str], npt.NDArray]:
    """Convert a SMILES string to a 3D molecule.

    Parameters
    ----------
    smiles_string : str
        Any valid SMILES string (for example, "N#Cc1nn[nH]c(C#N)1")

    Returns
    -------
    atom_identities : list[str]
        A list of all of the atomic symbols (explicit hydrogens included)
    atom_coords : NDArray
        A numpy array containing lists of XYZ coordinates for each atom

    Raises
    ------
    ValueError
        If the SMILES string cannot be parsed, or if RDKit fails to embed
        a 3D conformer for it.
    """

    parsed = Chem.MolFromSmiles(smiles_string)
    if parsed is None:
        # RDKit signals a parse failure by returning None instead of raising
        raise ValueError(f"Could not parse SMILES string: {smiles_string!r}")

    molecule = Chem.AddHs(parsed)

    # EmbedMolecule returns the new conformer ID, or -1 when embedding fails
    if AllChem.EmbedMolecule(molecule, AllChem.ETKDGv3()) < 0:
        raise ValueError(
            f"Could not generate 3D coordinates for SMILES string: {smiles_string!r}"
        )

    # The XYZ block is: atom count, comment line, then one "symbol x y z" row per atom
    xyz_string = Chem.rdmolfiles.MolToXYZBlock(molecule)
    atom_rows = [row.split() for row in xyz_string.splitlines()[2:]]
    atom_rows = [row for row in atom_rows if row]

    atom_identities = [str(row[0]) for row in atom_rows]
    atom_coords = np.array([[float(value) for value in row[1:]] for row in atom_rows])

    return atom_identities, atom_coords

In [None]:
def substructure_match(smiles_string: str) -> tuple[int, list[int], int]:
    """Generate an RDKit molecule and search the structure for protonated atoms.

    The search is purely topological (SMARTS matching on the hydrogen-expanded
    molecular graph), so no 3D embedding is performed here.

    Parameters
    ----------
    smiles_string : str
        Any valid SMILES string.

    Returns
    -------
    protonated_atom_index : int
        The atom index of the first positively charged, protonated N or O
        (the parent charge site).
    proton_position_indices : list[int]
        The atom indices of every hydrogen bonded to a matching charge site.
    near_neighbor_index : int
        The atom index of the first atom bonded to a matching charge site.

    Raises
    ------
    ValueError
        If the SMILES string cannot be parsed.
    IndexError
        If the molecule contains no matching protonated N/O site.
    """

    parsed = Chem.MolFromSmiles(smiles_string)
    if parsed is None:
        # RDKit signals a parse failure by returning None instead of raising
        raise ValueError(f"Could not parse SMILES string: {smiles_string!r}")

    molecule = Chem.AddHs(parsed)

    charged_atom_pattern = Chem.MolFromSmarts("[#7H+,#7H2+,#7H3+,#8H+,#8H2+]")

    proton_pattern = Chem.MolFromSmarts(
        "[$([#1][#7H+]),$([#1][#7H2+]),$([#1][#7H3+]),$([#1][#8H+]),$([#1][#8H2+])]"
    )

    near_neighbor_pattern = Chem.MolFromSmarts(
        "[$([*][#7H+]),$([*][#7H2+]),$([*][#7H3+]),$([*][#8H+]),$([*][#8H2+])]"
    )

    proton_matches = molecule.GetSubstructMatches(proton_pattern)
    parent_matches = molecule.GetSubstructMatches(charged_atom_pattern)
    near_neighbor_matches = molecule.GetSubstructMatches(near_neighbor_pattern)

    if not parent_matches or not near_neighbor_matches:
        # Same exception type the bare [0][0] lookup used to raise, with context
        raise IndexError(
            f"No protonated N/O charge site found in SMILES string: {smiles_string!r}"
        )

    # Every match is a 1-tuple because each pattern describes a single atom
    protonated_atom_index: int = parent_matches[0][0]
    proton_position_indices: list[int] = [match[0] for match in proton_matches]
    near_neighbor_index: int = near_neighbor_matches[0][0]

    return protonated_atom_index, proton_position_indices, near_neighbor_index

In [None]:
def multiple_input(
    get_mmd: bool = True,
    get_xyz_path: bool = True,
) -> tuple[list[list[str]], PathLike] | list[list[str]] | PathLike:
    """Read CSV files with multiple molecules and/or specify folder containing XYZ files

    Parameters
    ----------
    get_mmd : bool, default=True
        Option to read a CSV file with data for multiple molecules.
    get_xyz_path : bool, default=True
        Option to select a folder path for XYZ files (to put in or to take out).

    Returns
    -------
    multiple_molecule_data : list[list[str]]
        A list of lists each containing data about a molecule.
    xyz_file_path : PathLike
        The path to a directory where XYZ files are or should be placed.

    Notes
    -----
    Selecting only one of the two parameters will return only that parameter.
    """

    root = Tk()
    root.wm_attributes("-topmost", 1)
    root.withdraw()

    if get_mmd:
        file_name = filedialog.askopenfilename(
            parent=root,
            initialdir="",
            title="Select a CSV",
            filetypes=(("CSV Files", "*.csv"), ("All files", "*")),
        )

        multiple_molecule_data = []

        with open(file_name, newline="") as csvfile:
            data = csv.reader(csvfile)
            for row in data:
                multiple_molecule_data.append(row)

    if get_xyz_path:
        xyz_file_path = Path(
            filedialog.askdirectory(initialdir="", title="Select XYZ File Parent Directory")
            + "/"
        )

    if get_mmd and get_xyz_path:
        return multiple_molecule_data[1:], xyz_file_path
    elif not get_xyz_path:
        return multiple_molecule_data[1:]
    elif not get_mmd:
        return xyz_file_path
    

In [None]:
class Data:
    """A batch of molecules (names plus SMILES strings) and where their XYZ files live.

    Attributes
    ----------
    xyz_file_path : Path
        Directory that XYZ files are read from and written to.
    smiles_strings : list[str]
        One SMILES string per molecule.
    names : list[str]
        One name per molecule, in the same order as `smiles_strings`.
    stage : str
        Stage label passed to the structure generators and used in file names.
    charge : int
        Charge passed to `Molecule` by `build_all_single`.
    """

    def __init__(
        self,
        xyz_file_path: PathLike = None,
        smiles_strings: list[str] = None,
        names: list[str] = None,
        stage: str = "S",
        charge: int = 1,
    ):
        """Store the batch, prompting through `multiple_input` for anything not supplied.

        Parameters
        ----------
        xyz_file_path : PathLike, optional
            XYZ directory; a directory dialog is opened when omitted.
        smiles_strings : list[str], optional
            SMILES strings; read from column 0 of a selected CSV when omitted.
        names : list[str], optional
            Molecule names; read from column 1 of a selected CSV when omitted.
        stage : str, default="S"
            Stage label.
        charge : int, default=1
            Molecular charge.

        Raises
        ------
        ValueError
            If `names` and `smiles_strings` end up with different lengths.
        """
        self.xyz_file_path = (
            Path(xyz_file_path)
            if xyz_file_path is not None
            else multiple_input(get_mmd=False)
        )
        self.stage = stage
        self.charge = charge

        # Only open the CSV dialog when at least one column is actually needed
        if names is None or smiles_strings is None:
            multiple_molecule_data = multiple_input(get_xyz_path=False)
            if names is None:
                names = [row[1] for row in multiple_molecule_data]
            if smiles_strings is None:
                smiles_strings = [row[0] for row in multiple_molecule_data]

        if len(names) != len(smiles_strings):
            raise ValueError(
                f"Got {len(names)} names but {len(smiles_strings)} SMILES strings; "
                "they must pair up one-to-one."
            )

        self.names: list[str] = names
        self.smiles_strings: list[str] = smiles_strings

    def __repr__(self):
        """Return a tab-separated, 1-indexed table of names and SMILES strings."""
        self_repr = "\tName\tSMILES\n\n"
        for i in range(len(self.smiles_strings)):
            self_repr += f"{i+1})\t"+self.names[i]+"\t"+self.smiles_strings[i]+"\n"
        return self_repr

    @staticmethod
    def _coords(geometry):
        """Return `geometry.coords` for Structure-like objects, else `geometry` itself."""
        return getattr(geometry, "coords", geometry)

    def build_all_neb(self, has_xyz_data: bool = False):
        """Build a two-molecule structure for every entry and write it as an XYZ file.

        For each molecule: align the first proton with +Z, generate the partner
        structure, resolve overlaps if detected, assemble the final structure and
        write it to ``<xyz_file_path>/<name>-<stage>.xyz``.

        Parameters
        ----------
        has_xyz_data : bool, default=False
            When True, read each molecule's geometry from
            ``<xyz_file_path>/<name>.xyz`` instead of generating it from SMILES.
        """

        distance_multiplier = DISTANCE_MULTIPLIERS[0]

        for name, smiles in zip(self.names, self.smiles_strings):
            # None lets Molecule generate the geometry from the SMILES string
            geometry = (
                Structure(xyz_file_path=Path(self.xyz_file_path) / f"{name}.xyz")
                if has_xyz_data
                else None
            )

            mol = Molecule(
                smiles_string=smiles,
                structure=geometry,
                molecule_name=name,
            )

            # Rotation that brings the first proton onto the +Z axis
            proton_position = mol.get_proton_position(attempt_number=0)

            molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
            molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
            molecule_alignment_quaternion = quaternion_builder(
                molecule_alignment_vector, molecule_alignment_angle
            )

            check_structure_one = structure_one_generator(
                molecule_alignment_quaternion, mol
            )
            coords_one = self._coords(check_structure_one)

            # Half-turn about the normal of the plane spanned by the proton and
            # the neighbouring atom (both measured from the origin)
            new_alignment_angle = np.pi
            new_alignment_vector = normalize(
                np.cross(
                    coords_one[mol.proton_position_indices[0]],
                    coords_one[mol.neighboring_atom_index],
                )
            )
            z_flipper = quaternion_builder(new_alignment_vector, new_alignment_angle)

            check_structure_two = structure_two_generator(
                self.stage,
                z_rotation_offset,
                check_structure_one,
                distance_multiplier,
                z_flipper,
            )

            print(mol.molecule_name)

            attempt_number = 0

            # The checker and handler are documented as taking coordinate arrays
            if structure_checker(
                self._coords(check_structure_one),
                self._coords(check_structure_two),
                mol,
                0,
            ):
                print("Atomic Overlap Detected, attempting to fix...")
                check_structure_one, check_structure_two, attempt_number = overlap_handler(
                    self._coords(check_structure_one),
                    self._coords(check_structure_two),
                    self.stage,
                    mol,
                    z_rotation_offset,
                    distance_multiplier,
                )

            final_structure = final_structure_generator(
                self._coords(check_structure_one),
                self._coords(check_structure_two),
                self.stage,
                mol,
                attempt_number,
                distance_multiplier,
            )

            output_path = Path(self.xyz_file_path) / f"{mol.molecule_name}-{self.stage}.xyz"
            with open(output_path, "w", newline="") as final_xyz:
                # Two copies of the molecule share a single transferring proton
                headerstart = str(mol.get_num_atoms() * 2 - 1) + "\n"
                np.savetxt(final_xyz, final_structure, fmt="%s", header=headerstart, comments="")

    def build_all_single(self):
        """Construct a `Molecule` for every entry using the stored charge.

        NOTE(review): the constructed molecules are currently discarded; this
        method looks unfinished - confirm what should be done with each one.
        """
        for i in range(len(self.names)):
            structure = Molecule(
                smiles_string=self.smiles_strings[i],
                molecule_name=self.names[i],
                charge=self.charge,
            )

            

In [None]:
class Structure:
    """Atomic symbols together with their Cartesian coordinates.

    Attributes
    ----------
    symbols : list[str]
        The atomic symbol of every atom.
    coords : NDArray
        The XYZ coordinates of every atom, in the same order as `symbols`.
    """

    def __init__(
        self,
        xyz_file_path: PathLike = None,
        smiles_string: str = None,
        symbols: list[str] = None,
        coords: npt.NDArray = None,
    ):
        """Build a structure from exactly one source, checked in this order.

        Parameters
        ----------
        xyz_file_path : PathLike, optional
            Path to an XYZ file to read.
        smiles_string : str, optional
            SMILES string to embed in 3D.
        symbols : list[str], optional
            Atomic symbols (must be given together with `coords`).
        coords : NDArray, optional
            Atomic coordinates (must be given together with `symbols`).

        Raises
        ------
        RuntimeError
            If none of the three sources is supplied.
        """

        if xyz_file_path is not None:
            self.symbols, self.coords = Structure.from_xyz(xyz_file_path)
        elif smiles_string is not None:
            # from_smiles returns a Structure, so copy its fields rather than unpack it
            embedded = Structure.from_smiles(smiles_string)
            self.symbols, self.coords = embedded.symbols, embedded.coords
        elif symbols is not None and coords is not None:
            self.symbols = symbols
            self.coords = coords
        else:
            raise RuntimeError(
                "Must supply either an XYZ file path, a valid SMILES string, or coordinates AND symbols!"
            )

    def get_num_atoms(self):
        """Return the number of atoms (the length of `symbols`)."""
        return len(self.symbols)

    def concat_xyz(self):
        """Return one ``[symbol, x, y, z]`` list per atom."""
        return [
            [self.symbols[i]] + list(self.coords[i])
            for i in range(self.get_num_atoms())
        ]

    @classmethod
    def from_xyz(cls, file_path: str) -> tuple[list[str], npt.NDArray]:
        """Read in XYZ file format and return atomic symbols and coordinates

        The first two lines (atom count and comment) are skipped by position,
        so a blank comment line does not shift the atom records.

        Parameters
        ----------
        file_path : str
            Full path to an XYZ file.

        Returns
        -------
        atom_identities : list[str]
            A list of all of the atomic symbols
        atom_coords : NDArray
            A numpy array containing lists of XYZ coordinates for each atom
        """

        with open(file_path) as xyz_file:
            # splitlines drops line endings without touching a final unterminated line
            lines = xyz_file.read().splitlines()

        # Everything after the count and comment lines is an atom record; skip blanks
        atom_rows = [row.split() for row in lines[2:]]
        atom_rows = [row for row in atom_rows if row]

        atom_identities = [str(row[0]) for row in atom_rows]
        atom_coords = np.array([[float(value) for value in row[1:]] for row in atom_rows])

        return atom_identities, atom_coords

    @classmethod
    def from_smiles(cls, smiles_string: str) -> "Structure":
        """Convert a SMILES string to a 3D structure.

        Parameters
        ----------
        smiles_string : str
            Any valid SMILES string (for example, "N#Cc1nn[nH]c(C#N)1")

        Returns
        -------
        Structure
            A structure holding the atomic symbols and embedded XYZ coordinates
            (explicit hydrogens included).

        Raises
        ------
        ValueError
            If the SMILES string cannot be parsed, or if RDKit fails to embed
            a 3D conformer for it.
        """

        parsed = Chem.MolFromSmiles(smiles_string)
        if parsed is None:
            # RDKit signals a parse failure by returning None instead of raising
            raise ValueError(f"Could not parse SMILES string: {smiles_string!r}")

        molecule = Chem.AddHs(parsed)

        # EmbedMolecule returns the new conformer ID, or -1 when embedding fails
        if AllChem.EmbedMolecule(molecule, AllChem.ETKDGv3()) < 0:
            raise ValueError(
                f"Could not generate 3D coordinates for SMILES string: {smiles_string!r}"
            )

        # The XYZ block is: atom count, comment line, then one "symbol x y z" row per atom
        xyz_string = Chem.rdmolfiles.MolToXYZBlock(molecule)
        atom_rows = [row.split() for row in xyz_string.splitlines()[2:]]
        atom_rows = [row for row in atom_rows if row]

        atom_identities = [str(row[0]) for row in atom_rows]
        atom_coords = np.array([[float(value) for value in row[1:]] for row in atom_rows])

        return cls(symbols=atom_identities, coords=atom_coords)

    def write_xyz_file(self, name: str, xyz_dir: PathLike = Path("./")):
        """Write the structure to ``<xyz_dir>/<name>.xyz`` with a blank comment line.

        Parameters
        ----------
        name : str
            File name without the ``.xyz`` extension.
        xyz_dir : PathLike, default=Path("./")
            Directory in which the file is created.
        """
        xyz = self.concat_xyz()

        with open(Path(xyz_dir) / f"{name}.xyz", "w", newline="") as xyz_file:
            xyz_file.write(f"{self.get_num_atoms()}\n\n")
            for atom in xyz:
                xyz_file.write(f"{atom[0]:3}  {atom[1]:10f}  {atom[2]:10f}  {atom[3]:10f}\n")

    def __repr__(self):
        """Return a tab-separated, 1-indexed table of symbols and coordinates."""
        self_repr = "\tSymbol\tX\tY\tZ\n\n"
        for i in range(self.get_num_atoms()):
            self_repr += f"{i+1})\t{self.symbols[i]}\t{self.coords[i][0]:5f}\t{self.coords[i][1]:5f}\t{self.coords[i][2]:5f}\n"
        return self_repr
        

    #def write_orca_input(self, name: str, input_dir: PathLike = Path("./"), **kwargs):

In [130]:
# Scratch check: a nested list of equal-length rows converts to a 2-D array
b = [[1, 2, 3] for _ in range(3)]

print(np.array(b))

[[1 2 3]
 [1 2 3]
 [1 2 3]]


In [112]:
class Molecule:
    """Class that contains all molecule data.

    Attributes
    ----------
    smiles_string : str
        Any valid SMILES string.
    structure : Structure
        The structure of the molecule. For positively charged molecules its
        coordinates are translated so the parent charge site is at the origin.
    molecule_name : str, optional
        The molecule name that will be used for naming output XYZ and INP files.
    protonated_atom_index : int, optional
        The index of the atom that carries an extra proton (the parent charge site).
    neighboring_atom_index : int, optional
        The index of an atom attached to the parent charge site.
    proton_position_indices : list of int, optional
        A list of the indexes for each proton attached to the protonated atom.
    charge : int, default=1
        The charge of the molecule.

    Methods
    -------
    get_proton_position(attempt_number)
        Return the XYZ coordinates of the proton of interest.
    get_protonated_atom_position()
        Return the XYZ coordinates of the parent charge site.
    get_num_atoms()
        Return the number of atoms in the molecule.
    """

    def __init__(
        self,
        smiles_string: str,
        structure: "Structure" = None,
        molecule_name: str = None,
        protonated_atom_index: int = None,
        neighboring_atom_index: int = None,
        proton_position_indices: list[int] = None,
        charge: int = 1,
    ):
        """Store the molecule data, deriving anything that was not supplied.

        When `charge` is non-zero, any of the three index arguments left as
        None is filled in from a single `substructure_match` call. When
        `charge` is 0 no matching is attempted and missing indices stay None.
        When `structure` is None it is generated from the SMILES string.
        """
        self.smiles_string = smiles_string
        self.charge = charge
        self.structure = (
            structure
            if structure is not None
            else Structure(smiles_string=self.smiles_string)
        )
        self.molecule_name = molecule_name if molecule_name is not None else "bean"

        supplied = (protonated_atom_index, neighboring_atom_index, proton_position_indices)
        if charge != 0 and any(value is None for value in supplied):
            # One match call provides all three values
            matched_parent, matched_protons, matched_neighbor = substructure_match(
                self.smiles_string
            )
            if protonated_atom_index is None:
                protonated_atom_index = matched_parent
            if neighboring_atom_index is None:
                neighboring_atom_index = matched_neighbor
            if proton_position_indices is None:
                proton_position_indices = matched_protons

        self.protonated_atom_index = protonated_atom_index
        self.neighboring_atom_index = neighboring_atom_index
        self.proton_position_indices = proton_position_indices

        if charge > 0:
            # Move the parent charge site to the origin. This rebinds `coords` on
            # the Structure object itself, including one passed in by the caller.
            self.structure.coords = (
                self.structure.coords - self.structure.coords[self.protonated_atom_index]
            )

    def get_proton_position(self, attempt_number: int) -> npt.NDArray:
        """Get the XYZ coordinates of the requested proton.

        Parameters
        ----------
        attempt_number : {0, 1, 2}
            The current attempt number, used as an index into
            `proton_position_indices`.

        Returns
        -------
        proton_position: NDArray
            The coordinates of the requested proton.
        """
        return self.structure.coords[
            int(self.proton_position_indices[attempt_number])
        ]

    def get_protonated_atom_position(self) -> npt.NDArray:
        """Get the XYZ coordinates of the parent charge site."""
        return self.structure.coords[self.protonated_atom_index]

    def get_num_atoms(self) -> int:
        """Get the number of atoms in the molecule."""
        return self.structure.get_num_atoms()

In [None]:
def input_generator(
    molecule_name: str,
    stage: str,
    final_structure: list[list[str]],
    save_directory: str = "",
    charge: int = 1,
    dft_method: str = "wB97X-D3BJ",
    basis_set: str = "def2-TZVP",
    nprocs: int = 20,
    optional_input_params: str = "",
    multiplicity: int = 1,
):
    """Generate an ORCA input file.

    The file is written to ``<save_directory>/<molecule_name><stage>.inp``.

    Parameters
    ----------
    molecule_name : str
        The name of the molecule.
    stage : str
        Either 'R' or 'P' for 'Reactant' or 'Product'.
    final_structure : list[list[str]]
        The structure of the final assembled molecule.
    save_directory : str, default=""
        The directory to which the input file will be saved. A trailing path
        separator is optional; the empty string means the current directory.
    charge : int, default=1
        The total charge of the system.
    dft_method : str, default="wB97X-D3BJ"
        The chosen density functional.
    basis_set : str, default="def2-TZVP"
        The chosen basis set.
    nprocs : int, default=20
        The number of processes used in the calculation.
    optional_input_params : str, optional
        Any additional input parameters to pass to the keyword line.
    multiplicity : int, default=1
        The spin multiplicity written after the charge on the ``* xyz`` line.
    """

    input_specs = (
        f"!Opt {dft_method} {basis_set} LargePrint {optional_input_params}\n"
        "%geom\n"
        "\tMaxIter 200\n"
        "end\n"
        "%pal\n"
        f"\tnprocs {nprocs!s}\n"
        "end\n"
        f"* xyz {charge!s} {multiplicity!s}"
    )

    end_line = "*\n"

    # Joining with Path works with or without a trailing separator and for Path inputs
    input_path = Path(save_directory) / f"{molecule_name}{stage}.inp"

    with open(input_path, "w", newline="\n") as input_file:
        # savetxt writes the header, one row per atom, then the footer
        np.savetxt(
            input_file,
            final_structure,
            fmt="%s",
            header=input_specs,
            footer=end_line,
            comments="",
        )

In [30]:
def normalize(vector: npt.NDArray) -> npt.NDArray:
    """Scale `vector` to unit length; a zero-length vector is returned unchanged."""

    magnitude = np.linalg.norm(vector)

    # Guard against dividing by zero for the null vector
    if magnitude == 0:
        return vector

    return vector / magnitude

In [None]:
def quaternion_builder(
    vector: npt.ArrayLike,
    angle: float,
) -> npt.NDArray:
    """Build a quaternion matrix.

    The rotation axis is scaled to unit length before the quaternion is formed,
    so the result rotates by exactly `angle` whatever the length of `vector`.
    A zero-length axis yields the identity matrix.

    Parameters
    ----------
    vector : ArrayLike
        The vector to rotate around (can be list or NDArray)
    angle : float
        The angle to rotate by (in radians)

    Returns
    -------
    quaternion_matrix : NDArray
        A quaternion matrix that rotates by `angle` around `vector`
    """

    axis = np.asarray(vector, dtype=float)
    axis_length = np.linalg.norm(axis)
    if axis_length != 0:
        axis = axis / axis_length

    half_angle = angle / 2
    quaternion = np.array(
        [
            np.cos(half_angle),
            axis[0] * np.sin(half_angle),
            axis[1] * np.sin(half_angle),
            axis[2] * np.sin(half_angle),
        ]
    )

    # Re-normalise to absorb rounding error (and leave an all-zero quaternion alone)
    quaternion_length = np.linalg.norm(quaternion)
    if quaternion_length != 0:
        quaternion = quaternion / quaternion_length

    w, x, y, z = quaternion

    # Standard unit-quaternion to rotation-matrix expansion
    quaternion_matrix = np.array(
        [
            [
                1 - 2 * (y ** 2 + z ** 2),
                2 * (x * y - w * z),
                2 * (x * z + w * y),
            ],
            [
                2 * (x * y + w * z),
                1 - 2 * (x ** 2 + z ** 2),
                2 * (y * z - w * x),
            ],
            [
                2 * (x * z - w * y),
                2 * (y * z + w * x),
                1 - 2 * (x ** 2 + y ** 2),
            ],
        ]
    )

    return quaternion_matrix

In [32]:
def vector_angle(vector_one: npt.ArrayLike, vector_two: npt.ArrayLike) -> np.float64:
    """Return the angle, in radians, between `vector_one` and `vector_two`."""

    # Cosine of the angle is the dot product of the two unit vectors
    cosine = np.dot(normalize(vector_one), normalize(vector_two))

    # Clip so rounding error cannot push the cosine outside arccos' domain
    return np.arccos(np.clip(cosine, -1.0, 1.0))

In [33]:
def distance_calculator(
    vector_one: npt.ArrayLike, vector_two: npt.ArrayLike
) -> np.float64:
    """Return the Euclidean distance between the first three components of two vectors.

    Each component is passed through ``float()`` before use.
    """

    squared_distance = sum(
        (float(vector_one[axis]) - float(vector_two[axis])) ** 2
        for axis in range(3)
    )

    return np.sqrt(squared_distance)

In [None]:
def from_xyz(file_path: str) -> tuple[list[str], npt.NDArray[np.float64]]:
    """Read in XYZ file format and return atomic symbols and coordinates

    The first two lines (atom count and comment) are skipped by position, so a
    blank comment line does not shift the atom records, and a file without a
    trailing newline is read in full.

    Parameters
    ----------
    file_path : str
        Full path to an XYZ file.

    Returns
    -------
    atom_identities : list[str]
        A list of all of the atomic symbols
    atom_coords : NDArray
        A numpy array containing lists of XYZ coordinates for each atom
    """

    with open(file_path) as xyz_file:
        # splitlines drops line endings without touching a final unterminated line
        lines = xyz_file.read().splitlines()

    # Everything after the count and comment lines is an atom record; skip blanks
    atom_rows = [row.split() for row in lines[2:]]
    atom_rows = [row for row in atom_rows if row]

    # Atomic symbols, in file order
    atom_identities = [str(row[0]) for row in atom_rows]

    # Atomic coordinates, without the symbols
    atom_coords = np.array([[float(value) for value in row[1:]] for row in atom_rows])

    return atom_identities, atom_coords

In [None]:
def nudge_matrix_generator(stage: str) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]:
    """Creates the nudge matrices and vector for avoiding local minima during optimization.

    Parameters
    ----------
    stage : {"R", "P", "T"}
        Stage of the reaction ("R"=Reactant, "P"=Product, "T"=Transition)

    Returns
    -------
    nudge_rotate_x : NDArray
        An array that rotates around the x-axis by +15 degrees for "R" and
        -15 degrees for "P"; the identity matrix for any other stage.
    nudge_rotate_y : NDArray
        An array that rotates around the y-axis by +20 degrees for "R" and
        -20 degrees for "P"; the identity matrix for any other stage.
    nudge_translate : NDArray
        A vector that nudges the molecule by a set amount (currently all zeros)
    """

    # xv_degree (15 Degrees)
    xv_degree = np.pi / 12
    # xx_degree (20 Degrees)
    xx_degree = np.pi / 9

    nudge_translate = np.array([0, 0, 0])

    # Reactant and product are nudged in opposite directions
    direction = {"R": 1.0, "P": -1.0}.get(stage)

    if direction is None:
        # No nudge for other stages: return well-defined identity rotations
        return np.eye(3), np.eye(3), nudge_translate

    x_angle = direction * xv_degree
    y_angle = direction * xx_degree

    nudge_rotate_x = np.array(
        [
            [1, 0, 0],
            [0, np.cos(x_angle), -np.sin(x_angle)],
            [0, np.sin(x_angle), np.cos(x_angle)],
        ],
        dtype=float,
    )

    nudge_rotate_y = np.array(
        [
            [np.cos(y_angle), 0, np.sin(y_angle)],
            [0, 1, 0],
            [-np.sin(y_angle), 0, np.cos(y_angle)],
        ],
        dtype=float,
    )

    return nudge_rotate_x, nudge_rotate_y, nudge_translate

In [117]:
def structure_one_generator(
    molecule_alignment_quaternion: npt.NDArray,
    mol: Molecule,
) -> Structure:
    """Apply the alignment matrix to every atom of `mol` and return the result.

    Each atom position of `mol.structure` is left-multiplied by
    `molecule_alignment_quaternion`; the atomic symbols are carried over as-is.
    """

    rotated_coords = [
        np.dot(molecule_alignment_quaternion, position)
        for position in mol.structure.coords
    ]

    return Structure(symbols=mol.structure.symbols, coords=np.array(rotated_coords))

In [127]:
# Demo: with the identity matrix the printed structure is the molecule's own
# coordinates, unchanged by the alignment step
mol = Molecule(smiles_string="N#Cc1[nH+]n[nH]c(C#N)1")

print(structure_one_generator(IDENTITY_MATRIX, mol))

	Symbol	X	Y	Z

1)	N	2.802694	2.177527	0.034835
2)	C	2.152430	1.225673	-0.050026
3)	C	1.336007	0.048022	-0.132875
4)	N	0.000000	0.000000	0.000000
5)	N	-0.347405	-1.272755	-0.145486
6)	N	0.691208	-2.049649	-0.365098
7)	C	1.772403	-1.242314	-0.362849
8)	C	3.099105	-1.706371	-0.567893
9)	N	4.180361	-2.078055	-0.737604
10)	H	-0.604612	0.811316	0.179139
11)	H	0.657422	-3.096141	-0.511050



In [None]:
def structure_two_generator(
    stage: str,
    z_rotation_offset: npt.NDArray,
    check_structure_one: Structure,
    distance_multiplier: float = 2.7,
    z_flipper: npt.NDArray = Z_REFLECTOR,
) -> Structure:
    """Generate the second structure that the proton will be transferred to.

    Every atom of `check_structure_one` is transformed by `z_flipper`, shifted
    along +Z, and then combined with `z_rotation_offset`.

    Parameters
    ----------
    stage : {"R", "P", "T"}
        Stage of the reaction ("R"=Reactant, "P"=Product, "T"=Transition)
    z_rotation_offset : NDArray
        A matrix that rotates around the Z-axis.
    check_structure_one : Structure
        The first structure generated from `structure_one_generator()`.
    distance_multiplier : float, default=2.7
        The distance between the parent charge sites in Angstroms. For stage
        "T" the shift along Z is reduced by 0.1.
    z_flipper : NDArray, default=Z_REFLECTOR
        Matrix applied to each atom before the shift along Z.

    Returns
    -------
    check_structure_two : Structure
        The second structure to which the proton will be transferred.

    Raises
    ------
    ValueError
        If `stage` is not one of "R", "P" or "T".
    """

    if stage not in ("R", "P", "T"):
        raise ValueError(f"Unknown stage {stage!r}; expected 'R', 'P' or 'T'.")

    _, _, nudge_translate = nudge_matrix_generator(stage)

    # The rotational nudges are deliberately overridden with the identity here
    nudge_rotate_x = IDENTITY_MATRIX
    nudge_rotate_y = IDENTITY_MATRIX

    # Reactant and product apply the translation nudge in opposite directions
    translate_sign = -1 if stage == "P" else 1

    new_coords = []

    for atom_position in check_structure_one.coords:
        flipped = np.dot(z_flipper, atom_position)

        if stage == "T":
            new_coord = np.dot(
                z_rotation_offset,
                flipped + ((distance_multiplier - 0.1) * Z_UNIT_VECTOR),
            )
        else:
            # NOTE(review): this branch right-multiplies by z_rotation_offset while
            # the "T" branch left-multiplies. The two agree for symmetric matrices
            # such as a diagonal offset - confirm the intended convention for
            # general rotations.
            shifted = flipped + (distance_multiplier * Z_UNIT_VECTOR)
            new_coord = np.dot(
                nudge_rotate_x,
                np.dot(nudge_rotate_y, np.dot(shifted, z_rotation_offset)),
            ) + (translate_sign * nudge_translate)

        new_coords.append(new_coord)

    # Keywords are required: the first positional slots of Structure are the
    # XYZ path and the SMILES string, not symbols and coordinates
    check_structure_two = Structure(
        symbols=check_structure_one.symbols,
        coords=np.array(new_coords),
    )

    return check_structure_two

In [None]:
def structure_checker(
    check_structure_one: npt.ArrayLike,
    check_structure_two: npt.ArrayLike,
    mol: "Molecule",
    attempt_number: int,
    overlap_threshold: float = 1.5,
) -> bool:
    """Check for overlap between the two molecules

    Parameters
    ----------
    check_structure_one : ArrayLike or Structure
        The structure generated from `structure_one_generator()`. Either an
        array of coordinates or an object exposing them as ``.coords``.
    check_structure_two : ArrayLike or Structure
        The structure generated from `structure_two_generator()`, in either
        of the same two forms.
    mol : Molecule
        A Molecule object
    attempt_number : int
        The attempt number for this run, typical maximum is 2 (3 possible tries)
    overlap_threshold : float, default=1.5
        Two atoms closer than this distance (in Angstroms) count as overlapping.

    Returns
    -------
    bool
        True indicates that the structures have some overlap
        (atoms within `overlap_threshold` of each other).
        False indicates there is no detected overlap.
    """

    coords_one = np.asarray(
        getattr(check_structure_one, "coords", check_structure_one), dtype=float
    )
    coords_two = np.asarray(
        getattr(check_structure_two, "coords", check_structure_two), dtype=float
    )

    # Nothing to compare, so nothing can overlap
    if coords_one.size == 0 or coords_two.size == 0:
        return False

    n = mol.proton_position_indices[attempt_number]

    # The proton involved in transport is excluded from the check.
    # NOTE(review): structure two skips index n - 1 rather than n; this is kept
    # exactly as written - confirm whether the offset is intentional.
    keep_one = np.arange(len(coords_one)) != n
    keep_two = np.arange(len(coords_two)) != n - 1

    # Pairwise distances between every remaining atom of one and of two
    separations = (
        coords_one[keep_one][:, None, :3] - coords_two[keep_two][None, :, :3]
    )
    distances = np.linalg.norm(separations, axis=-1)

    return bool((distances < overlap_threshold).any())

In [None]:
def final_structure_generator(
    check_structure_one: npt.ArrayLike,
    check_structure_two: npt.ArrayLike,
    stage: str,
    mol: "Molecule",
    attempt_number: int,
    distance_multiplier: float = 2.7,
) -> list[list[str]]:
    """Generate the complete structure. Should only be used after checking for/handling overlap.

    The transferring proton is removed from both copies of the molecule and a
    single proton is appended at the end, chosen by `stage`.

    Parameters
    ----------
    check_structure_one : ArrayLike or Structure
        The structure generated from `structure_one_generator()`. Either a
        sequence of coordinate rows or an object exposing them as ``.coords``.
    check_structure_two : ArrayLike or Structure
        The structure generated from `structure_two_generator()`, in either
        of the same two forms.
    stage : {"R", "P", "T"}
        Stage of the reaction ("R"=Reactant, "P"=Product, "T"=Transition).
        Any other value returns the two molecules without a proton appended.
    mol : Molecule
        The current Molecule object.
    attempt_number : {0, 1, 2}
        The attempt number.
    distance_multiplier : float, default=2.7
        The distance between the parent charge sites in Angstroms.

    Returns
    -------
    final_structure : list[list[str]]
        The final assembled structure in a list of lists with the atomic symbol
        and the XYZ coordinates.
    """

    def as_plain_list(row):
        # ndarray rows must become lists so they concatenate with the symbol
        return row.tolist() if hasattr(row, "tolist") else list(row)

    rows_one = getattr(check_structure_one, "coords", check_structure_one)
    rows_two = getattr(check_structure_two, "coords", check_structure_two)

    # Prefer the legacy attribute when present, otherwise use the Structure's symbols
    symbols = getattr(mol, "atom_identities", None)
    if symbols is None:
        symbols = mol.structure.symbols

    new_structure_one = []
    new_structure_two = []

    for i in range(len(rows_one)):
        new_structure_one.append([symbols[i]] + as_plain_list(rows_one[i]))
        new_structure_two.append([symbols[i]] + as_plain_list(rows_two[i]))

    # Remove the transferring proton from both molecules
    proton_index = mol.proton_position_indices[attempt_number]
    reactant_proton = new_structure_one.pop(proton_index)
    product_proton = new_structure_two.pop(proton_index)

    # For the transition stage the proton sits on the Z axis, halfway along the shift
    transition_proton = ["H", 0.0, 0.0, (distance_multiplier - 0.1) / 2]

    final_structure = new_structure_one + new_structure_two

    if stage == "R":
        final_structure.append(reactant_proton)
    elif stage == "P":
        final_structure.append(product_proton)
    elif stage == "T":
        final_structure.append(transition_proton)

    return final_structure

In [None]:
def overlap_handler(
    check_structure_one: npt.ArrayLike,
    check_structure_two: npt.ArrayLike,
    stage: str,
    mol: Molecule,
    z_rotation_offset: npt.NDArray,
    distance_multiplier: float = 2.7,
) -> tuple[list[list[float]], list[list[float]], int]:
    """Fix molecule overlap issues

    For each available proton position in turn, structure one is regenerated
    with that proton aligned to the Z-axis and structure two is rebuilt with
    no Z rotation, then with rotations of 45 to 315 degrees around the Z-axis
    in 45 degree steps. The first combination that `structure_checker`
    reports as free of overlap is returned.

    Parameters
    ----------
    check_structure_one : ArrayLike
        The structure generated from `check_structure_one_generator()`.
    check_structure_two : ArrayLike
        The structure generated from `check_structure_two_generator()`.
    stage : {"R", "P", "T"}
        Stage of the reaction ("R"=Reactant, "P"=Product, "T"=Transition).
    mol : Molecule
        The current Molecule object.
    z_rotation_offset : NDArray
        A matrix that rotates around the Z-axis. Kept for interface
        compatibility only: the search always starts from the identity
        matrix, so the value passed in is not used.
    distance_multiplier : float, (default=2.7)
        The distance between the parent charge sites in Angstroms.

    Returns
    -------
    check_structure_one : list[list[float]]
        A corrected version of `check_structure_one`.
    check_structure_two : list[list[float]]
        A corrected version of `check_structure_two`.
    attempt_number : {0, 1, 2}
        Analogous to the proton position used. Always a valid index into
        `mol.proton_position_indices` (when that list is non-empty), including
        when no overlap-free structure was found; in that case it is the last
        position tried, which is the one the returned structures were built
        from.
    """

    proton_position_attempts = len(mol.proton_position_indices)

    # Nothing to fix: hand the inputs back untouched.
    if not structure_checker(check_structure_one, check_structure_two, mol, 0):
        return check_structure_one, check_structure_two, 0

    # A bounded loop guarantees structure_checker is never called with an
    # attempt number past the end of mol.proton_position_indices, and that
    # each check is made on structures built for that same attempt.
    for attempt_number in range(proton_position_attempts):
        proton_position = mol.translated_atom_coords[
            int(mol.proton_position_indices[attempt_number])
        ]

        molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
        molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
        molecule_alignment_quaternion = quaternion_builder(
            molecule_alignment_vector, molecule_alignment_angle
        )

        check_structure_one = structure_one_generator(
            molecule_alignment_quaternion, mol
        )

        # First candidate for this proton position: no rotation around Z.
        check_structure_two = structure_two_generator(
            stage, IDENTITY_MATRIX, check_structure_one, distance_multiplier
        )

        # If the structure works, then return it and exit the function
        if not structure_checker(
            check_structure_one, check_structure_two, mol, attempt_number
        ):
            print("Structure was fixed with proton position #" + str(attempt_number))
            return check_structure_one, check_structure_two, attempt_number

        # Remaining candidates: 45 degree increments. Step 0 would repeat the
        # identity case checked above, so the search starts at step 1.
        for rotation_step in range(1, 8):
            rotation_angle = rotation_step * (np.pi / 4)

            rotation_matrix = np.array(
                [
                    [np.cos(rotation_angle), -np.sin(rotation_angle), 0],
                    [np.sin(rotation_angle), np.cos(rotation_angle), 0],
                    [0, 0, 1],
                ],
                dtype=float,
            )

            check_structure_two = structure_two_generator(
                stage, rotation_matrix, check_structure_one, distance_multiplier
            )

            if not structure_checker(
                check_structure_one, check_structure_two, mol, attempt_number
            ):
                print(
                    f"Structure was fixed with proton position #{attempt_number} with a rotation of {rotation_step * 45} degrees."
                )
                return check_structure_one, check_structure_two, attempt_number

    print("Structure could not be fixed.")
    # Report the last position actually tried rather than an index one past
    # the end, which callers would otherwise use to index
    # mol.proton_position_indices.
    return (
        check_structure_one,
        check_structure_two,
        max(proton_position_attempts - 1, 0),
    )

In [None]:
# Structure generation for molecules with pre-existing structures
#
# Each row from `multiple_input()` is used as (SMILES, molecule name). The
# coordinates are read from "<xyz_file_path><name>.xyz", both structures are
# generated, any reported overlap is handed to `overlap_handler`, and the
# assembled result is written to "<name>-<stage>.xyz".

stage = "P"
distance_multiplier = DISTANCE_MULTIPLIERS[0]

multiple_molecule_data, xyz_file_path = multiple_input()

for molecule in multiple_molecule_data:
    molecule_name = molecule[1]
    atom_identities, atom_coords = from_xyz(f"{xyz_file_path}{molecule_name}.xyz")

    (
        protonated_atom_index,
        proton_position_indices,
        near_neighbor_index,
    ) = substructure_match(molecule[0])

    structure = Molecule(
        molecule[0], molecule_name, atom_identities, atom_coords,
        protonated_atom_index, proton_position_indices,
    )

    # Always start from the first proton position.
    attempt_number = 0
    proton_position = structure.get_proton_position(attempt_number)

    # Quaternion built from the angle between the proton position and +Z,
    # about the axis given by their cross product.
    molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
    molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
    molecule_alignment_quaternion = quaternion_builder(molecule_alignment_vector, molecule_alignment_angle)

    check_structure_one = structure_one_generator(molecule_alignment_quaternion, structure)

    # Half-turn (pi radians) about the normal to the first proton position
    # and the near-neighbor atom of structure one.
    new_alignment_angle = np.pi
    new_alignment_vector = normalize(
        np.cross(check_structure_one[proton_position_indices[0]], check_structure_one[near_neighbor_index])
    )
    z_flipper = quaternion_builder(new_alignment_vector, new_alignment_angle)

    check_structure_two = structure_two_generator(
        stage, z_rotation_offset, check_structure_one, distance_multiplier, z_flipper
    )

    print(molecule_name)

    # attempt_number is still 0 here; overlap_handler may replace it.
    if structure_checker(check_structure_one, check_structure_two, structure, attempt_number):
        print("Atomic Overlap Detected, attempting to fix...")
        check_structure_one, check_structure_two, attempt_number = overlap_handler(
            check_structure_one, check_structure_two, stage, structure,
            z_rotation_offset, distance_multiplier,
        )

    final_structure = final_structure_generator(
        check_structure_one, check_structure_two, stage, structure,
        attempt_number, distance_multiplier,
    )

    # NOTE(review): absolute, user-specific output directory; the other cells
    # write next to the input files (xyz_file_path) -- confirm which is intended.
    with open(
        f"C:/Users/Brock/OneDrive/new_molecules/WIP/{structure.molecule_name}-{stage}.xyz",
        "w",
        newline="",
    ) as finalXYZ:
        # First header line is the atom count; the trailing "\n" leaves the
        # second header line empty.
        headerstart = f"{structure.get_num_atoms() * 2 - 1}\n"
        np.savetxt(finalXYZ, final_structure, fmt="%s", header=headerstart, comments="")

    # Disabled: input-file generation for this workflow.
    #input_generator(structure.molecule_name, "-"+stage, final_structure, xyz_file_path)

# Disabled: batch script generation for this workflow.
#with open(xyz_file_path + "run.sh", "w", newline="\n") as batch:
#    for file in multiple_molecule_data:
#        batch.write(
#            "$HOME/orca/orca "
#            + file[1]
#            + "-"
#            + stage
#            + ".inp > "
#            + file[1]
#            + "-"
#            + stage
#            + ".out\n"
#        )


In [None]:
# TEST CELL
#
# Builds one structure from the row at index 1 of "MoleculeTest.csv" and the
# coordinates in "TestMolecule.xyz", then writes "<name>-<stage>.xyz" to the
# current working directory.

stage = "R"
distance_multiplier = DISTANCE_MULTIPLIERS[0]

multiple_molecule_data = []
with open("MoleculeTest.csv", newline="") as csvfile:
    data = csv.reader(csvfile)
    for row in data:
        multiple_molecule_data.append(row)

atom_identities, atom_coords = from_xyz("TestMolecule.xyz")

# Row 1 columns as read here: 0 = first Molecule argument, 1 = protonated atom
# index, 2-4 = proton position indices ("NA" entries are skipped), 5 = name.
mol_info = dict(
    protonated_atom_index=int(multiple_molecule_data[1][1]),
    proton_position_indices=[
        int(field) for field in multiple_molecule_data[1][2:5] if field != "NA"
    ],
    molecule_name=multiple_molecule_data[1][5],
    atom_identities=atom_identities,
    atom_coords=atom_coords,
)

structure = Molecule(multiple_molecule_data[1][0], **mol_info)

# Always start from the first proton position.
attempt_number = 0
proton_position = structure.get_proton_position(attempt_number)

# Quaternion built from the angle between the proton position and +Z,
# about the axis given by their cross product.
molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
molecule_alignment_quaternion = quaternion_builder(molecule_alignment_vector, molecule_alignment_angle)

check_structure_one = structure_one_generator(molecule_alignment_quaternion, structure)
check_structure_two = structure_two_generator(stage, z_rotation_offset, check_structure_one, distance_multiplier)

# attempt_number is still 0 here; overlap_handler may replace it.
if structure_checker(check_structure_one, check_structure_two, structure, attempt_number):
    print(structure.molecule_name, "Warning: Overlap detected, working on fix...")
    check_structure_one, check_structure_two, attempt_number = overlap_handler(
        check_structure_one, check_structure_two, stage, structure,
        z_rotation_offset, distance_multiplier,
    )

final_structure = final_structure_generator(
    check_structure_one, check_structure_two, stage, structure,
    attempt_number, distance_multiplier,
)

with open(f"{structure.molecule_name}-{stage}.xyz", "w", newline="") as FinalFile:
    # First header line is the atom count; the trailing "\n" leaves the
    # second header line empty.
    headerstart = f"{structure.get_num_atoms() * 2 - 1}\n"
    np.savetxt(FinalFile, final_structure, fmt="%s", header=headerstart, comments="")

In [None]:
# Molecule generation with only SMILES
#
# Each row from `multiple_input()` is used as (SMILES, molecule name). For
# every row the two structures are generated, any reported overlap is handed
# to `overlap_handler`, the result is written to "<name>-<stage>.xyz" and
# passed to `input_generator`. A "run.sh" with one command line per molecule
# is written at the end.

stage = "R"
distance_multiplier = DISTANCE_MULTIPLIERS[0]

multiple_molecule_data, xyz_file_path = multiple_input()

for molecule in multiple_molecule_data:
    structure = Molecule(molecule[0], molecule_name=molecule[1])

    # Always start from the first proton position.
    attempt_number = 0
    proton_position = structure.get_proton_position(attempt_number)

    # Quaternion built from the angle between the proton position and +Z,
    # about the axis given by their cross product.
    molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
    molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
    molecule_alignment_quaternion = quaternion_builder(molecule_alignment_vector, molecule_alignment_angle)

    check_structure_one = structure_one_generator(molecule_alignment_quaternion, structure)
    check_structure_two = structure_two_generator(stage, z_rotation_offset, check_structure_one, distance_multiplier)

    # attempt_number is still 0 here; overlap_handler may replace it.
    if structure_checker(check_structure_one, check_structure_two, structure, attempt_number):
        print(structure.molecule_name, "Overlap detected, working on fix...")
        check_structure_one, check_structure_two, attempt_number = overlap_handler(
            check_structure_one, check_structure_two, stage, structure,
            z_rotation_offset, distance_multiplier,
        )

    final_structure = final_structure_generator(
        check_structure_one, check_structure_two, stage, structure,
        attempt_number, distance_multiplier,
    )

    with open(f"{xyz_file_path}{structure.molecule_name}-{stage}.xyz", "w", newline="") as finalXYZ:
        # First header line is the atom count; the trailing "\n" leaves the
        # second header line empty.
        headerstart = f"{structure.get_num_atoms() * 2 - 1}\n"
        np.savetxt(finalXYZ, final_structure, fmt="%s", header=headerstart, comments="")

    input_generator(structure.molecule_name, stage, final_structure, xyz_file_path)

# One command line per molecule, written with "\n" line endings.
with open(f"{xyz_file_path}run.sh", "w", newline="\n") as batch:
    for file in multiple_molecule_data:
        batch.write(f"$HOME/orca/orca {file[1]}-{stage}.inp > {file[1]}-{stage}.out\n")

In [None]:
# Generating single molecule structures
#
# Each row from `multiple_input()` is used as (SMILES, molecule name). A
# Molecule is built with the charge set below, its atoms are passed to
# `input_generator`, and a "run.sh" with one command line per molecule is
# written at the end.

multiple_molecule_data, xyz_file_path = multiple_input()
charge = 0

for molecule in multiple_molecule_data:
    structure = Molecule(molecule[0], molecule_name=molecule[1], charge=charge)

    # One [symbol, x, y, z] row per atom.
    neutral_molecule = []
    for i in range(structure.get_num_atoms()):
        neutral_molecule.append([structure.atom_identities[i], *structure.atom_coords[i]])

    input_generator(
        molecule_name=structure.molecule_name, stage="",
        final_structure=neutral_molecule, save_directory=xyz_file_path,
        dft_method="wB97X-D3BJ", basis_set="def2-TZVP",
        optional_input_params="",
    )

# One command line per molecule, written with "\n" line endings.
with open(f"{xyz_file_path}run.sh", "w", newline="\n") as batch:
    for file in multiple_molecule_data:
        batch.write(f"$HOME/orca/orca {file[1]}.inp > {file[1]}.out\n")

In [None]:
# TEST NEW IDEA CELL
#
# Same batch workflow as the SMILES-only cell, but coordinates come from
# `from_smiles`, the charge-site indices from `substructure_match`, and
# structure two is generated with an extra `z_flipper` quaternion.

stage = "P"
distance_multiplier = DISTANCE_MULTIPLIERS[0]

multiple_molecule_data, xyz_file_path = multiple_input()

for molecule in multiple_molecule_data:
    atom_identities, atom_coords = from_smiles(molecule[0])
    (
        protonated_atom_index,
        proton_position_indices,
        near_neighbor_index,
    ) = substructure_match(molecule[0])

    structure = Molecule(
        molecule[0], molecule[1], atom_identities, atom_coords,
        protonated_atom_index, proton_position_indices,
    )

    # Always start from the first proton position.
    attempt_number = 0
    proton_position = structure.get_proton_position(attempt_number)

    # Quaternion built from the angle between the proton position and +Z,
    # about the axis given by their cross product.
    molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
    molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
    molecule_alignment_quaternion = quaternion_builder(molecule_alignment_vector, molecule_alignment_angle)

    check_structure_one = structure_one_generator(molecule_alignment_quaternion, structure)

    # Half-turn (pi radians) about the normal to the first proton position
    # and the near-neighbor atom of structure one.
    new_alignment_angle = np.pi
    new_alignment_vector = normalize(
        np.cross(check_structure_one[proton_position_indices[0]], check_structure_one[near_neighbor_index])
    )
    z_flipper = quaternion_builder(new_alignment_vector, new_alignment_angle)

    check_structure_two = structure_two_generator(
        stage,
        z_rotation_offset,
        check_structure_one,
        distance_multiplier,
        z_flipper,
    )

    # attempt_number is still 0 here; overlap_handler may replace it.
    if structure_checker(check_structure_one, check_structure_two, structure, attempt_number):
        print(structure.molecule_name, "Warning: Overlap detected, working on fix...")
        check_structure_one, check_structure_two, attempt_number = overlap_handler(
            check_structure_one, check_structure_two, stage, structure,
            z_rotation_offset, distance_multiplier,
        )

    final_structure = final_structure_generator(
        check_structure_one, check_structure_two, stage, structure,
        attempt_number, distance_multiplier,
    )

    with open(f"{xyz_file_path}{structure.molecule_name}-{stage}.xyz", "w", newline="") as finalXYZ:
        # First header line is the atom count; the trailing "\n" leaves the
        # second header line empty.
        headerstart = f"{structure.get_num_atoms() * 2 - 1}\n"
        np.savetxt(finalXYZ, final_structure, fmt="%s", header=headerstart, comments="")

    input_generator(structure.molecule_name, stage, final_structure, xyz_file_path)

# One command line per molecule, written with "\n" line endings.
with open(f"{xyz_file_path}run.sh", "w", newline="\n") as batch:
    for file in multiple_molecule_data:
        batch.write(f"$HOME/orca/orca {file[1]}-{stage}.inp > {file[1]}-{stage}.out\n")

In [None]:
# TEST NEW IDEA CELL
#
# Single-molecule run of the `z_flipper` variant for one hard-coded SMILES
# string; the result is written to "test-<stage>-rotate.xyz" in the current
# working directory.

stage = "R"
distance_multiplier = DISTANCE_MULTIPLIERS[0]
mol_smiles = "[OH2+]CC#CC(F)(F)F"

# Show the matched indices before using them.
print(substructure_match(mol_smiles))

atom_identities, atom_coords = from_smiles(mol_smiles)
(
    protonated_atom_index,
    proton_position_indices,
    near_neighbor_index,
) = substructure_match(mol_smiles)

structure = Molecule(
    mol_smiles, "test", atom_identities, atom_coords,
    protonated_atom_index, proton_position_indices,
)

# Always start from the first proton position.
attempt_number = 0
proton_position = structure.get_proton_position(attempt_number)

# Quaternion built from the angle between the proton position and +Z,
# about the axis given by their cross product.
molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
molecule_alignment_quaternion = quaternion_builder(molecule_alignment_vector, molecule_alignment_angle)

check_structure_one = structure_one_generator(molecule_alignment_quaternion, structure)

# Half-turn (pi radians) about the normal to the first proton position
# and the near-neighbor atom of structure one.
new_alignment_angle = np.pi
new_alignment_vector = normalize(
    np.cross(check_structure_one[proton_position_indices[0]], check_structure_one[near_neighbor_index])
)
z_flipper = quaternion_builder(new_alignment_vector, new_alignment_angle)

check_structure_two = structure_two_generator(
    stage,
    z_rotation_offset,
    check_structure_one,
    distance_multiplier,
    z_flipper,
)

# attempt_number is still 0 here; overlap_handler may replace it.
if structure_checker(check_structure_one, check_structure_two, structure, attempt_number):
    print(structure.molecule_name, "Warning: Overlap detected, working on fix...")
    check_structure_one, check_structure_two, attempt_number = overlap_handler(
        check_structure_one, check_structure_two, stage, structure,
        z_rotation_offset, distance_multiplier,
    )

final_structure = final_structure_generator(
    check_structure_one, check_structure_two, stage, structure,
    attempt_number, distance_multiplier,
)

with open(f"{structure.molecule_name}-{stage}-rotate.xyz", "w", newline="") as finalXYZ:
    # First header line is the atom count; the trailing "\n" leaves the
    # second header line empty.
    headerstart = f"{structure.get_num_atoms() * 2 - 1}\n"
    np.savetxt(finalXYZ, final_structure, fmt="%s", header=headerstart, comments="")


In [None]:
# TEST NEW IDEA CELL
#
# Single-molecule run WITHOUT the `z_flipper` quaternion, for one hard-coded
# SMILES string; the result is written to "test-<stage>-reflect.xyz" in the
# current working directory.
# NOTE(review): the output is named "reflect" but structure two is built with
# `z_rotation_offset`, not `Z_REFLECTOR` -- confirm this is intended.

stage = "R"

distance_multiplier = DISTANCE_MULTIPLIERS[0]

mol_smiles = "[OH2+]CC#CC(F)(F)F"

print(substructure_match(mol_smiles))

# Generate the coordinates here so the cell does not depend on
# `atom_identities` / `atom_coords` left in the kernel by whichever cell ran
# before it (which may belong to a different molecule).
atom_identities, atom_coords = from_smiles(mol_smiles)

# substructure_match returns three values; the near-neighbor index is not
# needed in this variant but must still be unpacked.
protonated_atom_index, proton_position_indices, near_neighbor_index = substructure_match(mol_smiles)

structure = Molecule(
    mol_smiles,
    "test",
    atom_identities,
    atom_coords,
    protonated_atom_index,
    proton_position_indices,
)

# Always start from the first proton position.
attempt_number = 0

proton_position = structure.get_proton_position(attempt_number)

# Quaternion built from the angle between the proton position and +Z,
# about the axis given by their cross product.
molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
molecule_alignment_quaternion = quaternion_builder(
    molecule_alignment_vector, molecule_alignment_angle
)

check_structure_one = structure_one_generator(
    molecule_alignment_quaternion, structure
)
check_structure_two = structure_two_generator(
    stage, z_rotation_offset, check_structure_one, distance_multiplier
)

# attempt_number is still 0 here; overlap_handler may replace it.
if structure_checker(check_structure_one, check_structure_two, structure, attempt_number):
    print(structure.molecule_name, "Warning: Overlap detected, working on fix...")
    check_structure_one, check_structure_two, attempt_number = overlap_handler(
        check_structure_one,
        check_structure_two,
        stage,
        structure,
        z_rotation_offset,
        distance_multiplier,
    )

final_structure = final_structure_generator(
    check_structure_one,
    check_structure_two,
    stage,
    structure,
    attempt_number,
    distance_multiplier,
)

with open(
    str(structure.molecule_name) + "-" + stage + "-reflect.xyz",
    "w",
    newline="",
) as finalXYZ:
    # First header line is the atom count; the trailing "\n" leaves the
    # second header line empty.
    headerstart = str(structure.get_num_atoms() * 2 - 1) + "\n"
    np.savetxt(finalXYZ, final_structure, fmt="%s", header=headerstart, comments="")