In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
import numpy.typing as npt
import csv
from tkinter import Tk
from tkinter import filedialog

In [None]:
def smiles_to_molecule(smiles_string: str) -> tuple[list[str], npt.NDArray]:
    """
    Convert a SMILES string to a 3D molecule.

    Explicit hydrogens are added, a single conformer is embedded with ETKDGv3,
    and the result is read back from RDKit's XYZ block.

    ### Parameters
    1. smiles_string : str
        - Any valid SMILES string

    ### Returns
    1. atom_identities : list[str]
        - A list of all of the atomic symbols
    2. atom_coords : NDArray
        - A numpy array containing lists of XYZ coordinates for each atom

    ### Raises
    1. ValueError
        - If the SMILES string cannot be parsed, or no 3D conformer
        could be embedded.
    """

    parsed = Chem.MolFromSmiles(smiles_string)
    if parsed is None:
        # MolFromSmiles signals a parse failure by returning None
        raise ValueError(f"Could not parse SMILES string: {smiles_string!r}")

    molecule = Chem.AddHs(parsed)

    # EmbedMolecule returns the new conformer ID, or -1 when embedding fails
    if AllChem.EmbedMolecule(molecule, AllChem.ETKDGv3()) < 0:
        raise ValueError(
            f"Could not embed a 3D conformer for SMILES string: {smiles_string!r}"
        )

    xyz_string = Chem.rdmolfiles.MolToXYZBlock(molecule)

    # Drop the atom-count line, the comment line and the trailing empty entry
    atom_rows = [line.split() for line in xyz_string.split("\n")[2:-1]]

    atom_identities = [str(row[0]) for row in atom_rows]
    atom_coords = np.array([[float(value) for value in row[1:]] for row in atom_rows])

    return atom_identities, atom_coords

In [None]:
def substructure_match(smiles_string: str) -> tuple[int, list[int]]:
    """
    Generate an RDKit molecule and search the structure for protonated atoms.

    Only the molecular graph (with explicit hydrogens) is needed for the
    SMARTS search, so no 3D conformer is generated here.

    ### Parameters
    1. smiles_string : str
        - Any valid SMILES string

    ### Returns
    1. protonated_atom_index : int
        - The atom index of the first matched protonated N/O atom
    2. proton_position_indices : list[int]
        - The atom indices of the protons bonded to that parent charge site

    ### Raises
    1. ValueError
        - If the SMILES string cannot be parsed, or it contains no
        protonated nitrogen/oxygen atom.
    """

    parsed = Chem.MolFromSmiles(smiles_string)
    if parsed is None:
        raise ValueError(f"Could not parse SMILES string: {smiles_string!r}")

    molecule = Chem.AddHs(parsed)

    charged_atom = Chem.MolFromSmarts("[#7H+,#7H2+,#7H3+,#8H+,#8H2+]")

    proton_atom = Chem.MolFromSmarts(
        "[$([#1][#7H+]),$([#1][#7H2+]),$([#1][#7H3+]),$([#1][#8H+]),$([#1][#8H2+])]"
    )

    parent_position = molecule.GetSubstructMatches(charged_atom)
    if not parent_position:
        raise ValueError(
            f"No protonated nitrogen or oxygen found in SMILES string: {smiles_string!r}"
        )

    protonated_atom_index: int = parent_position[0][0]

    # Keep only protons bonded to the chosen parent; a molecule with several
    # charged sites would otherwise contribute protons from the other sites.
    proton_position_indices: list[int] = [
        match[0]
        for match in molecule.GetSubstructMatches(proton_atom)
        if molecule.GetBondBetweenAtoms(match[0], protonated_atom_index) is not None
    ]

    return protonated_atom_index, proton_position_indices

In [None]:
class Molecule:
    """
    Class that contains all molecule data

    Any value that is not supplied is derived from `smiles_string`: the atom
    list and coordinates via `smiles_to_molecule()`, and the charge-site
    indices via `substructure_match()`. Each helper is called at most once.

    ### Parameters
    1. smiles_string : str, (required)
        - Any valid SMILES string
    2. molecule_name : str, (default=None)
        - The molecule name that will be used for naming output XYZ and INP files
        (falls back to `"bean"` when omitted)
    3. atom_identities : list[str], (default=None)
        - List of atomic symbols for a given molecule
    4. atom_coords : NDArray, (default=None)
        - The molecule's coordinates in the same order as `atom_identities`
        (any array-like is accepted and stored as a float array)
    5. protonated_atom_index : int, (default=None)
        - The line index of the atom that carries an extra proton
    6. proton_position_indices : list[int], (default=None)
        - A list of the line indexes for each proton attached to the protonated atom

    ### Attributes
    - translated_atom_coords : NDArray
        - `atom_coords` shifted so the protonated atom sits at the origin
    """

    def __init__(
        self,
        smiles_string: str,
        molecule_name: str = None,
        atom_identities: list[str] = None,
        atom_coords: npt.NDArray = None,
        protonated_atom_index: int = None,
        proton_position_indices: list[int] = None,
    ):
        self.smiles_string = smiles_string
        self.molecule_name = molecule_name if molecule_name is not None else "bean"

        # Embed once and reuse both results, so identities and coordinates
        # always come from the same conformer.
        if atom_identities is None or atom_coords is None:
            generated_identities, generated_coords = smiles_to_molecule(smiles_string)
            if atom_identities is None:
                atom_identities = generated_identities
            if atom_coords is None:
                atom_coords = generated_coords

        self.atom_identities = atom_identities
        # Coerce to an array so plain nested lists can be translated below
        self.atom_coords = np.asarray(atom_coords, dtype=float)

        # Run the substructure search once for both charge-site values
        if protonated_atom_index is None or proton_position_indices is None:
            matched_parent, matched_protons = substructure_match(smiles_string)
            if protonated_atom_index is None:
                protonated_atom_index = matched_parent
            if proton_position_indices is None:
                proton_position_indices = matched_protons

        self.protonated_atom_index = protonated_atom_index
        self.proton_position_indices = proton_position_indices

        # Move the parent charge site to the origin
        self.translated_atom_coords = (
            self.atom_coords - self.atom_coords[self.protonated_atom_index]
        )

    def get_proton_position(self, attempt_number: int) -> npt.NDArray:
        """
        Get the translated coordinates of the proton requested.

        ### Parameters
        1. attempt_number : int
            - Index into `proton_position_indices` (0-2 when three
            protons are listed).

        ### Returns
        1. NDArray
            - The XYZ coordinates of the requested proton.
        """
        return self.translated_atom_coords[
            int(self.proton_position_indices[attempt_number])
        ]

    def get_protonated_atom_position(self) -> npt.NDArray:
        """Get the translated XYZ coordinates of the parent charge site."""
        return self.translated_atom_coords[self.protonated_atom_index]

    def get_num_atoms(self) -> int:
        """Get the number of atoms in the molecule."""
        return len(self.atom_identities)

In [None]:
# Shared geometry constants used throughout the notebook

# Default offset applied to the second molecule: a half turn about the Z axis
z_rotation_offset = np.diag([-1.0, -1.0, 1.0])

# Candidate distances between the two parent atoms
DISTANCE_MULTIPLIERS = [2.7, 2.8, 2.9, 3.0, 3.1, 3.2]

# Unit vector along +Z
Z_UNIT_VECTOR = np.array([0.0, 0.0, 1.0])

# Mirror through the XY plane (flips the sign of Z)
Z_REFLECTOR = np.diag([1.0, 1.0, -1.0])

# 3x3 identity
IDENTITY_MATRIX = np.diag([1.0, 1.0, 1.0])

In [None]:
def multiple_input() -> tuple[list, str]:
    """
    Read CSV files with multiple molecules and specify folder containing XYZ files

    Two dialogs are shown in order: a file picker for the CSV, then a
    directory picker. The hidden Tk root window is always destroyed
    afterwards so repeated calls do not leak windows.

    ### Returns
    1. multiple_molecule_data : list
        - A list of lists each containing data about a molecule
        (the first CSV row is treated as a header and dropped)
    2. xyz_file_path : str
        - The selected directory path with a trailing `/`

    ### Raises
    1. FileNotFoundError
        - If either dialog is cancelled without a selection.
    """

    root = Tk()
    root.wm_attributes("-topmost", 1)
    root.withdraw()

    try:
        file_name = filedialog.askopenfilename(
            parent=root,
            initialdir="",
            title="Select a CSV",
            filetypes=(("CSV Files", "*.csv"), ("All files", "*")),
        )
        if not file_name:
            raise FileNotFoundError("No CSV file was selected.")

        with open(file_name, newline="") as csvfile:
            multiple_molecule_data = list(csv.reader(csvfile))

        selected_directory = filedialog.askdirectory(
            parent=root,
            initialdir="",
            title="Select XYZ File Parent Directory",
        )
        if not selected_directory:
            # Without this guard the returned path would be "/" and callers
            # would read from / write to the filesystem root.
            raise FileNotFoundError("No XYZ directory was selected.")
    finally:
        root.destroy()

    xyz_file_path = selected_directory + "/"

    return multiple_molecule_data[1:], xyz_file_path

In [None]:
def input_generator(
    molecule_name: str,
    stage: str,
    final_structure: list[list[str]],
    save_directory: str = "",
):
    """
    Write an ORCA input file for an assembled structure.

    The file is named `<molecule_name>-<stage>.inp` and is created by
    prefixing that name with `save_directory` as-is (no separator is added).

    ### Parameters
    1. molecule_name : str
        - The name of the molecule.
    2. stage : str
        - Stage label used in the file name (e.g. 'R' or 'P').
    3. final_structure : list[list[str]]
        - Rows of atomic symbol followed by XYZ coordinates.
    4. save_directory : str, (default="")
        - Prefix of the output path; include the trailing separator.
    """
    # Fixed ORCA preamble, ending with the coordinate block opener
    preamble_lines = [
        "!Opt wB97X-D3BJ def2-TZVP LargePrint",
        "%geom",
        "\tMaxIter 200",
        "end",
        "%pal",
        "\tnprocs 20",
        "end",
        "* xyz 1 1",
    ]
    preamble = "\n".join(preamble_lines)

    # Closes the coordinate block
    block_terminator = "*\n"

    output_path = f"{save_directory}{molecule_name}-{stage}.inp"

    with open(output_path, "w", newline="\n") as orca_input:
        np.savetxt(
            orca_input,
            final_structure,
            fmt="%s",
            header=preamble,
            footer=block_terminator,
            comments="",
        )

In [None]:
def normalize(vector: npt.NDArray) -> npt.NDArray:
    """
    Determine the unit vector for a given vector.

    ### Parameters
    1. vector : NDArray
        - The vector to scale; any array-like of numbers is accepted.

    ### Returns
    1. NDArray
        - `vector` scaled to unit length as a float array. A zero-length
        vector is returned unscaled (all zeros) rather than dividing by zero.
    """

    # Always work on (and return) a float array, whatever sequence came in
    as_array = np.asarray(vector, dtype=float)

    norm = np.linalg.norm(as_array)

    if norm == 0:
        return as_array

    return as_array / norm

In [None]:
def quaternion_builder(
    vector: npt.ArrayLike,
    angle: float,
) -> npt.NDArray:
    """
    Build a quaternion matrix.

    The axis is scaled to unit length before the quaternion is formed, so the
    rotation is by exactly `angle` even when `vector` is not a unit vector.

    ### Parameters
    1. vector : ArrayLike
        - The vector to rotate around (can be list or NDArray)
    2. angle : float
        - The angle to rotate by (in radians)

    ### Returns
    1. quaternion_matrix : NDArray
        - A 3x3 matrix that rotates by `angle` around `vector`. A zero-length
        `vector` has no direction, so the identity matrix is returned.
    """

    axis = np.asarray(vector, dtype=float)
    axis_length = np.linalg.norm(axis)
    if axis_length != 0:
        axis = axis / axis_length

    half_angle = angle / 2
    quaternion = np.array(
        [
            np.cos(half_angle),
            axis[0] * np.sin(half_angle),
            axis[1] * np.sin(half_angle),
            axis[2] * np.sin(half_angle),
        ]
    )

    # Renormalise: a no-op for a unit axis, and it collapses the zero-axis
    # case to the identity rotation.
    quaternion_length = np.linalg.norm(quaternion)
    if quaternion_length != 0:
        quaternion = quaternion / quaternion_length

    w, x, y, z = quaternion

    quaternion_matrix = np.array(
        [
            [
                1 - 2 * (y ** 2 + z ** 2),
                2 * (x * y - w * z),
                2 * (x * z + w * y),
            ],
            [
                2 * (x * y + w * z),
                1 - 2 * (x ** 2 + z ** 2),
                2 * (y * z - w * x),
            ],
            [
                2 * (x * z - w * y),
                2 * (y * z + w * x),
                1 - 2 * (x ** 2 + y ** 2),
            ],
        ]
    )

    return quaternion_matrix

In [None]:
def vector_angle(vector_one: npt.ArrayLike, vector_two: npt.ArrayLike) -> np.float64:
    """
    Calculate the angle (in radians) between two vectors.

    Both inputs are passed through `normalize()` first; the cosine is clipped
    to [-1, 1] so rounding error cannot push `arccos` out of its domain.
    """

    cosine = np.dot(normalize(vector_one), normalize(vector_two))

    return np.arccos(np.clip(cosine, -1.0, 1.0))

In [None]:
def distance_calculator(
    vector_one: npt.ArrayLike, vector_two: npt.ArrayLike
) -> np.float64:
    """
    Calculate the Euclidean distance between two 3D points.

    Only the first three components of each input are read, and each is
    converted with `float()` before use.
    """

    delta_x = float(vector_one[0]) - float(vector_two[0])
    delta_y = float(vector_one[1]) - float(vector_two[1])
    delta_z = float(vector_one[2]) - float(vector_two[2])

    # The square root of a sum of squares is never negative
    return np.sqrt((delta_x ** 2) + (delta_y ** 2) + (delta_z ** 2))

In [None]:
def xyz_file_read(file_path: str) -> tuple[list[str], npt.NDArray[np.float64]]:
    """
    Read in XYZ file format and return atomic symbols and coordinates

    The first non-empty line is treated as the atom-count header. A comment
    line after it is skipped whether it is blank or contains text; a
    non-blank comment is recognised by there being exactly one more data
    line than the header declares.

    ### Parameters
    1. file_path : str
        - Full path to an XYZ file.

    ### Returns
    1. atom_identities : list[str]
        - A list of all of the atomic symbols
    2. atom_coords : NDArray
        - A numpy array containing lists of XYZ coordinates for each atom
    """

    with open(file_path) as xyz_file:
        # split() already discards the newline, so no characters are sliced
        # off; this keeps the last digit of a final line with no newline.
        rows = [tokens for tokens in (line.split() for line in xyz_file) if tokens]

    # Everything after the header line
    atom_rows = rows[1:]

    if rows:
        try:
            declared_atoms = int(rows[0][0])
        except ValueError:
            declared_atoms = None

        # One surplus row means the (non-blank) comment line is still present
        if declared_atoms is not None and len(atom_rows) == declared_atoms + 1:
            atom_rows = atom_rows[1:]

    # List of all atomic symbols in molecule, in file order
    atom_identities = [str(row[0]) for row in atom_rows]

    # Array with all atomic coordinates, without atomic symbols
    atom_coords = np.array([[float(value) for value in row[1:]] for row in atom_rows])

    return atom_identities, atom_coords

In [None]:
def nudge_matrix_generator(stage: str) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]:
    """
    Creates the nudge matrices and vector for avoiding local minima during optimization.

    ### Parameters
    1. stage : str
        - Must be either `"R"` (Reactant) or `"P"` (Product) or `"T"` (Transition State)

    ### Returns
    1. nudge_rotate_x : NDArray
        - Rotation about the x-axis: +15 degrees for `"R"`, -15 degrees for `"P"`
    2. nudge_rotate_y : NDArray
        - Rotation about the y-axis: +20 degrees for `"R"`, -20 degrees for `"P"`
    3. nudge_translate : NDArray
        - A vector that nudges the molecule by a given amount (currently zero)

    For any stage other than `"R"` or `"P"` both rotations are the identity
    matrix, so applying them leaves coordinates unchanged.
    """

    # xv_degree (15 Degrees)
    xv_degree = np.pi / 12
    # xx_degree (20 Degrees)
    xx_degree = np.pi / 9

    nudge_translate = np.array([0, 0, 0])

    # Reactant and product are nudged by the same angles in opposite directions
    direction = {"R": 1.0, "P": -1.0}.get(stage)

    if direction is None:
        # No nudge defined: return well-defined identity rotations instead of
        # uninitialised memory.
        return np.diag([1.0, 1.0, 1.0]), np.diag([1.0, 1.0, 1.0]), nudge_translate

    x_angle = direction * xv_degree
    y_angle = direction * xx_degree

    nudge_rotate_x = np.array(
        [
            [1, 0, 0],
            [0, np.cos(x_angle), -np.sin(x_angle)],
            [0, np.sin(x_angle), np.cos(x_angle)],
        ],
        dtype=float,
    )

    nudge_rotate_y = np.array(
        [
            [np.cos(y_angle), 0, np.sin(y_angle)],
            [0, 1, 0],
            [-np.sin(y_angle), 0, np.cos(y_angle)],
        ],
        dtype=float,
    )

    return nudge_rotate_x, nudge_rotate_y, nudge_translate

In [None]:
def check_structure_one_generator(
    molecule_alignment_quaternion: npt.NDArray,
    mol: Molecule,
) -> list[list[float]]:
    """
    Apply the alignment rotation to every atom of `mol` and return the structure.

    Each of `mol.translated_atom_coords` is multiplied by
    `molecule_alignment_quaternion` and rounded to 6 decimal places.
    """

    return [
        [
            float(round(component, 6))
            for component in np.dot(molecule_alignment_quaternion, atom_position)
        ]
        for atom_position in mol.translated_atom_coords
    ]

In [None]:
def check_structure_two_generator(
    stage: str,
    z_rotation_offset: npt.NDArray,
    check_structure_one: npt.ArrayLike,
    distance_multiplier: float = 2.7,
) -> list[list[float]]:
    """
    Generate the second structure that the proton will be transferred to.

    Every atom of `check_structure_one` is mirrored through the XY plane,
    rotated by `z_rotation_offset` and shifted along +Z. For `"R"` and `"P"`
    the stage-specific nudge rotations/translation from
    `nudge_matrix_generator()` are then applied; for `"T"` no nudge is applied
    and the shift is 0.1 shorter.

    ### Parameters
    1. stage : str
        - Must be either `"R"` (Reactant) or `"P"` (Product) or `"T"` (Transition State).
    2. z_rotation_offset : NDArray
        - A matrix that rotates around the Z-axis.
    3. check_structure_one : ArrayLike
        - The first structure generated from `check_structure_one_generator()`.
    4. distance_multiplier : float, (default=2.7)
        - The distance between the parent charge sites in Angstroms.

    ### Returns
    1. check_structure_two : list[list[float]]
        - The second structure that the proton will be transferred to,
        with every coordinate rounded to 6 decimal places

    ### Raises
    1. ValueError
        - If `stage` is not one of `"R"`, `"P"` or `"T"`.
    """

    if stage not in ("R", "P", "T"):
        # Fail with a clear message instead of an unbound/stale coordinate
        raise ValueError(f"stage must be 'R', 'P' or 'T', got {stage!r}")

    nudge_rotate_x, nudge_rotate_y, nudge_translate = nudge_matrix_generator(stage)

    # The product is nudged in the opposite direction to the reactant
    translate_sign = -1 if stage == "P" else 1

    check_structure_two: list[list[float]] = []

    for atom_position in check_structure_one:
        mirrored = np.dot(Z_REFLECTOR, atom_position)

        if stage == "T":
            new_coord = np.dot(
                z_rotation_offset,
                mirrored + ((distance_multiplier - 0.1) * Z_UNIT_VECTOR),
            )
        else:
            # Rotate about Z, move along Z, then nudge about the origin
            shifted = np.dot(z_rotation_offset, mirrored) + (
                distance_multiplier * Z_UNIT_VECTOR
            )
            new_coord = np.dot(nudge_rotate_x, np.dot(nudge_rotate_y, shifted)) + (
                translate_sign * nudge_translate
            )

        check_structure_two.append([float(round(number, 6)) for number in new_coord])

    return check_structure_two

In [None]:
def structure_checker(
    check_structure_one: npt.ArrayLike,
    check_structure_two: npt.ArrayLike,
    mol: Molecule,
    attempt_number: int,
) -> bool:
    """
    Report whether any atom pair across the two molecules is too close.

    ### Parameters
    1. check_structure_one : ArrayLike
        - The structure generated from `check_structure_one_generator()`.
    2. check_structure_two : ArrayLike
        - The structure generated from `check_structure_two_generator()`.
    3. mol : Molecule
        - A Molecule object
    4. attempt_number : int
        - Index into `mol.proton_position_indices` selecting the proton
        used for this run

    ### Returns
    1. bool
        - True as soon as one compared pair is closer than 1.5 Angstroms.
        - False when no compared pair is that close.
    """

    transferred_proton = mol.proton_position_indices[attempt_number]
    # NOTE(review): the second structure skips index `n - 1`, not `n`.
    # Preserved as-is; confirm this offset is intentional.
    skipped_in_two = transferred_proton - 1

    for index_one, position_one in enumerate(check_structure_one):
        # The transferred proton in structure one is never compared
        if index_one == transferred_proton:
            continue

        for index_two, position_two in enumerate(check_structure_two):
            if index_two == skipped_in_two:
                continue

            if distance_calculator(position_one, position_two) < 1.5:
                return True

    # Every compared pair was at least 1.5 Angstroms apart
    return False

In [None]:
def final_structure_generator(
    check_structure_one: npt.ArrayLike,
    check_structure_two: npt.ArrayLike,
    stage: str,
    mol: Molecule,
    attempt_number: int,
    distance_multiplier: float = 2.7,
) -> list[list[str]]:
    """
    Assemble the complete two-molecule structure with a single shared proton.

    The selected proton is removed from both copies; exactly one proton is
    then appended at the end, chosen by `stage`. Should only be used after
    checking for/handling overlap.

    ### Parameters
    1. check_structure_one : ArrayLike
        - The structure generated from `check_structure_one_generator()`.
    2. check_structure_two : ArrayLike
        - The structure generated from `check_structure_two_generator()`.
    3. stage : str
        - `"R"` keeps the first copy's proton, `"P"` keeps the second copy's
        proton, `"T"` places an `"H"` on the Z axis at
        `(distance_multiplier - 0.1) / 2`. Any other value appends no proton.
    4. mol : Molecule
        - A Molecule object
    5. attempt_number : int
        - Index into `mol.proton_position_indices` selecting the proton
    6. distance_multiplier : float, (default=2.7)
        - The distance between the parent charge sites in Angstroms.

    ### Returns
    1. final_structure : list[list[str]]
        - Rows of atomic symbol followed by XYZ coordinates: first copy,
        second copy, then the transferred proton
    """

    labelled_one = []
    labelled_two = []

    # Prefix each coordinate row with its atomic symbol
    for atom_index, coords_one in enumerate(check_structure_one):
        symbol = mol.atom_identities[atom_index]
        labelled_one.append([symbol] + coords_one)
        labelled_two.append([symbol] + check_structure_two[atom_index])

    proton_index = mol.proton_position_indices[attempt_number]

    # Take the selected proton out of both copies
    reactant_proton = labelled_one.pop(proton_index)
    product_proton = labelled_two.pop(proton_index)

    proton_for_stage = {
        "R": reactant_proton,
        "P": product_proton,
        "T": ["H", 0.0, 0.0, (distance_multiplier - 0.1) / 2],
    }

    final_structure = labelled_one + labelled_two

    if stage in proton_for_stage:
        final_structure.append(proton_for_stage[stage])

    return final_structure

In [None]:
def overlap_handler(
    check_structure_one: npt.ArrayLike,
    check_structure_two: npt.ArrayLike,
    stage: str,
    mol: Molecule,
    z_rotation_offset: npt.NDArray,
    distance_multiplier: float = 2.7,
) -> tuple[list[list[float]], list[list[float]], int]:
    """
    Fix molecule overlap issues

    For each available proton position in turn, the molecule is re-aligned on
    that proton and paired with a second copy rotated half a turn about Z.
    If they still overlap, the second copy is tried at eight rotations about Z
    (0 to 315 degrees in 45 degree steps) before moving to the next proton.

    ### Parameters
    1. check_structure_one : ArrayLike
        - The structure generated from `check_structure_one_generator()`.
    2. check_structure_two : ArrayLike
        - The structure generated from `check_structure_two_generator()`.
    3. stage : str
        - Must be either `"R"` (Reactant) or `"P"` (Product) or `"T"` (Transition State).
    4. mol : Molecule
        - A Molecule object
    5. z_rotation_offset : NDArray
        - Accepted for interface compatibility; not read. Every attempt
        starts from a fixed half turn about Z.
    6. distance_multiplier : float, (default=2.7)
        - The distance between the parent charge sites in Angstroms.

    ### Returns
    1. check_structure_one : list[list[float]]
        - A corrected version of `check_structure_one`
    2. check_structure_two : list[list[float]]
        - A corrected version of `check_structure_two`
    3. attempt_number : int
        - Index of the proton position the returned structures were built
        with. When nothing worked this is the last position tried, so it is
        always a valid index into `mol.proton_position_indices`.

    ### Raises
    1. ValueError
        - If `mol.proton_position_indices` is empty.
    """

    proton_position_attempts = len(mol.proton_position_indices)
    if proton_position_attempts == 0:
        raise ValueError("mol.proton_position_indices is empty; nothing to try.")

    # Nothing to fix: hand the input back untouched
    if not structure_checker(check_structure_one, check_structure_two, mol, 0):
        return check_structure_one, check_structure_two, 0

    half_turn = np.array([[-1, 0, 0], [0, -1, 0], [0, 0, 1]], dtype=float)

    # Iterating over the valid indices keeps structure_checker from ever
    # being called with an out-of-range attempt number.
    for attempt_number in range(proton_position_attempts):
        proton_position = mol.translated_atom_coords[
            int(mol.proton_position_indices[attempt_number])
        ]

        # Rotate the molecule so the chosen proton points along +Z
        molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
        molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
        molecule_alignment_quaternion = quaternion_builder(
            molecule_alignment_vector, molecule_alignment_angle
        )

        check_structure_one = check_structure_one_generator(
            molecule_alignment_quaternion, mol
        )
        check_structure_two = check_structure_two_generator(
            stage, half_turn, check_structure_one, distance_multiplier
        )

        # If the structure works, then return it and exit the function
        if not structure_checker(
            check_structure_one, check_structure_two, mol, attempt_number
        ):
            print("Structure was fixed with proton position #" + str(attempt_number))
            return check_structure_one, check_structure_two, attempt_number

        # Otherwise spin the second copy about Z in 45 degree steps
        for step in range(8):
            rotation_angle = step * (np.pi / 4)

            stepped_rotation = np.array(
                [
                    [np.cos(rotation_angle), -np.sin(rotation_angle), 0],
                    [np.sin(rotation_angle), np.cos(rotation_angle), 0],
                    [0, 0, 1],
                ],
                dtype=float,
            )

            check_structure_two = check_structure_two_generator(
                stage, stepped_rotation, check_structure_one, distance_multiplier
            )

            if not structure_checker(
                check_structure_one, check_structure_two, mol, attempt_number
            ):
                print(
                    "Structure was fixed with proton position #"
                    + str(attempt_number)
                    + " with a rotation of "
                    + str(step * 45)
                    + " degrees"
                )
                return check_structure_one, check_structure_two, attempt_number

    print("Structure could not be fixed.")
    # The returned structures belong to the last proton position tried
    return check_structure_one, check_structure_two, proton_position_attempts - 1

In [None]:
# Structure generation for molecules with pre-existing structures
#
# Batch driver: for every CSV row, load the molecule's existing XYZ file,
# build the two-molecule complex for the chosen stage and write it out as
# "<molecule_name>-<stage>.xyz".
# CSV columns read below: [0] SMILES, [1] protonated atom index,
# [2:5] proton indices (or "NA"), [5] molecule name.

stage = "R"

distance_multiplier = DISTANCE_MULTIPLIERS[0]

# Opens the CSV and directory dialogs; the header row is already removed
multiple_molecule_data, xyz_file_path = multiple_input()

for molecule in multiple_molecule_data:
    molecule_name = molecule[5]
    atom_identities, atom_coords = xyz_file_read(xyz_file_path + molecule_name + ".xyz")

    # "NA" entries mark unused proton slots and are dropped
    molInformation = {
        "protonated_atom_index": int(molecule[1]),
        "proton_position_indices": [int(i) for i in molecule[2:5] if i != "NA"],
        "molecule_name": molecule_name,
        "atom_identities": atom_identities,
        "atom_coords": atom_coords,
    }

    structure = Molecule(molecule[0], **molInformation)

    attempt_number = 0

    proton_position = structure.get_proton_position(attempt_number)

    # Rotation that brings the first proton onto the +Z axis
    molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
    molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
    molecule_alignment_quaternion = quaternion_builder(
        molecule_alignment_vector, molecule_alignment_angle
    )

    check_structure_one = check_structure_one_generator(
        molecule_alignment_quaternion, structure
    )
    # Uses the notebook-level `z_rotation_offset` (half turn about Z)
    check_structure_two = check_structure_two_generator(
        stage, z_rotation_offset, check_structure_one, distance_multiplier
    )

    print(molecule_name)

    attempt_number = 0

    # Only fall back to the overlap handler when the first guess clashes
    if structure_checker(check_structure_one, check_structure_two, structure, 0):
        print("PANIC")
        check_structure_one, check_structure_two, attempt_number = overlap_handler(
            check_structure_one,
            check_structure_two,
            stage,
            structure,
            z_rotation_offset,
            distance_multiplier,
        )

    final_structure = final_structure_generator(
        check_structure_one,
        check_structure_two,
        stage,
        structure,
        attempt_number,
        distance_multiplier,
    )

    # NOTE(review): the path has no directory prefix, so the file lands in the
    # current working directory rather than `xyz_file_path` - confirm intent.
    with open(str(molecule_name) + "-" + stage + ".xyz", "w", newline="") as FinalFile:
        # Two copies of the molecule sharing one proton: 2n - 1 atoms.
        # The trailing "\n" plus savetxt's own newline leaves a blank comment line.
        headerstart = str(structure.get_num_atoms() * 2 - 1) + "\n"
        np.savetxt(
            FinalFile, final_structure, fmt="%s", header=headerstart, comments=""
        )


In [None]:
# TEST CELL
#
# Single-molecule run of the XYZ-based workflow using fixed file names
# ("MoleculeTest.csv" and "TestMolecule.xyz") instead of the file dialogs.

stage = "R"

distance_multiplier = DISTANCE_MULTIPLIERS[0]

multiple_molecule_data = []

with open("MoleculeTest.csv", newline="") as csvfile:
    data = csv.reader(csvfile)
    for row in data:
        multiple_molecule_data.append(row)

atom_identities, atom_coords = xyz_file_read("TestMolecule.xyz")

# Row 0 is not used; row 1 is the molecule under test.
# Columns: [0] SMILES, [1] protonated atom index,
# [2:5] proton indices (or "NA"), [5] molecule name.
molInformation = {
    "protonated_atom_index": int(multiple_molecule_data[1][1]),
    "proton_position_indices": [
        int(i) for i in multiple_molecule_data[1][2:5] if i != "NA"
    ],
    "molecule_name": multiple_molecule_data[1][5],
    "atom_identities": atom_identities,
    "atom_coords": atom_coords,
}

structure = Molecule(multiple_molecule_data[1][0], **molInformation)

attempt_number = 0

proton_position = structure.get_proton_position(attempt_number)

# Rotation that brings the first proton onto the +Z axis
molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
molecule_alignment_quaternion = quaternion_builder(
    molecule_alignment_vector, molecule_alignment_angle
)

check_structure_one = check_structure_one_generator(
    molecule_alignment_quaternion, structure
)
# Uses the notebook-level `z_rotation_offset` (half turn about Z)
check_structure_two = check_structure_two_generator(
    stage, z_rotation_offset, check_structure_one, distance_multiplier
)

attempt_number = 0

# Only fall back to the overlap handler when the first guess clashes
if structure_checker(check_structure_one, check_structure_two, structure, 0):
    print(structure.molecule_name, "Warning: Overlap detected, working on fix...")
    check_structure_one, check_structure_two, attempt_number = overlap_handler(
        check_structure_one,
        check_structure_two,
        stage,
        structure,
        z_rotation_offset,
        distance_multiplier,
    )

final_structure = final_structure_generator(
    check_structure_one,
    check_structure_two,
    stage,
    structure,
    attempt_number,
    distance_multiplier,
)

# Output goes to the current working directory (no directory prefix)
with open(
    str(structure.molecule_name) + "-" + stage + ".xyz", "w", newline=""
) as FinalFile:
    # Two copies of the molecule sharing one proton: 2n - 1 atoms.
    # The trailing "\n" plus savetxt's own newline leaves a blank comment line.
    headerstart = str(structure.get_num_atoms() * 2 - 1) + "\n"
    np.savetxt(FinalFile, final_structure, fmt="%s", header=headerstart, comments="")

In [None]:
# Molecule generation with only SMILES
#
# Batch driver: for every CSV row, build the molecule from its SMILES string,
# assemble the two-molecule complex, then write an XYZ file and an ORCA input
# file into the selected directory. A "run.sh" listing one ORCA command per
# molecule is written at the end.
# CSV columns read below: [0] SMILES, [1] molecule name.

stage = "R"

distance_multiplier = DISTANCE_MULTIPLIERS[0]

# Opens the CSV and directory dialogs; the header row is already removed
multiple_molecule_data, xyz_file_path = multiple_input()

for molecule in multiple_molecule_data:
    # Coordinates and charge-site indices are derived from the SMILES string
    structure = Molecule(molecule[0], **{"molecule_name": molecule[1]})

    attempt_number = 0

    proton_position = structure.get_proton_position(attempt_number)

    # Rotation that brings the first proton onto the +Z axis
    molecule_alignment_angle = vector_angle(proton_position, Z_UNIT_VECTOR)
    molecule_alignment_vector = normalize(np.cross(proton_position, Z_UNIT_VECTOR))
    molecule_alignment_quaternion = quaternion_builder(
        molecule_alignment_vector, molecule_alignment_angle
    )

    check_structure_one = check_structure_one_generator(
        molecule_alignment_quaternion, structure
    )
    # Uses the notebook-level `z_rotation_offset` (half turn about Z)
    check_structure_two = check_structure_two_generator(
        stage, z_rotation_offset, check_structure_one, distance_multiplier
    )

    attempt_number = 0

    # Only fall back to the overlap handler when the first guess clashes
    if structure_checker(check_structure_one, check_structure_two, structure, 0):
        print(structure.molecule_name, "Overlap detected, working on fix...")
        check_structure_one, check_structure_two, attempt_number = overlap_handler(
            check_structure_one,
            check_structure_two,
            stage,
            structure,
            z_rotation_offset,
            distance_multiplier,
        )

    final_structure = final_structure_generator(
        check_structure_one,
        check_structure_two,
        stage,
        structure,
        attempt_number,
        distance_multiplier,
    )

    with open(
        xyz_file_path + str(structure.molecule_name) + "-" + stage + ".xyz",
        "w",
        newline="",
    ) as finalXYZ:
        # Two copies of the molecule sharing one proton: 2n - 1 atoms.
        # The trailing "\n" plus savetxt's own newline leaves a blank comment line.
        headerstart = str(structure.get_num_atoms() * 2 - 1) + "\n"
        np.savetxt(finalXYZ, final_structure, fmt="%s", header=headerstart, comments="")

    # Matching ORCA input, written next to the XYZ file
    input_generator(structure.molecule_name, stage, final_structure, xyz_file_path)

# One line per molecule: run ORCA on the .inp and redirect output to .out
with open(xyz_file_path + "run.sh", "w", newline="\n") as batch:
    for file in multiple_molecule_data:
        batch.write(
            "$HOME/orca/orca "
            + file[1]
            + "-"
            + stage
            + ".inp > "
            + file[1]
            + "-"
            + stage
            + ".out\n"
        )