In [2]:
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
import numpy.typing as npt
import csv
from tkinter import Tk
from tkinter import filedialog
from copy import deepcopy
from dataclasses import dataclass

In [3]:
# Build a 3D structure for one molecule from its SMILES string and split the
# resulting XYZ text into atomic symbols and coordinates.
# Uncomment the next line to prompt for the SMILES string instead of using the hard-coded one.
#smilesString = input("Input SMILES Identifier: ") # Pulls the SMILES string for the molecule we are examining
smilesString = 'N#Cc1[nH][nH+]nc(C#N)1'
molecule = Chem.AddHs(Chem.MolFromSmiles(smilesString)) # Parses the SMILES string into an RDKit molecule and adds explicit hydrogens
params = AllChem.ETKDGv3() # Embedding parameters from RDKit's ETKDGv3 set
AllChem.EmbedMolecule(molecule, params) # Generates 3D coordinates in place. NOTE(review): the return value is not checked here, so a failed embedding would go unnoticed until the parsing below.

xyzString = Chem.rdmolfiles.MolToXYZBlock(molecule) # XYZ-format text for the embedded molecule

numAtoms = int(xyzString.split()[0]) # First whitespace-separated token of the XYZ text, read as the atom count
moleculeXYZ = [i.split() for i in xyzString.split("\n")[2:-1]] # Skips the first two lines (presumably count and comment line) and the final split element (presumably the empty string after the last newline)

# NOTE: atomIdentities is a module-level name that finalStructureGenerator reads when building the output structure.
atomIdentities = [str(i[0]) for i in moleculeXYZ] # List of all atomic symbols in molecule, in order from RDkit generated XYZ file
atomCoords = np.array([[float(j) for j in i[1:]] for i in moleculeXYZ]) # Array with all atomic coordinates, without atomic symbols

In [4]:
# substructureMatch function takes the RDkit molecule and searches the structure for protonated atoms

def substructureMatch(
        smilesString: str
        ):
    """Locate the protonated atom of a molecule and the protons bonded to it.

    Parameters
    ----------
    smilesString : str
        SMILES string of the protonated molecule.

    Returns
    -------
    protonatedAtomIndex : int
        Atom index of the first atom matching the protonated-site SMARTS.
    protonPositionIndeces : list of int
        Atom indices of the matched protons that are bonded to that atom.

    Raises
    ------
    ValueError
        If the SMILES string cannot be parsed, or if no atom matches the
        protonated-site SMARTS.

    Notes
    -----
    No 3D embedding is done here: only atom indices are returned, so
    coordinates are never needed.
    """

    parsedMolecule = Chem.MolFromSmiles(smilesString)
    if parsedMolecule is None: # RDKit returns None rather than raising on an unparsable SMILES
        raise ValueError('Could not parse SMILES string: ' + repr(smilesString))

    molecule = Chem.AddHs(parsedMolecule) # Explicit hydrogens are required so that the protons have their own atom indices

    chargedAtom = Chem.MolFromSmarts("[#7H+,#7H2+,#7H3+,#8H+,#8H2+]") # These are the smarts codes for protonated tertiary, secondary, and primary amines, as well as protonated carbonyls or ethers, and protonated alcohols
    protonAtom = Chem.MolFromSmarts("[$([#1][#7H+]),$([#1][#7H2+]),$([#1][#7H3+]),$([#1][#8H+]),$([#1][#8H2+])]") # This detects the protons that are connected to those parent charge sites using recursive SMARTS

    parentPosition = molecule.GetSubstructMatches(chargedAtom)
    if not parentPosition:
        raise ValueError('No protonated N or O site found in SMILES string: ' + repr(smilesString))

    protonatedAtomIndex: int = parentPosition[0][0] # Row index for protonated atom

    # Keep only the protons bonded to the selected parent atom; a molecule with
    # several charged sites would otherwise contribute protons from all of them.
    protonPositionIndeces = [
        match[0]
        for match in molecule.GetSubstructMatches(protonAtom)
        if molecule.GetBondBetweenAtoms(match[0], protonatedAtomIndex) is not None
    ]

    return protonatedAtomIndex, protonPositionIndeces

In [5]:
# Sets commonly used variables

# Default rotation applied to the second molecule: 180 degrees about the Z axis
# (x -> -x, y -> -y, z unchanged).
zRotationOffset = np.array([[-1, 0, 0], 
                            [0, -1, 0], 
                            [0,  0, 1]], dtype=float)

# Candidate separations between the two parent atoms; each value scales Z_UNIT_VECTOR
# to offset the second molecule along z (presumably in angstroms, matching the XYZ coordinates).
DISTANCE_MULTIPLIERS = [2.7, 2.8, 2.9, 3.0, 3.1, 3.2]

# Unit vector along +z; the transferring proton is aligned with this axis.
Z_UNIT_VECTOR = np.array([0, 0, 1])

# Reflection across the XY plane (z -> -z).
Z_REFLECTOR = np.array([[1, 0, 0],
                        [0, 1, 0],
                        [0, 0,-1]], dtype=float)


In [6]:
# multipleInput function for reading CSVs of information in place of specifying individual SMILES

def multipleInput():
    """Ask the user for a CSV of molecule data and a directory of XYZ files.

    Two dialogs are shown in order: a file picker for the CSV, then a
    directory picker for the folder holding the XYZ files.

    Returns
    -------
    moleculeData : list of list of str
        Every CSV row after the first (header) row.
    xyzFilePath : str
        The chosen directory with a trailing '/' appended.

    Raises
    ------
    FileNotFoundError
        If either dialog is cancelled, or the chosen CSV cannot be opened.
    """

    # A single hidden root window serves both dialogs and is always destroyed
    # afterwards, so no stray Tk instances are left behind in the kernel.
    root = Tk()
    root.withdraw()

    try:
        filename = filedialog.askopenfilename(parent=root)
        if not filename: # Cancelled dialogs return an empty value
            raise FileNotFoundError('No CSV file was selected.')

        with open(filename, newline='') as csvfile:
            moleculeData = list(csv.reader(csvfile))

        directory = filedialog.askdirectory(parent=root)
        if not directory: # Without this check a cancel would silently yield the path '/'
            raise FileNotFoundError('No XYZ directory was selected.')
    finally:
        root.destroy()

    xyzFilePath = directory + '/'

    return moleculeData[1:], xyzFilePath

In [7]:
# normalize function takes in a vector and returns the corresponding unit vector

def normalize(
        vector: npt.NDArray
        ):
    """Return the unit vector pointing along ``vector``.

    Parameters
    ----------
    vector : array-like
        Vector to normalize.

    Returns
    -------
    numpy.ndarray
        ``vector`` divided by its Euclidean norm. A zero vector is returned
        unchanged (as floats) instead of producing NaN.
    """

    vector = np.asarray(vector)
    norm = np.linalg.norm(vector) # Returns the norm of the inputted vector

    if norm == 0: # Dividing by a zero norm would give NaN entries
        norm = 1.0 # Dividing by one leaves the zero vector as it is

    return vector/norm # Returns the unit vector

In [8]:
# quaternionBuilder function that takes a 3D vector to rotate around and an angle and generates a rotation matrix

def quaternionBuilder(
        vector: npt.ArrayLike, 
        angle: npt.DTypeLike
        ):
    """Build a 3x3 rotation matrix from a rotation axis and an angle.

    Parameters
    ----------
    vector : array-like
        Rotation axis; its first three components are used.
    angle : float
        Rotation angle in radians.

    Returns
    -------
    numpy.ndarray
        3x3 rotation matrix derived from the normalized quaternion
        (cos(angle/2), sin(angle/2) * vector).
    """

    halfAngle = angle/2
    sinHalf = np.sin(halfAngle)

    # Quaternion as (scalar part, x, y, z), normalized before use
    rawQuaternion = [np.cos(halfAngle), vector[0] * sinHalf, vector[1] * sinHalf, vector[2] * sinHalf]
    w, x, y, z = normalize(rawQuaternion)

    firstRow = [1 - 2 * (y**2 + z**2), 2 * (x * y - w * z), 2 * (x * z + w * y)]
    secondRow = [2 * (x * y + w * z), 1 - 2 * (x**2 + z**2), 2 * (y * z - w * x)]
    thirdRow = [2 * (x * z - w * y), 2 * (y * z + w * x), 1 - 2 * (x**2 + y**2)]

    return np.array([firstRow, secondRow, thirdRow])

In [9]:
# vectorAngle function determines the angle between two vectors

def vectorAngle(
        vectorOne: npt.ArrayLike, 
        vectorTwo: npt.ArrayLike
        ):
    """Return the angle, in radians, between two vectors.

    Both vectors are normalized first, so only their directions matter.
    """

    cosine = np.dot(normalize(vectorOne), normalize(vectorTwo))

    # Rounding can push the dot product of (anti)parallel unit vectors slightly
    # outside [-1, 1], where arccos would return NaN; clipping prevents that.
    boundedCosine = np.clip(cosine, -1.0, 1.0)

    return np.arccos(boundedCosine)

In [10]:
# distanceCalculator calculates the Euclidean distance between two vectors

def distanceCalculator(
        vectorOne: npt.ArrayLike,
        vectorTwo: npt.ArrayLike
        ):
    """Return the Euclidean distance between two 3D points.

    Only the first three entries of each input are used, and each entry is
    converted with ``float`` before subtracting.
    """

    deltaX = float(vectorOne[0]) - float(vectorTwo[0])
    deltaY = float(vectorOne[1]) - float(vectorTwo[1])
    deltaZ = float(vectorOne[2]) - float(vectorTwo[2])

    squaredLength = deltaX**2 + deltaY**2 + deltaZ**2

    return np.sqrt(squaredLength) # The square root is never negative, so no absolute value is needed

In [11]:
# xyzFileRead function reads an XYZ file and puts it into a list of lists, each containing a single atom's information

def xyzFileRead(
        filePath: str
        ):
    """Read an XYZ file into atomic symbols and a coordinate array.

    Blank lines are ignored. The first non-blank line (the atom count) is
    skipped, and every remaining non-blank line is parsed as
    ``symbol x y z``.

    NOTE(review): a non-empty comment line on line two would be parsed as an
    atom and fail; the files read here appear to have a blank comment line.

    Parameters
    ----------
    filePath : str
        Path of the XYZ file.

    Returns
    -------
    atomIdentities : list of str
        Atomic symbols in file order.
    atomCoords : numpy.ndarray
        Coordinates, one row per atom.
    numAtoms : int
        Number of atoms parsed from the file.
    """

    moleculeXYZ = [] # One list of whitespace-separated fields per non-blank line

    with open(filePath) as f:
        for line in f:
            # split() already discards the trailing newline. Slicing off the last
            # character by hand would cut a digit from the final coordinate when
            # the file does not end with a newline.
            fields = line.split()
            if fields: # Skip blank lines (e.g. an empty comment line or trailing lines)
                moleculeXYZ.append(fields)

    atomRows = moleculeXYZ[1:] # Everything after the atom-count line

    atomIdentities = [str(row[0]) for row in atomRows] # List of all atomic symbols in molecule, in file order
    atomCoords = np.array([[float(value) for value in row[1:]] for row in atomRows]) # Array with all atomic coordinates, without atomic symbols
    numAtoms = len(atomIdentities)

    return atomIdentities, atomCoords, numAtoms

In [12]:
# nudgeMatrixGenerator function creates the nudge matrices and vector for avoiding local minima during optimization

def nudgeMatrixGenerator(stage):
    """Create the small rotations used to nudge the second molecule.

    Parameters
    ----------
    stage : str
        'R' gives rotations of +pi/12 about x and +pi/9 about y; 'P' gives
        the same rotations with the opposite sign. Any other stage gets
        identity matrices, i.e. no nudge.

    Returns
    -------
    nudgeRotateX : numpy.ndarray
        3x3 rotation about the x axis.
    nudgeRotateY : numpy.ndarray
        3x3 rotation about the y axis.
    nudgeTranslate : numpy.ndarray
        Translation vector, currently always zero.
    """

    nudgeTranslate = np.array([0, 0, 0])

    if stage == 'R':
        direction = 1.0
    elif stage == 'P':
        direction = -1.0
    else:
        # Return well-defined identity rotations rather than uninitialised memory
        return np.identity(3, dtype=float), np.identity(3, dtype=float), nudgeTranslate

    angleX = direction * np.pi / 12
    angleY = direction * np.pi / 9

    nudgeRotateX = np.array([[1, 0, 0],
                             [0, np.cos(angleX), -np.sin(angleX)],
                             [0, np.sin(angleX),  np.cos(angleX)]], dtype=float)

    nudgeRotateY = np.array([[np.cos(angleY), 0, np.sin(angleY)],
                             [0, 1, 0],
                             [-np.sin(angleY), 0, np.cos(angleY)]], dtype=float)

    return nudgeRotateX, nudgeRotateY, nudgeTranslate

In [13]:
# checkStructureOneGenerator function generates the first molecule's coordinates, aligns the proton and the parent atom along the Z-axis

def checkStructureOneGenerator(
        moleculeAlignmentQuaternion: npt.NDArray, 
        translatedAtomCoords: npt.NDArray,
        numAtoms: int
        ):
    """Rotate the first ``numAtoms`` atoms with the alignment matrix.

    Parameters
    ----------
    moleculeAlignmentQuaternion : numpy.ndarray
        3x3 rotation matrix applied to each atom position.
    translatedAtomCoords : numpy.ndarray
        Atom positions, one row per atom.
    numAtoms : int
        Number of atoms to process, starting from row 0.

    Returns
    -------
    list of list of float
        Rotated positions, each component rounded to six decimals.
    """

    def rotateAndRound(atomIndex):
        rotated = np.dot(moleculeAlignmentQuaternion, translatedAtomCoords[atomIndex]) # Applies the rotation matrix to the atom position
        return [float(round(component, 6)) for component in rotated] # Rounds each coordinate to six decimals

    return [rotateAndRound(atomIndex) for atomIndex in range(numAtoms)]

In [14]:
# checkStructureTwoGenerator function generates the second structure, applies slight rotations to avoid optimizing into a local minimum structure

def checkStructureTwoGenerator(
        stage: str,
        distanceMultiplier: int,
        zRotationOffset: npt.NDArray,
        numAtoms: int,
        checkStructureOne: npt.ArrayLike
        ):
    """Generate the second molecule from the aligned first molecule.

    Each atom of ``checkStructureOne`` is reflected across the XY plane.

    * Stages 'R' and 'P': the reflected position is rotated by
      ``zRotationOffset``, shifted by ``distanceMultiplier`` along z, rotated
      by the Y nudge and then the X nudge, and finally offset by the nudge
      translation (added for 'R', subtracted for 'P').
    * Stage 'T': the reflected position is shifted by
      ``distanceMultiplier - 0.1`` along z and then rotated by
      ``zRotationOffset``; no nudge is applied.

    Returns
    -------
    list of list of float
        One position per atom, each component rounded to six decimals.
    """

    nudgeRotateX, nudgeRotateY, nudgeTranslate = nudgeMatrixGenerator(stage)
    secondStructure = [] # Collects the positions of the second molecule

    for atomIndex in range(numAtoms): # Iterates through all the atoms in the second molecule
        mirrored = np.dot(Z_REFLECTOR, checkStructureOne[atomIndex]) # Reflection across the XY plane

        if stage == 'R':
            displaced = np.dot(zRotationOffset, mirrored) + (distanceMultiplier*Z_UNIT_VECTOR)
            nudged = np.dot(nudgeRotateX, np.dot(nudgeRotateY, displaced))
            newCoord = nudged + (nudgeTranslate)
        elif stage == 'P':
            displaced = np.dot(zRotationOffset, mirrored) + (distanceMultiplier*Z_UNIT_VECTOR)
            nudged = np.dot(nudgeRotateX, np.dot(nudgeRotateY, displaced))
            newCoord = nudged + ((-1)*nudgeTranslate)
        elif stage == 'T':
            displaced = mirrored + ((distanceMultiplier-0.1) * Z_UNIT_VECTOR)
            newCoord = np.dot(zRotationOffset, displaced)

        secondStructure.append([float(round(component, 6)) for component in newCoord]) # Rounds the coordinates to six decimals

    return secondStructure

In [15]:
# structureChecker function checks the two molecules' positions to see if there is any undesired atom overlap

def structureChecker(
        checkStructureOne: npt.ArrayLike, 
        checkStructureTwo: npt.ArrayLike,
        protonPositionIndeces: list,
        attemptNumber: int,
        minimumDistance: float = 1.5
        ):
    """Report whether the two molecules overlap once the transfer proton is ignored.

    The proton at ``protonPositionIndeces[attemptNumber]`` is removed from
    both structures (both use the same atom ordering), and every remaining
    atom of one structure is compared with every remaining atom of the other.

    Parameters
    ----------
    checkStructureOne, checkStructureTwo : sequence of [x, y, z]
        Atom positions of the two molecules. Neither input is modified.
    protonPositionIndeces : list of int
        Atom indices of the candidate transfer protons.
    attemptNumber : int
        Which entry of ``protonPositionIndeces`` is being tried.
    minimumDistance : float, optional
        Distance below which two atoms count as overlapping (default 1.5).

    Returns
    -------
    bool
        True if any inter-molecule atom pair is closer than ``minimumDistance``.
    """

    protonIndex = protonPositionIndeces[attemptNumber]

    # Work on shallow copies of the row lists so the callers' structures are untouched
    atomsOne = list(checkStructureOne)
    atomsTwo = list(checkStructureTwo)

    atomsOne.pop(protonIndex) # Deletes the proton from the first check structure
    atomsTwo.pop(protonIndex) # Deletes the same proton from the second check structure

    if not atomsOne or not atomsTwo: # No atom pairs left to compare
        return False

    coordsOne = np.asarray(atomsOne, dtype=float)[:, :3]
    coordsTwo = np.asarray(atomsTwo, dtype=float)[:, :3]

    # Pairwise distances between every atom of structure one and every atom of structure two
    distances = np.linalg.norm(coordsOne[:, None, :] - coordsTwo[None, :, :], axis=-1)

    return bool((distances < minimumDistance).any())

In [16]:
# finalStructureGenerator function creates the final structure

def finalStructureGenerator(
        checkStructureOne: npt.ArrayLike, 
        checkStructureTwo: npt.ArrayLike, 
        distanceMultiplier: float,
        stage: str,
        protonPositionIndeces: list,
        attemptNumber: int,
        atomIdentities: list = None
        ):
    """Combine both molecules and a single transfer proton into one structure.

    The transfer proton is removed from each molecule, the remaining atoms
    are concatenated (molecule one first), and one proton is appended last:

    * 'R': the proton at its position in molecule one,
    * 'P': the proton at its position in molecule two,
    * 'T': a proton at (0, 0, (distanceMultiplier - 0.1) / 2).

    Parameters
    ----------
    checkStructureOne, checkStructureTwo : sequence of [x, y, z]
        Atom positions of the two molecules in the same atom order. Neither
        input is modified.
    distanceMultiplier : float
        Offset along z used when the second molecule was generated.
    stage : str
        'R', 'P' or 'T'.
    protonPositionIndeces : list of int
        Atom indices of the candidate transfer protons.
    attemptNumber : int
        Which entry of ``protonPositionIndeces`` is the transfer proton.
    atomIdentities : list of str, optional
        Atomic symbols in atom order. When omitted, the module-level
        ``atomIdentities`` variable is used, as before.

    Returns
    -------
    list of list
        Rows of ``[symbol, x, y, z]``.

    Raises
    ------
    ValueError
        If ``stage`` is not 'R', 'P' or 'T'.
    NameError
        If ``atomIdentities`` is not passed and no module-level value exists.
    """

    if atomIdentities is None:
        # Backward-compatible fallback to the notebook-level variable
        try:
            atomIdentities = globals()['atomIdentities']
        except KeyError:
            raise NameError("name 'atomIdentities' is not defined; pass atomIdentities explicitly") from None

    if stage not in ('R', 'P', 'T'):
        # Without this check an unknown stage would yield a structure with no transfer proton at all
        raise ValueError("stage must be 'R', 'P' or 'T', got " + repr(stage))

    protonIndex = protonPositionIndeces[attemptNumber]
    atomCount = len(checkStructureOne)

    # Fresh rows of [symbol, x, y, z]; the input structures are not touched
    newStructureOne = [[atomIdentities[i], *checkStructureOne[i]] for i in range(atomCount)]
    newStructureTwo = [[atomIdentities[i], *checkStructureTwo[i]] for i in range(atomCount)]

    reactantProton = newStructureOne.pop(protonIndex) # Saves the position of the first proton of interest
    productProton = newStructureTwo.pop(protonIndex) # Saves the position of the second proton of interest
    transitionProton = ['H', 0., 0., (distanceMultiplier-0.1)/2]

    finalStructure = newStructureOne + newStructureTwo

    if stage == 'R':
        finalStructure.append(reactantProton) # Proton stays on molecule one
    elif stage == 'P':
        finalStructure.append(productProton) # Proton sits on molecule two
    else: # stage == 'T'
        finalStructure.append(transitionProton) # Proton placed on the z axis between the molecules

    return finalStructure

In [17]:
# Handles structure overlap issues

def overlapHandler(
        checkStructureOne: npt.ArrayLike,
        checkStructureTwo: npt.ArrayLike,
        stage: str,
        distanceMultiplier: float,
        translatedAtomCoords: npt.NDArray,
        protonPositionIndeces: list,
        numAtoms: int,
        zRotationOffset: npt.NDArray
        ):
    """Search for a placement of the second molecule that avoids atom overlap.

    Each candidate proton is tried in turn. For each one the molecule is
    re-aligned so that proton lies along +z, the second molecule is rebuilt
    with the default 180-degree rotation about z, and then with rotations
    about z in 45-degree steps (0 to 315 degrees). The first placement that
    passes ``structureChecker`` is returned.

    Parameters
    ----------
    checkStructureOne, checkStructureTwo : sequence of [x, y, z]
        Current structures; returned unchanged if they do not overlap.
    stage : str
        Stage passed through to ``checkStructureTwoGenerator``.
    distanceMultiplier : float
        Offset along z passed through to ``checkStructureTwoGenerator``.
    translatedAtomCoords : numpy.ndarray
        Atom positions with the protonated atom at the origin.
    protonPositionIndeces : list of int
        Atom indices of the candidate transfer protons.
    numAtoms : int
        Number of atoms in one molecule.
    zRotationOffset : numpy.ndarray
        Kept for interface compatibility; the rotations tried here are
        generated internally, so this value is not used.

    Returns
    -------
    checkStructureOne, checkStructureTwo : list
        The accepted structures, or the last ones tried if nothing worked.
    attemptNumber : int
        Index into ``protonPositionIndeces`` of the proton the returned
        structures were aligned on. Always a valid index when the list is
        non-empty, including when no fix was found.
    """

    protonPositionAttempts = len(protonPositionIndeces)

    # Nothing to fix: hand the structures back as they are
    if protonPositionAttempts > 0 and not structureChecker(checkStructureOne, checkStructureTwo, protonPositionIndeces, 0):
        return checkStructureOne, checkStructureTwo, 0

    halfTurnAboutZ = np.array([[-1, 0, 0], 
                               [0, -1, 0], 
                               [0,  0, 1]], dtype=float) # Rotates by 180 degrees about Z axis

    # Iterating over the valid indices keeps attemptNumber in range; the check is
    # never called with an index past the end of protonPositionIndeces.
    for attemptNumber in range(protonPositionAttempts):

        protonPosition = translatedAtomCoords[int(protonPositionIndeces[attemptNumber])]

        # Rotation that brings this proton onto the +z axis
        moleculeAlignmentAngle = vectorAngle(protonPosition, Z_UNIT_VECTOR)
        moleculeAlignmentVector = normalize(np.cross(protonPosition, Z_UNIT_VECTOR))
        moleculeAlignmentQuaternion = quaternionBuilder(moleculeAlignmentVector, moleculeAlignmentAngle)

        checkStructureOne = checkStructureOneGenerator(moleculeAlignmentQuaternion, translatedAtomCoords, numAtoms)
        checkStructureTwo = checkStructureTwoGenerator(stage, distanceMultiplier, halfTurnAboutZ, numAtoms, checkStructureOne)

        # If the structure works, then return it and exit the function
        if not structureChecker(checkStructureOne, checkStructureTwo, protonPositionIndeces, attemptNumber):
            print('Structure was fixed with proton position #'+str(attemptNumber))
            return checkStructureOne, checkStructureTwo, attemptNumber

        # Otherwise spin the second molecule about z in 45-degree steps
        for i in range(8):

            rotationAngle = i * (np.pi/4)

            stepRotation = np.array([[np.cos(rotationAngle), -np.sin(rotationAngle), 0], 
                                     [np.sin(rotationAngle),  np.cos(rotationAngle), 0], 
                                     [0, 0, 1]], dtype=float)

            checkStructureTwo = checkStructureTwoGenerator(stage, distanceMultiplier, stepRotation, numAtoms, checkStructureOne)

            if not structureChecker(checkStructureOne, checkStructureTwo, protonPositionIndeces, attemptNumber):
                print('Structure was fixed with proton position #'+str(attemptNumber)+' with a rotation of '+str(i*45)+' degrees')
                return checkStructureOne, checkStructureTwo, attemptNumber

    print('Structure could not be fixed.')
    # Report the last proton actually tried so callers can still index protonPositionIndeces
    return checkStructureOne, checkStructureTwo, max(protonPositionAttempts - 1, 0)

In [None]:
# Batch run: generate one output structure per row of a user-selected CSV.
# Each row supplies the protonated atom index (column 1), up to three proton
# indices (columns 2-4, 'NA' entries are skipped) and the XYZ file stem (column 5).

#
# DO NOT USE DURING TESTING
#

# Uncomment the next line to prompt for the stage instead of using the hard-coded one.
# stage = input('Input stage (R, P, T): ')
stage = 'R'

distanceMultiplier = DISTANCE_MULTIPLIERS[0] # Uses the first (smallest) separation in the list

moleculeData, xyzFilePath = multipleInput() # Opens the file and directory dialogs

# NOTE: the loop variable `molecule` rebinds the module-level `molecule` assigned in the SMILES cell.
for molecule in moleculeData:

    protonatedAtomIndex = int(molecule[1])
    protonPositionIndeces = [int(i) for i in molecule[2:5] if i != 'NA']
    moleculeNumber = molecule[5]

    # atomIdentities is assigned at module level here; finalStructureGenerator reads it from there.
    atomIdentities, atomCoords, numAtoms = xyzFileRead(xyzFilePath+moleculeNumber+'.xyz')

    protonatedAtomPosition = atomCoords[protonatedAtomIndex] # Original position of parent atom
    translatedAtomCoords = atomCoords - protonatedAtomPosition # Puts the parent atom at (0, 0, 0) and translates all atoms to be relative to it
    protonPosition = translatedAtomCoords[int(protonPositionIndeces[0])] # Coordinates for the proton involved in transport

    # Rotation that brings the first candidate proton onto the +z axis
    moleculeAlignmentAngle = vectorAngle(protonPosition, Z_UNIT_VECTOR)
    moleculeAlignmentVector = normalize(np.cross(protonPosition, Z_UNIT_VECTOR))
    moleculeAlignmentQuaternion = quaternionBuilder(moleculeAlignmentVector, moleculeAlignmentAngle)

    checkStructureOne = checkStructureOneGenerator(moleculeAlignmentQuaternion, translatedAtomCoords, numAtoms)

    checkStructureTwo = checkStructureTwoGenerator(stage, distanceMultiplier, zRotationOffset, numAtoms, checkStructureOne)

    print(moleculeNumber)

    attemptNumber = 0 # Index of the proton in use; only changed if overlapHandler selects another one

    # "PANIC" marks molecules whose initial placement overlaps and needs the overlap handler
    if structureChecker(checkStructureOne, checkStructureTwo, protonPositionIndeces, 0):
        print("PANIC")
        checkStructureOne, checkStructureTwo, attemptNumber = overlapHandler(
                                                                checkStructureOne, 
                                                                checkStructureTwo, 
                                                                stage, 
                                                                distanceMultiplier, 
                                                                translatedAtomCoords,
                                                                protonPositionIndeces, 
                                                                numAtoms, 
                                                                zRotationOffset
                                                                )

    finalStructure = finalStructureGenerator(checkStructureOne, checkStructureTwo, 
                                             distanceMultiplier, stage, protonPositionIndeces, attemptNumber)

    # The output name is relative, so the file is written to the current working directory
    with open(str(moleculeNumber)+'-'+stage+'.xyz', 'w', newline='') as FinalFile: # Opens the output XYZ file for writing
        headerstart = str(numAtoms*2-1) + '\n' # Atom count of the combined structure (two molecules sharing one proton); the trailing '\n' leaves the line after the count blank
        np.savetxt(FinalFile, finalStructure, fmt='%s', header=headerstart,comments='') # Writes one "symbol x y z" row per atom below the header



In [21]:
# Single-molecule test run: same pipeline as the batch cell, but only the first
# data row of the selected CSV is processed and the result is written to 'Test-<stage>.xyz'.

stage = 'R'

distanceMultiplier = DISTANCE_MULTIPLIERS[0] # Uses the first (smallest) separation in the list

moleculeData, xyzFilePath = multipleInput() # Opens the file and directory dialogs

# Row layout: column 1 = protonated atom index, columns 2-4 = proton indices ('NA' skipped), column 5 = XYZ file stem
protonatedAtomIndex = int(moleculeData[0][1])
protonPositionIndeces = [int(i) for i in moleculeData[0][2:5] if i != 'NA']
moleculeNumber = moleculeData[0][5]

# atomIdentities is assigned at module level here; finalStructureGenerator reads it from there.
atomIdentities, atomCoords, numAtoms = xyzFileRead(xyzFilePath+moleculeNumber+'.xyz')

protonatedAtomPosition = atomCoords[protonatedAtomIndex] # Original position of parent atom
translatedAtomCoords = atomCoords - protonatedAtomPosition # Puts the parent atom at (0, 0, 0) and translates all atoms to be relative to it
protonPosition = translatedAtomCoords[int(protonPositionIndeces[0])] # Coordinates for the proton involved in transport

# Rotation that brings the first candidate proton onto the +z axis
moleculeAlignmentAngle = vectorAngle(protonPosition, Z_UNIT_VECTOR)
moleculeAlignmentVector = normalize(np.cross(protonPosition, Z_UNIT_VECTOR))
moleculeAlignmentQuaternion = quaternionBuilder(moleculeAlignmentVector, moleculeAlignmentAngle)

checkStructureOne = checkStructureOneGenerator(moleculeAlignmentQuaternion, translatedAtomCoords, numAtoms)

checkStructureTwo = checkStructureTwoGenerator(stage, distanceMultiplier, zRotationOffset, numAtoms, checkStructureOne)

print(moleculeNumber)

# Must be set before the overlap check: when there is no overlap the handler is
# never called, and finalStructureGenerator would otherwise see an undefined
# (or stale) attemptNumber on a fresh kernel.
attemptNumber = 0

# "PANIC" marks a molecule whose initial placement overlaps and needs the overlap handler
if structureChecker(checkStructureOne, checkStructureTwo, protonPositionIndeces, 0):
    print("PANIC")
    checkStructureOne, checkStructureTwo, attemptNumber = overlapHandler(
                                                checkStructureOne, 
                                                checkStructureTwo, 
                                                stage, 
                                                distanceMultiplier, 
                                                translatedAtomCoords, 
                                                protonPositionIndeces, 
                                                numAtoms, 
                                                zRotationOffset
                                                )

finalStructure = finalStructureGenerator(checkStructureOne, checkStructureTwo, 
                                            distanceMultiplier, stage, protonPositionIndeces, attemptNumber)

# The output name is relative, so the file is written to the current working directory
with open('Test-'+stage+'.xyz', 'w', newline='') as FinalFile: # Opens the output XYZ file for writing
    headerstart = str(numAtoms*2-1) + '\n' # Atom count of the combined structure (two molecules sharing one proton); the trailing '\n' leaves the line after the count blank
    np.savetxt(FinalFile, finalStructure, fmt='%s', header=headerstart,comments='') # Writes one "symbol x y z" row per atom below the header

TestMolecule
PANIC
Structure was fixed with proton position #1


In [None]:
import easyxtb