In [3]:
import molli as ml
from pprint import pprint
import numpy as np
from tqdm import tqdm
from molli.math import rotation_matrix_from_vectors
import pandas as pd
from scipy.spatial.transform import Rotation

def set_origin(ml_mol:ml.Molecule,i: int):
    """
    Shift ``ml_mol`` so that the atom at coordinate index ``i`` lands on the origin.

    The shift vector is the negated coordinate row ``ml_mol.coords[i]``; it is
    handed to the molecule's own ``translate`` method. Nothing is returned.

    Parameters
    ----------
    ml_mol : ml.Molecule
        Molecule to translate.
    i : int
        Index into ``ml_mol.coords`` of the atom that should become the origin.
    """
    anchor_position = ml_mol.coords[i]
    # Build the offset as a fresh array before calling translate.
    shift = -1 * anchor_position
    ml_mol.translate(shift)

def rot_rz(ml_mol: ml.Molecule, c0:ml.Atom, c1: ml.Atom):
    """
    Reorient ``ml_mol`` using the rotation built from the c0 -> c1 vector and the x unit vector.

    The matrix comes from ``rotation_matrix_from_vectors(v, [1, 0, 0])`` where
    ``v = ml_mol.vector(c0, c1)``, and is applied through ``ml_mol.transform``.
    NOTE(review): despite the "rz" in the name, the target vector here is the
    x-axis; presumably the result puts c0 -> c1 along +x -- confirm against molli.

    Parameters
    ----------
    ml_mol : ml.Molecule
        Molecule to transform.
    c0, c1 : ml.Atom
        Atoms of ``ml_mol`` defining the vector passed to the matrix builder.
    """
    x_axis = np.array([1,0,0])
    bond_vector = ml_mol.vector(c0, c1)
    ml_mol.transform(rotation_matrix_from_vectors(bond_vector, x_axis))

def align_diol(diol_alk_dict:dict, alk_mlib: ml.MoleculeLibrary, diol_mlib:ml.MoleculeLibrary, align_diol_mlib: ml.MoleculeLibrary):
    """
    Re-orient every diol in ``diol_mlib`` against its matching alkene and store the result.

    For each diol name in ``diol_mlib`` the function:

    1. reads the diol and resolves its two 'C Order' atoms (C0, C1) and its
       two 'Q1Q4 Order' atoms (Q1, Q4);
    2. looks up the matching alkene name in ``diol_alk_dict``, reads that alkene
       from ``alk_mlib`` and resolves its 'C Order' (C0, C1) and 'Q Order'
       (Q1..Q4) atoms;
    3. translates the diol so C0 is at the origin and calls ``rot_rz`` on C0 -> C1;
    4. fits a rotation between the (Q1, Q4, C0, C1) coordinates of the diol and
       of the alkene with ``Rotation.align_vectors`` and applies it to the diol;
    5. calls ``rot_rz`` on C0 -> C1 once more;
    6. writes the diol to ``align_diol_mlib`` under the same name.

    After the loop the output library is printed.

    Parameters
    ----------
    diol_alk_dict : dict
        Maps each diol name (key of ``diol_mlib``) to an alkene name (key of ``alk_mlib``).
    alk_mlib : ml.MoleculeLibrary
        Library of alkenes; opened for reading. Each entry must carry the
        'C Order' (2 atoms) and 'Q Order' (4 atoms) attributes.
    diol_mlib : ml.MoleculeLibrary
        Library of diols; opened for reading. Each entry must carry the
        'C Order' (2 atoms) and 'Q1Q4 Order' (2 atoms) attributes.
    align_diol_mlib : ml.MoleculeLibrary
        Destination library; opened for writing.

    Raises
    ------
    KeyError
        If a diol name has no entry in ``diol_alk_dict``.
    ValueError
        If an order attribute does not hold the expected number of atoms
        (raised by the tuple unpacking).
    """

    with alk_mlib.reading(), diol_mlib.reading(), align_diol_mlib.writing():
        for diol_name in tqdm(diol_mlib):

            ml_diol = diol_mlib[diol_name]
            diol_c0, diol_c1 = [ml_diol.get_atom(x) for x in ml_diol.attrib['C Order']]
            # Only C0's index is needed (for the translation); the other atoms
            # are used as atom objects.
            diol_c0_idx = ml_diol.get_atom_index(diol_c0)
            diol_q1a, diol_q4a = [ml_diol.get_atom(x) for x in ml_diol.attrib['Q1Q4 Order']]

            alk_name = diol_alk_dict[diol_name]

            ml_alk = alk_mlib[alk_name]
            alk_c0, alk_c1 = [ml_alk.get_atom(x) for x in ml_alk.attrib['C Order']]
            # Q2/Q3 are not part of the fit, but unpacking all four keeps the
            # check that 'Q Order' holds exactly four atoms.
            alk_q1a, _alk_q2a, _alk_q3a, alk_q4a = [ml_alk.get_atom(x) for x in ml_alk.attrib['Q Order']]

            # Put the diol's C0 at the origin
            set_origin(ml_diol, diol_c0_idx)

            # Rotate the diol using the C0 --> C1 vector and the X-axis
            rot_rz(ml_diol, diol_c0, diol_c1)

            alk_subgeom = ml_alk.coord_subset([alk_q1a, alk_q4a, alk_c0, alk_c1])
            diol_subgeom = ml_diol.coord_subset([diol_q1a, diol_q4a, diol_c0, diol_c1])

            # align_vectors(a, b) estimates the rotation that best maps b onto a.
            # NOTE(review): applying it to the diol with ml_diol.transform
            # presumably relies on molli right-multiplying coordinates
            # (coords @ R) -- confirm. The second return value (residual of the
            # fit) is intentionally discarded.
            fitted_rotation, _ = Rotation.align_vectors(diol_subgeom, alk_subgeom)
            ml_diol.transform(fitted_rotation.as_matrix())

            # Re-apply the C0 --> C1 rotation after the fit
            rot_rz(ml_diol, diol_c0, diol_c1)

            align_diol_mlib[diol_name] = ml_diol

        print(align_diol_mlib)


In [4]:
# Iteration count that appears in the diol library file names below.
max_iter = 10000

DB_df = pd.read_csv("SAD_Database.csv")

# "Product ID" -> "Reactant ID"; align_diol uses it to find the alkene for each diol.
diol_alk_dict = dict(
    DB_df[["Product ID", "Reactant ID"]].values
)

# 3BFSVol libraries: alkene and diol inputs, plus a writable output that is overwritten.
alk_BFSVol_mlib = ml.MoleculeLibrary(
    "6_7_Realign_3BFSVol.mlib"
)
diol_BFSVol_mlib = ml.MoleculeLibrary(
    f"5_2_Diol_Q1Q4_3BFSVol_Assign_{max_iter}iter.mlib"
)
align_BFSVol_mlib = ml.MoleculeLibrary(
    f"5_3_Diol_3BFSVol_Realign_{max_iter}iter.mlib",
    readonly=False,
    overwrite=True,
)

align_diol(
    diol_alk_dict=diol_alk_dict,
    alk_mlib=alk_BFSVol_mlib,
    diol_mlib=diol_BFSVol_mlib,
    align_diol_mlib=align_BFSVol_mlib,
)

100%|██████████| 888/888 [00:04<00:00, 185.78it/s]

MoleculeLibrary(backend=UkvCollectionBackend('5_3_Diol_3BFSVol_Realign_10000iter.mlib'), n_items=888)





In [5]:
# MaxVol libraries. This cell reuses `max_iter` and `diol_alk_dict`, which are
# not defined here and must already exist in the kernel.
alk_maxvol_mlib = ml.MoleculeLibrary(
    "6_7_Realign_MaxVol.mlib"
)
diol_maxvol_mlib = ml.MoleculeLibrary(
    f"5_2_Diol_Q1Q4_MaxVol_Assign_{max_iter}iter.mlib"
)
align_maxvol_mlib = ml.MoleculeLibrary(
    f"5_3_Diol_MaxVol_Realign_{max_iter}iter.mlib",
    readonly=False,
    overwrite=True,
)

align_diol(
    diol_alk_dict=diol_alk_dict,
    alk_mlib=alk_maxvol_mlib,
    diol_mlib=diol_maxvol_mlib,
    align_diol_mlib=align_maxvol_mlib,
)

100%|██████████| 942/942 [00:05<00:00, 186.43it/s]

MoleculeLibrary(backend=UkvCollectionBackend('5_3_Diol_MaxVol_Realign_10000iter.mlib'), n_items=942)



