In [5]:
import molli as ml
from pprint import pprint
import numpy as np
from tqdm import tqdm
import pandas as pd

def create_n_vec(ml_mol:ml.Molecule, q1: ml.Atom, c0: ml.Atom, c1:ml.Atom):
    """Return the cross product of two bond vectors of an alkene.

    The result is ``cross(ml_mol.vector(q1, c0), ml_mol.vector(c0, c1))``.
    Its component along ``[0, 0, 1]`` must be strictly positive; otherwise
    the assertion below fails.

    Parameters
    ----------
    ml_mol : molecule that owns the three atoms.
    q1, c0, c1 : atoms passed, in that order, to ``ml_mol.vector``.

    Returns
    -------
    The cross-product vector (not normalised).

    Raises
    ------
    AssertionError
        If the sign of the z-projection of the cross product is not ``1``.
    """
    n_vec = np.cross(ml_mol.vector(q1, c0), ml_mol.vector(c0, c1))

    # Project onto the z axis and keep only the sign of that projection
    z_axis = np.array([0, 0, 1])
    dot_test = np.dot(z_axis, n_vec)
    test_sign = np.sign(dot_test)

    assert test_sign == 1, f'Alkene vector sign not correct!: {test_sign}, n_vec = {n_vec}, dot_test={dot_test}'

    return n_vec

def find_oh(ml_mol: ml.Molecule, c0: ml.Atom, c1: ml.Atom, q1: ml.Atom, q4: ml.Atom):
    """Return the neighbor of ``c0`` that is not ``c1``, ``q1`` or ``q4``.

    Only the first such neighbor reported by ``ml_mol.connected_atoms(c0)``
    is examined, and it is required to be an oxygen.

    Raises
    ------
    AssertionError
        If that neighbor's element is not ``ml.Element.O``.
    ValueError
        If every neighbor of ``c0`` is one of ``c1``, ``q1``, ``q4``
        (or the neighbor found evaluates as falsy).
    """
    already_known = [c1, q1, q4]

    for neighbor in ml_mol.connected_atoms(c0):
        if neighbor in already_known:
            continue

        # The first unlisted neighbor decides the outcome; nothing after it is inspected
        assert neighbor.element == ml.Element.O, f'Found non oxygen atom: {neighbor}'
        if neighbor:
            return neighbor
        # A falsy atom object is treated the same as not finding one
        break

    raise ValueError("Oxygen not found!")

def calc_align(diol_alk_dict: dict, alk_mlib:ml.MoleculeLibrary, diol_mlib: ml.MoleculeLibrary, res_mlib: ml.MoleculeLibrary):
    """Label each diol with the face ('Top'/'Bottom') on which its C0 oxygen lies.

    For every entry of ``diol_mlib`` the parent alkene is looked up through
    ``diol_alk_dict`` and read from ``alk_mlib``. The sign of the dot product
    between the alkene vector from ``create_n_vec`` and the diol's C0->O
    vector selects the face. The diol is then stored in ``res_mlib`` under the
    same name with two attributes set:

    * ``'Addition'``     -- ``'Bottom'`` for a positive sign, ``'Top'`` for a negative one
    * ``'_Alkene_Type'`` -- copied from the alkene

    ``res_mlib`` is printed once all diols have been written.

    Parameters
    ----------
    diol_alk_dict : mapping of diol name -> alkene name.
    alk_mlib : library of alkenes carrying the ``'C Order'``, ``'Q Order'``
        and ``'_Alkene_Type'`` attributes.
    diol_mlib : library of diols carrying the ``'C Order'`` and
        ``'Q1Q4 Order'`` attributes.
    res_mlib : library opened for writing inside this function.

    Raises
    ------
    KeyError
        If a diol name is missing from ``diol_alk_dict``.
    ValueError
        If the dot product has no usable sign (exactly zero or NaN), if the
        attribute lists do not have the expected number of atoms, or if
        ``find_oh`` finds no oxygen.
    AssertionError
        Propagated from ``create_n_vec`` / ``find_oh``.
    """

    # The alkene has been flipped such that Q1 is in the bottom left corner, so the values need to be flipped
    # (the un-flipped mapping would be {1: 'Top', -1: 'Bottom'})
    addn_faces = {1: 'Bottom', -1: 'Top'}

    with alk_mlib.reading(), diol_mlib.reading(), res_mlib.writing():
        for diol_name in tqdm(diol_mlib):

            ml_diol = diol_mlib[diol_name]
            diol_c0, diol_c1 = [ml_diol.get_atom(x) for x in ml_diol.attrib['C Order']]
            diol_q1a, diol_q4a = [ml_diol.get_atom(x) for x in ml_diol.attrib['Q1Q4 Order']]

            alk_name = diol_alk_dict[diol_name]

            ml_alk = alk_mlib[alk_name]
            alk_c0, alk_c1 = [ml_alk.get_atom(x) for x in ml_alk.attrib['C Order']]
            alk_type = ml_alk.attrib['_Alkene_Type']

            # Only Q1 is used below; the 4-way unpacking is kept so that a
            # malformed 'Q Order' (not exactly four atoms) still fails loudly.
            alk_q1a, _alk_q2a, _alk_q3a, _alk_q4a = [ml_alk.get_atom(x) for x in ml_alk.attrib['Q Order']]

            # Finds the vector of the C0-O bond in the diol
            diol_c0_O = find_oh(ml_diol, diol_c0, diol_c1, diol_q1a, diol_q4a)
            co_vec = ml_diol.vector(diol_c0, diol_c0_O)

            # Finds the vector of the alkene (cross product of the Q1/C0 and C0/C1 vectors)
            react_n = create_n_vec(ml_alk, alk_q1a, alk_c0, alk_c1)

            val = np.sign(np.dot(react_n, co_vec))

            # np.sign gives 0 for a zero dot product and nan for nan; neither
            # identifies a face, so report which molecule caused it instead of
            # letting the dictionary lookup fail with a bare KeyError.
            if val not in addn_faces:
                raise ValueError(
                    f"Cannot assign an addition face for diol {diol_name!r} "
                    f"(alkene {alk_name!r}): sign of dot(react_n, co_vec) is {val}"
                )

            ml_diol.attrib['Addition'] = addn_faces[int(val)]
            ml_diol.attrib['_Alkene_Type'] = alk_type

            res_mlib[diol_name] = ml_diol

        print(res_mlib)

In [6]:
# Value interpolated into the "..._{max_iter}iter.mlib" library file names
max_iter = 10000

# Reaction table; each row pairs a "Product ID" with its "Reactant ID"
DB_df = pd.read_csv("SAD_Database.csv")

# Product ID -> Reactant ID lookup (a repeated Product ID keeps its last row)
diol_alk_dict = dict(
    DB_df[["Product ID", "Reactant ID"]].to_numpy()
)

In [7]:
# 3BFSVol run: alkene library + realigned diol library -> face-assigned diol library
alk_BFSVol_mlib = ml.MoleculeLibrary("6_7_Realign_3BFSVol.mlib")
diol_BFSVol_mlib = ml.MoleculeLibrary(
    f"5_3_Diol_3BFSVol_Realign_{max_iter}iter.mlib"
)
# Output library is opened writable; presumably overwrite=True replaces an existing file -- confirm
res_BFSVol_mlib = ml.MoleculeLibrary(
    f"6_1_Diol_3BFSVol_Assign_{max_iter}iter.mlib",
    readonly=False,
    overwrite=True,
)

# Arguments are given in the order (diol_alk_dict, alk_mlib, diol_mlib, res_mlib)
calc_align(diol_alk_dict, alk_BFSVol_mlib, diol_BFSVol_mlib, res_BFSVol_mlib)

100%|██████████| 888/888 [00:04<00:00, 219.36it/s]

MoleculeLibrary(backend=UkvCollectionBackend('6_1_Diol_3BFSVol_Assign_10000iter.mlib'), n_items=888)





In [8]:
# MaxVol run: alkene library + realigned diol library -> face-assigned diol library
# NOTE(review): the alkene library below is the 3BFSVol file even though the diols
# are the MaxVol set -- confirm this is intended and not a copy-paste leftover.
alk_maxvol_mlib = ml.MoleculeLibrary("6_7_Realign_3BFSVol.mlib")
diol_maxvol_mlib = ml.MoleculeLibrary(
    f"5_3_Diol_MaxVol_Realign_{max_iter}iter.mlib"
)
# Output library is opened writable; presumably overwrite=True replaces an existing file -- confirm
res_maxvol_mlib = ml.MoleculeLibrary(
    f"6_1_Diol_MaxVol_Assign_{max_iter}iter.mlib",
    readonly=False,
    overwrite=True,
)

# Arguments are given in the order (diol_alk_dict, alk_mlib, diol_mlib, res_mlib)
calc_align(diol_alk_dict, alk_maxvol_mlib, diol_maxvol_mlib, res_maxvol_mlib)

100%|██████████| 942/942 [00:04<00:00, 216.28it/s]

MoleculeLibrary(backend=UkvCollectionBackend('6_1_Diol_MaxVol_Assign_10000iter.mlib'), n_items=942)



