In [1]:
import pickle
from rdkit import Chem
from rdkit.Chem import AllChem as ac
from rdkit.Chem.PropertyMol import PropertyMol
import molli as ml
from molli.external import openbabel as mob
import pandas as pd
from tqdm import tqdm

# Tabular database; only the 'Reactant ID' and 'Olefin Type' columns are used below.
DB_df = pd.read_csv('SAD_Database.csv')

# Lookup table: reactant ID -> olefin type. If an ID occurs more than once,
# the last row wins (same as assigning into a dict row by row).
alk_type_map = dict(zip(DB_df['Reactant ID'], DB_df['Olefin Type']))

# RDKit molecules to be processed in the next cell.
# NOTE(review): presumably produced by the preceding pipeline step -- confirm.
# pickle can execute arbitrary code on load, so only open trusted files here.
with open('1_Database_Alkene_Identify.pkl', 'rb') as pkl_in:
    DB_mols = pickle.load(pkl_in)

# Accumulator for the hydrogen-added RDKit molecules built by the next cell.
DB_mols_w_h = []

# Properties copied from each input molecule onto its hydrogen-added counterpart.
ex_props = ['_Name', '_Alkene_Type', '_Alkene', '_Canonical_SMILES']

# Output molli library; any existing file of the same name is overwritten.
mlib = ml.MoleculeLibrary('2_DB_mols.mlib', overwrite=True, readonly=False)

In [None]:
# Build a hydrogen-complete 3D structure for every molecule in `DB_mols` and
# store it both in the molli library (`mlib`) and in `DB_mols_w_h` (RDKit).
#
# Only the RDKit embedding / force-field step is wrapped in try/except.
# Metadata lookups happen first, and `rdmol_w_h` is created before the `try`,
# so the fallback branch can never run with `name`, `alk_type`, `can_smiles`
# or `rdmol_w_h` left over from the previous loop iteration.
skipped = []  # labels of molecules skipped because required metadata was missing

with mlib.writing():
    for rdmol in tqdm(DB_mols):
        # --- Metadata: fail for THIS molecule, explicitly, if anything is missing
        try:
            name = rdmol.GetProp("_Name")
            alkene = rdmol.GetProp("_Alkene")
            alk_type = alk_type_map[name]
        except KeyError as err:
            label = rdmol.GetProp("_Name") if rdmol.HasProp("_Name") else "<unnamed>"
            print(f"Skipping {label}: missing required metadata ({err})")
            skipped.append(label)
            continue

        # Updates some key properties missing on the input molecule
        can_smiles = Chem.MolToSmiles(rdmol, canonical=True)
        rdmol.SetProp("_Canonical_SMILES", can_smiles)
        rdmol.SetProp("_Alkene_Type", alk_type)

        # Adds hydrogens; created outside the try so the fallback always
        # operates on the current molecule.
        rdmol_w_h = PropertyMol(Chem.AddHs(rdmol))

        # Embeds and optimizes with RDKit.
        # NOTE(review): EmbedMolecule is called without a random seed, so the
        # resulting geometries are not reproducible between runs.
        try:
            ac.EmbedMolecule(rdmol_w_h)
            ac.MMFFOptimizeMolecule(rdmol_w_h)
            rdkit_ok = True
        # Exception sometimes occurs in RDKit
        # This is fixed by doing optimization with openbabel
        except Exception as err:
            print(err)
            print(name)
            print(can_smiles)
            rdkit_ok = False

        # This creates the Molli Molecule Object and Appends Properties to it
        mlmol = ml.loads(
            Chem.MolToMolBlock(rdmol_w_h),
            fmt='mol',
            parser='openbabel',
            otype='molecule',
            name=name)
        if not rdkit_ok:
            mlmol = mob.obabel_optimize(
                mlmol,
                ff='MMFF94',
                inplace=False
            )
        mlmol.attrib['_Canonical_SMILES'] = can_smiles
        mlmol.attrib["_Alkene_Type"] = alk_type
        mlmol.attrib["_Alkene"] = alkene
        mlib[name] = mlmol

        # Updates all properties on the hydrogen-added RDKit molecule.
        # NOTE: on the fallback path only the molli molecule receives the
        # openbabel-optimized coordinates; `rdmol_w_h` keeps whatever RDKit
        # produced before the failure.
        for prop in ex_props:
            rdmol_w_h.SetProp(prop, rdmol.GetProp(prop))
        DB_mols_w_h.append(rdmol_w_h)

        if not rdkit_ok:
            print('Fixed!')

if skipped:
    print(f"{len(skipped)} molecule(s) skipped due to missing metadata: {skipped}")

100%|██████████| 784/784 [01:01<00:00, 12.73it/s]


In [3]:
# Persist the hydrogen-added RDKit molecules collected in `DB_mols_w_h`.
# NOTE(review): the molecules are wrapped in PropertyMol, presumably so their
# properties survive pickling -- confirm against the RDKit docs.
with open('2_DB_rdmol_w_h.pkl', 'wb') as pkl_out:
    pickle.dump(DB_mols_w_h, pkl_out)