In [1]:
import pandas as pd
import pickle
from rdkit import Chem
from rdkit.Chem import AllChem as ac
from rdkit.Chem.PropertyMol import PropertyMol
import molli as ml
from molli.external import openbabel as mob
from tqdm import tqdm

# Stage 2a: annotate every product molecule with its olefin type (looked up in
# the SAD database table) and with its chiral centers, then re-pickle the list.
db_df = pd.read_csv("SAD_Database.csv")

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only run this on a file produced by the previous step of this workflow.
with open("1_Database_Diol_Identify.pkl", "rb") as f:
    prod_mols = pickle.load(f)

for rd_mol in tqdm(prod_mols):
    current_name = rd_mol.GetProp("_Name")
    olefin_type_arr = db_df.query("`Product ID` == @current_name")[
        "Olefin Type"
    ].to_numpy()

    # A product without any database row used to fail later with a bare
    # IndexError on `olefin_type_arr[0]`; report which product is missing.
    if olefin_type_arr.size == 0:
        raise LookupError(
            f"No 'Olefin Type' entry in SAD_Database.csv for product {current_name!r}"
        )

    # A product may appear in several rows, but every row must agree on the
    # olefin type. The previous check, all([True for i in ... if ...]), built a
    # list containing only True values and therefore could never fail.
    # pd.unique is used so that repeated missing values count as one value.
    if len(pd.unique(olefin_type_arr)) != 1:
        raise ValueError(
            f"Conflicting 'Olefin Type' values for product {current_name!r}: "
            f"{list(olefin_type_arr)}"
        )

    olefin_type = olefin_type_arr[0]
    rd_mol.SetProp("_Alkene_Type", f"{olefin_type}")
    all_centers = Chem.FindMolChiralCenters(
        rd_mol, force=True, includeUnassigned=True, useLegacyImplementation=False
    )
    # SetProp stores strings; convert explicitly instead of relying on the
    # molecule class to stringify the value on our behalf.
    # NOTE(review): assumes the previous behaviour was an implicit str() (as
    # PropertyMol.SetProp does) — confirm the pickled molecules are PropertyMol.
    rd_mol.SetProp("_CIP", str(all_centers))

# Properties copied onto the hydrogen-added molecules in the next cell;
# "_Canonical_SMILES" is only set there, not in this cell.
ex_props = ["_Name", "_Alkene_Type", "_CIP", "_Canonical_SMILES"]

with open("2_Diol_Type_and_CIP.pkl", "wb") as f:
    pickle.dump(prod_mols, f)

100%|██████████| 987/987 [00:11<00:00, 83.60it/s]


In [2]:
# Stage 2b: add hydrogens, embed and MMFF-optimise a 3D structure for every
# product, then store it both in a molli library and in a pickled list of
# RDKit molecules carrying the properties named in `ex_props`.

# Fixed seed so the stochastic 3D embedding gives the same geometries on every
# Restart & Run All.
EMBED_SEED = 42

mlib = ml.MoleculeLibrary(
    "2_Diol_Unordered.mlib",
    overwrite=True,
    readonly=False,
)
ex_mols_w_h = list()

with mlib.writing():
    for rdmol in tqdm(prod_mols):
        # Pre-bind what the error report prints, so the handler can never hit
        # an unbound name, a stale value from the previous iteration, or a
        # missing "_Canonical_SMILES" property (which would abort the loop).
        name = None
        smiles = None
        try:
            name = rdmol.GetProp("_Name")
            smiles = Chem.MolToSmiles(rdmol, canonical=True)
            rdmol.SetProp("_Canonical_SMILES", smiles)

            rdmol_w_h = PropertyMol(Chem.AddHs(rdmol))
            # EmbedMolecule signals failure through a negative return value
            # rather than an exception; fail here with a clear message.
            if ac.EmbedMolecule(rdmol_w_h, randomSeed=EMBED_SEED) < 0:
                raise ValueError("3D embedding failed: no conformer generated")
            # NOTE(review): the optimiser's return code (convergence / missing
            # parameters) is still ignored, as before — decide whether
            # unconverged geometries should be skipped or reported.
            ac.MMFFOptimizeMolecule(rdmol_w_h)
            mlmol = mob.loads_obmol(Chem.MolToMolBlock(rdmol_w_h), "mol")

            # Copy properties before touching either output, so a missing
            # property cannot leave the library with an entry that the pickle
            # lacks.
            for prop in ex_props:
                rdmol_w_h.SetProp(prop, rdmol.GetProp(prop))

            mlib[name] = mlmol
            # Appended exactly once; the earlier code appended every molecule
            # twice, doubling the pickled list.
            ex_mols_w_h.append(rdmol_w_h)
        except Exception as e:
            # Best effort: report the failing molecule and keep going.
            print(e)
            print(name)
            print(smiles)
            continue

with open("2_Diol_Unordered_w_H.pkl", "wb") as f:
    pickle.dump(ex_mols_w_h, f)

100%|██████████| 987/987 [01:41<00:00,  9.73it/s]
