To run this notebook, first install `parallel_pandas`:
<pre>pip install parallel_pandas</pre>


In [1]:
import os

import pandas as pd
import useful_rdkit_utils as uru
from parallel_pandas import ParallelPandas
from rdkit.Chem import PandasTools
from rdkit.Chem.rdMolDescriptors import CalcNumUnspecifiedAtomStereoCenters

Read some data

In [2]:
# Location of the public ADME CSV in the Computational-ADME GitHub repository.
adme_csv_url = "https://raw.githubusercontent.com/molecularinformatics/Computational-ADME/refs/heads/main/ADME_public_set_3521.csv"
# Download the CSV and parse it into the working DataFrame.
df = pd.read_csv(adme_csv_url)

Add a molecule column

In [3]:
# Parse each SMILES string into an RDKit molecule, stored in place on `df`.
# The molecule column is named explicitly because the following cells read `ROMol`.
PandasTools.AddMoleculeColumnToFrame(df, smilesCol="SMILES", molCol="ROMol")

Calculate the number of unspecified stereocenters in each molecule

In [4]:
# Per-molecule count of atom stereocenters whose configuration is not specified.
df["num_unspec_stereo"] = df["ROMol"].map(CalcNumUnspecifiedAtomStereoCenters)

Remove molecules with unspecified stereocenters

In [5]:
# Keep only the rows whose molecule has zero unspecified stereocenters.
fully_specified = df["num_unspec_stereo"] == 0
df = df[fully_specified]

As a quick test, only use the first 100 rows in the table

In [6]:
# Take the first 100 rows; the copy makes `df` an independent frame so the
# column assignments in later cells do not write into a slice of the filtered data.
df = df.iloc[:100].copy()

Initialize ParallelPandas

In [7]:
# Set up parallel_pandas before any p_apply call is made.
# Ask for up to 16 workers, but never more than the number of CPUs this machine
# reports, so the notebook also behaves sensibly on smaller machines.
# os.cpu_count() may return None, hence the fallback to 1.
ParallelPandas.initialize(
    n_cpu=min(16, os.cpu_count() or 1),
    split_factor=4,  # presumably chunks = n_cpu * split_factor; confirm in the parallel_pandas docs
    disable_pr_bar=False,  # leave the progress bars switched on
)

Generate conformers 

In [8]:
# Run uru.gen_conformers on every molecule through parallel_pandas' p_apply,
# using the thread-based executor, and keep the results in `mol_3d`.
df["mol_3d"] = df["ROMol"].p_apply(uru.gen_conformers, executor="threads")

GEN_CONFORMERS DONE:   0%|          | 0/100 [00:00<?, ?it/s]

Refine the conformers

In [9]:
# Run uru.refine_conformers on each conformer-bearing molecule, again via
# p_apply with the thread-based executor.
# NOTE(review): in the saved table output, `mol_3d` and `refined_mol` show the same
# object address on every visible row, so refine_conformers appears to hand back the
# molecule it was given rather than a copy. Confirm before relying on `mol_3d` still
# holding the unrefined conformers.
df["refined_mol"] = df["mol_3d"].p_apply(uru.refine_conformers, executor="threads")

REFINE_CONFORMERS DONE:   0%|          | 0/100 [00:00<?, ?it/s]

Create a new column with conformer energies

In [10]:
# Store the result of uru.get_conformer_energies for each refined molecule
# (one sequence of energy values per row in the saved output).
df["conformer_energies"] = df["refined_mol"].map(uru.get_conformer_energies)

In [11]:
# Display the final table, including the mol_3d, refined_mol and
# conformer_energies columns added above.
df

Unnamed: 0,Internal ID,Vendor ID,SMILES,CollectionName,LOG HLM_CLint (mL/min/kg),LOG MDR1-MDCK ER (B-A/A-B),LOG SOLUBILITY PH 6.8 (ug/mL),LOG PLASMA PROTEIN BINDING (HUMAN) (% unbound),LOG PLASMA PROTEIN BINDING (RAT) (% unbound),LOG RLM_CLint (mL/min/kg),ROMol,num_unspec_stereo,mol_3d,refined_mol,conformer_energies
0,Mol1,317714313,CNc1cc(Nc2cccn(-c3ccccn3)c2=O)nn2c(C(=O)N[C@@H...,emolecules,0.675687,1.493167,0.089905,0.991226,0.518514,1.392169,<rdkit.Chem.rdchem.Mol object at 0x1637069d0>,0,<rdkit.Chem.rdchem.Mol object at 0x16376c890>,<rdkit.Chem.rdchem.Mol object at 0x16376c890>,"[17.272818241492686, 5.190032063139654, 23.610..."
1,Mol2,324056965,CCOc1cc2nn(CCC(C)(C)O)cc2cc1NC(=O)c1cccc(C(F)F)n1,emolecules,0.675687,1.040780,0.550228,0.099681,0.268344,1.027920,<rdkit.Chem.rdchem.Mol object at 0x163706a40>,0,<rdkit.Chem.rdchem.Mol object at 0x16376c0b0>,<rdkit.Chem.rdchem.Mol object at 0x16376c0b0>,"[111.8178727753974, 122.94786020724965, 110.93..."
2,Mol3,304005766,CN(c1ncc(F)cn1)[C@H]1CCCNC1,emolecules,0.675687,-0.358806,,2.000000,2.000000,1.027920,<rdkit.Chem.rdchem.Mol object at 0x163706ab0>,0,<rdkit.Chem.rdchem.Mol object at 0x16376d540>,<rdkit.Chem.rdchem.Mol object at 0x16376d540>,"[-23.87837311911521, -21.92784489461851, -22.7..."
3,Mol4,194963090,CC(C)(Oc1ccc(-c2cnc(N)c(-c3ccc(Cl)cc3)c2)cc1)C...,emolecules,0.675687,1.026662,1.657056,-1.158015,-1.403403,1.027920,<rdkit.Chem.rdchem.Mol object at 0x163706b20>,0,<rdkit.Chem.rdchem.Mol object at 0x16376d930>,<rdkit.Chem.rdchem.Mol object at 0x16376d930>,"[69.81233993444012, 58.54347170983991, 69.1780..."
4,Mol5,324059015,CC(C)(O)CCn1cc2cc(NC(=O)c3cccc(C(F)(F)F)n3)c(C...,emolecules,0.996380,1.010597,,1.015611,1.092264,1.629093,<rdkit.Chem.rdchem.Mol object at 0x163706b90>,0,<rdkit.Chem.rdchem.Mol object at 0x16376d380>,<rdkit.Chem.rdchem.Mol object at 0x16376d380>,"[136.74067880373738, 138.68122297714956, 136.7..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,Mol119,320368312,O=C(Nc1cnccc1-c1ccc(Cl)cc1)c1ccnc(NC(=O)C2CC2)c1,emolecules,1.479503,0.466764,,0.369958,0.847634,1.880242,<rdkit.Chem.rdchem.Mol object at 0x163715e00>,0,<rdkit.Chem.rdchem.Mol object at 0x16376c660>,<rdkit.Chem.rdchem.Mol object at 0x16376c660>,"[18.06945309573915, 6.322747028114591, 6.32274..."
119,Mol120,89942274,CN1C(N)=N[C@](C)(c2cc(NC(=O)c3ccc(F)cn3)ccc2F)...,emolecules,0.675687,1.970452,,1.560469,1.491418,1.915521,<rdkit.Chem.rdchem.Mol object at 0x163715e70>,0,<rdkit.Chem.rdchem.Mol object at 0x16376c7b0>,<rdkit.Chem.rdchem.Mol object at 0x16376c7b0>,"[-135.20344723102627, -128.95226004570176, -13..."
121,Mol122,511449,CCCCNC(=O)NS(=O)(=O)c1ccc(C)cc1,emolecules,0.675687,0.250980,,0.439333,0.440122,1.027920,<rdkit.Chem.rdchem.Mol object at 0x163715f50>,0,<rdkit.Chem.rdchem.Mol object at 0x1637163b0>,<rdkit.Chem.rdchem.Mol object at 0x1637163b0>,"[-160.5903445925706, -159.87353633747696, -155..."
124,Mol125,139135,CN(C)C(=O)C1(Cc2ccccc2-c2ccccc2)CCN(C(=O)c2cnn...,emolecules,2.666524,1.399850,,1.041353,1.146841,3.097607,<rdkit.Chem.rdchem.Mol object at 0x1637160a0>,0,<rdkit.Chem.rdchem.Mol object at 0x16376c6d0>,<rdkit.Chem.rdchem.Mol object at 0x16376c6d0>,"[49.68904473569186, 50.21535866800538, 44.1310..."
