In [7]:
from rdkit.Chem import Descriptors, rdMolDescriptors

In [17]:
needed = [
    "GetNumAtoms",
    "ExactMolWt",
    "NumRings",
    "NumAromaticRings",
    "NumAliphaticRings",
    "NumLipinskiHBD",
    "NumLipinskiHBA",
    "MolLogP",
    "TPSA",
    "NumRotatableBonds",
    "GetAtomicNum",
    "fr_halogen",
    "NumBridgeheadAtoms",
    "FractionCSP3",
]

In [18]:
max([len(n) for n in needed])

18

In [19]:
rdkit_desc_names = [d[0] for d in Descriptors._descList]

for n in needed:
    if n in rdkit_desc_names:
        print(f"{n:19}: ok")
    else:
        print(f"{n:19}: no")

GetNumAtoms        : no
ExactMolWt         : ok
NumRings           : no
NumAromaticRings   : ok
NumAliphaticRings  : ok
NumLipinskiHBD     : no
NumLipinskiHBA     : no
MolLogP            : ok
TPSA               : ok
NumRotatableBonds  : ok
GetAtomicNum       : no
fr_halogen         : ok
NumBridgeheadAtoms : ok
FractionCSP3       : ok


In [20]:
def search_desc(string):
    for d in rdkit_desc_names:
        if string in d:
            print(d)

In [21]:
search_desc("Ring")

NumAliphaticRings
NumAromaticRings
NumSaturatedRings
RingCount


In [23]:
search_desc("ipinski")

In [24]:
search_desc("Num")

NumValenceElectrons
NumRadicalElectrons
NumAliphaticCarbocycles
NumAliphaticHeterocycles
NumAliphaticRings
NumAmideBonds
NumAromaticCarbocycles
NumAromaticHeterocycles
NumAromaticRings
NumAtomStereoCenters
NumBridgeheadAtoms
NumHAcceptors
NumHDonors
NumHeteroatoms
NumHeterocycles
NumRotatableBonds
NumSaturatedCarbocycles
NumSaturatedHeterocycles
NumSaturatedRings
NumSpiroAtoms
NumUnspecifiedAtomStereoCenters


In [25]:
desc = [
    "ExactMolWt",
    "RingCount",
    "NumAromaticRings",
    "NumAliphaticRings",
    "NumHDonors",
    "NumHAcceptors",
    "MolLogP",
    "TPSA",
    "NumRotatableBonds",
    "fr_halogen",
    "NumBridgeheadAtoms",
    "FractionCSP3",
]

In [27]:
len(Descriptors._descList)

217

In [28]:
tmp = dict(Descriptors._descList)
len(tmp)

217

In [29]:
tmp

{'MaxAbsEStateIndex': <function rdkit.Chem.EState.EState.MaxAbsEStateIndex(mol, force=1)>,
 'MaxEStateIndex': <function rdkit.Chem.EState.EState.MaxEStateIndex(mol, force=1)>,
 'MinAbsEStateIndex': <function rdkit.Chem.EState.EState.MinAbsEStateIndex(mol, force=1)>,
 'MinEStateIndex': <function rdkit.Chem.EState.EState.MinEStateIndex(mol, force=1)>,
 'qed': <function rdkit.Chem.QED.qed(mol, w=QEDproperties(MW=0.66, ALOGP=0.46, HBA=0.05, HBD=0.61, PSA=0.06, ROTB=0.65, AROM=0.48, ALERTS=0.95), qedProperties=None)>,
 'SPS': <function rdkit.Chem.SpacialScore.SPS(mol, normalize=True)>,
 'MolWt': <function rdkit.Chem.Descriptors.<lambda>(*x, **y)>,
 'HeavyAtomMolWt': <function rdkit.Chem.Descriptors.HeavyAtomMolWt(x)>,
 'ExactMolWt': <function rdkit.Chem.Descriptors.<lambda>(*x, **y)>,
 'NumValenceElectrons': <function rdkit.Chem.Descriptors.NumValenceElectrons(mol)>,
 'NumRadicalElectrons': <function rdkit.Chem.Descriptors.NumRadicalElectrons(mol)>,
 'MaxPartialCharge': <function rdkit.Chem

In [30]:
import sys
from pathlib import Path

sys.path.append(str(Path.cwd().parent))

from src.utils import calculate_selected_descriptors

In [32]:
import pandas as pd

PATH = Path.cwd().parent / "data" / "interim"
df = pd.read_csv(PATH / "compounds_GreinerL_cleaned.csv")
df.head()

Unnamed: 0,cmpd_id,taut_smiles
0,5a,CCOC(=O)N1CCC2(c3ccc(OC)c(OC)c3)Cc3[nH]c4ccccc...
1,5b,CCOC(=O)N1CCC2(c3ccc(OC)c(OC)c3)Cc3[nH]c4ccc(F...
2,5c,CCOC(=O)N1CC[C@@]2(c3ccc(OC)c(OC)c3)Cc3[nH]c4c...
3,5d,CCOC(=O)N1CCC2(c3ccc(OC)c(OC)c3)Cc3[nH]c4ccc(B...
4,5e,CCOC(=O)N1CCC2(c3cccc(OC)c3)Cc3[nH]c4ccc(OC(F)...


In [35]:
from rdkit import Chem
from rdkit.Chem import PandasTools

PandasTools.AddMoleculeColumnToFrame(df, smilesCol="taut_smiles")

In [36]:
calculate_selected_descriptors(df["ROMol"][0])

{'ExactMolWt': 420.204907376,
 'RingCount': 5,
 'NumAromaticRings': 3,
 'NumAliphaticRings': 2,
 'NumHDonors': 1,
 'NumHAcceptors': 4,
 'MolLogP': 4.452500000000003,
 'TPSA': 63.79,
 'NumRotatableBonds': 4,
 'fr_halogen': 0,
 'NumBridgeheadAtoms': 0,
 'FractionCSP3': 0.4,
 'NumOxygen': 4,
 'NumNitrogen': 2,
 'LipinskiViolations': 0,
 'VeberViolations': 0}

In [39]:
df = pd.concat((df,df["ROMol"].apply(calculate_selected_descriptors).apply(pd.Series)), axis=1)

In [40]:
df

Unnamed: 0,cmpd_id,taut_smiles,ROMol,ExactMolWt,RingCount,NumAromaticRings,NumAliphaticRings,NumHDonors,NumHAcceptors,MolLogP,TPSA,NumRotatableBonds,fr_halogen,NumBridgeheadAtoms,FractionCSP3,NumOxygen,NumNitrogen,LipinskiViolations,VeberViolations
0,5a,CCOC(=O)N1CCC2(c3ccc(OC)c(OC)c3)Cc3[nH]c4ccccc...,<rdkit.Chem.rdchem.Mol object at 0x7081b81b0900>,420.204907,5.0,3.0,2.0,1.0,4.0,4.4525,63.79,4.0,0.0,0.0,0.4,4.0,2.0,0.0,0.0
1,5b,CCOC(=O)N1CCC2(c3ccc(OC)c(OC)c3)Cc3[nH]c4ccc(F...,<rdkit.Chem.rdchem.Mol object at 0x7081b81b1930>,438.195486,5.0,3.0,2.0,1.0,4.0,4.5916,63.79,4.0,1.0,0.0,0.4,4.0,2.0,0.0,0.0
2,5c,CCOC(=O)N1CC[C@@]2(c3ccc(OC)c(OC)c3)Cc3[nH]c4c...,<rdkit.Chem.rdchem.Mol object at 0x7081b81b1850>,454.165935,5.0,3.0,2.0,1.0,4.0,5.1059,63.79,4.0,1.0,0.0,0.4,4.0,2.0,1.0,0.0
3,5d,CCOC(=O)N1CCC2(c3ccc(OC)c(OC)c3)Cc3[nH]c4ccc(B...,<rdkit.Chem.rdchem.Mol object at 0x7081b3843f40>,498.115419,5.0,3.0,2.0,1.0,4.0,5.215,63.79,4.0,1.0,0.0,0.4,4.0,2.0,1.0,0.0
4,5e,CCOC(=O)N1CCC2(c3cccc(OC)c3)Cc3[nH]c4ccc(OC(F)...,<rdkit.Chem.rdchem.Mol object at 0x7081b38ac0b0>,474.176642,5.0,3.0,2.0,1.0,4.0,5.3425,63.79,4.0,3.0,0.0,0.4,4.0,2.0,1.0,0.0
5,5f,CCOC(=O)N1CCC2(c3ccc(OC)c(OC)c3)Cc3[nH]c4ccc(S...,<rdkit.Chem.rdchem.Mol object at 0x7081b38ac120>,499.177707,5.0,3.0,2.0,2.0,6.0,3.0999,123.95,5.0,0.0,0.0,0.4,6.0,3.0,0.0,0.0
6,5g,CCOC(=O)N1CCC2(c3ccc(OC)c(OC)c3)Cc3[nH]c4ccc(C...,<rdkit.Chem.rdchem.Mol object at 0x7081b38ac190>,464.194737,5.0,3.0,2.0,2.0,5.0,4.1507,101.09,5.0,0.0,0.0,0.384615,6.0,2.0,0.0,0.0
7,5h,CCOC(=O)N1CCC2(c3ccc(OC)c(OC)c3)Cc3[nH]c4ncccc...,<rdkit.Chem.rdchem.Mol object at 0x7081b38ac200>,421.200156,5.0,3.0,2.0,1.0,5.0,3.8475,76.68,4.0,0.0,0.0,0.416667,4.0,3.0,0.0,0.0
8,5i,CCOC(=O)N1CCC2(c3ccc(OC)c(OC)c3)Cc3ncccc3CC12,<rdkit.Chem.rdchem.Mol object at 0x7081b38ac270>,382.189257,4.0,2.0,2.0,0.0,5.0,3.3662,60.89,4.0,0.0,0.0,0.454545,4.0,2.0,0.0,0.0
9,5j,CCOC(=O)N1CCC2(c3ccc(OC)c(OC)c3)Cc3nccc(C)c3CC12,<rdkit.Chem.rdchem.Mol object at 0x7081b38ac2e0>,396.204907,4.0,2.0,2.0,0.0,5.0,3.67462,60.89,4.0,0.0,0.0,0.478261,4.0,2.0,0.0,0.0


In [None]:
# taken from https://github.com/rdkit/rdkit/issues/2279

import os
from rdkit.Chem import RDConfig

sys.path.append(RDConfig.RDContribDir)

from NP_Score import npscorer

In [45]:
fscore = npscorer.readNPModel()
def score_np(mol):
    return npscorer.scoreMol(mol, fscore)

reading NP model ...
model in


In [47]:
df["NP_likeness"] = df["ROMol"].apply(score_np)

In [48]:
from rdkit.Chem import QED

df["QED"] = df["ROMol"].apply(QED.default)