In [1]:
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors

In [2]:
drugbank_input = Chem.SDMolSupplier('../data/drugbank.sdf')
drugbank = [m for m in drugbank_input if m]

In [9]:
def lipinski_wt_limit(m):
    return Chem.Descriptors.MolWt(m) <= 500

def lipinski_logp_limit(m):
    return Descriptors.MolLogP(m) <= 5

def lipinski_hba_limit(m):
    return rdMolDescriptors.CalcNumLipinskiHBA(m) <= 10

def lipinski_hbd_limit(m):
      return rdMolDescriptors.CalcNumLipinskiHBD(m) <= 5
    
def lipinski_violations(m):
    return 4 - sum((lipinski_wt_limit(m),
                   lipinski_logp_limit(m), 
                   lipinski_hba_limit(m), 
                   lipinski_hbd_limit(m)))

In [4]:
sum(lipinski_wt_limit(m) for m in drugbank)

6253

In [5]:
sum(lipinski_logp_limit(m) for m in drugbank)

6577

In [10]:
violations = [lipinski_violations(m) for m in drugbank]


In [11]:
sum(violations)/len(violations)


0.45049226441631507

In [13]:
jchem_violated = [bool(int(m.GetProp('JCHEM_RULE_OF_FIVE')))
                 if m.HasProp('JCHEM_RULE_OF_FIVE')
                 else None for m in drugbank]

In [14]:
disagreeing = [x for x in zip(violations, jchem_violated) if (x[0] < 2) != x[1]]

In [15]:
disagreeing

[(2, True),
 (2, True),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (0, False),
 (0, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (0, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (0, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (0, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (0, False),
 (0, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (2, True),
 (1, False),
 (1, False),
 (0, False),
 (1, False),
 (1, False),
 (1, False),
 (4, None),
 (1, False),
 (0, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (1, False),
 (0