### Function to Get some properties:

In [1]:
from rdkit import Chem
import numpy as np
import pandas as pd
from rdkit.Chem import PandasTools, QED, Descriptors, rdMolDescriptors
def cal_mol_props(smi, verbose=False):
    """
    Obtain drug-like properties of inputted compound's smiles
    return mw,logp,hbd,hba,psa,rob,qed,chiral_center
    """
    try:
        m=Chem.MolFromSmiles(smi)
        if not m:
            return None,None,None,None,None,None,None,None
        mw = np.round(Descriptors.MolWt(m),1)
        logp = np.round(Descriptors.MolLogP(m),2)
        hbd = rdMolDescriptors.CalcNumLipinskiHBD(m)
        hba = rdMolDescriptors.CalcNumLipinskiHBA(m)
        psa = np.round(Descriptors.TPSA(m),1)
        rob= rdMolDescriptors.CalcNumRotatableBonds(m)
        qed= np.round(QED.qed(m),2)
        chiral_center=len(Chem.FindMolChiralCenters(m,includeUnassigned=True))
        if verbose:
            print ('Mw ',mw)
            print ('Logp ',logp)
            print ('HBD ', hbd)
            print ('HBA ', hba)
            print ('TPSA ', psa)
            print ('RotB ', rob)
            print ('QED ', qed)
            print ('chiral_center ', chiral_center)
        return mw,logp,hbd,hba,psa,rob,qed,chiral_center
    
    except Exception as e:
        print (e)
        return None,None,None,None,None,None,None,None

### Example:

### Step 1: Get Decoy's RO5 properties:

In [3]:
inputfn = "decoy.csv"
print("The input data is "+inputfn)
data = pd.read_csv(inputfn)
data.head()

The input data is decoy.csv


Unnamed: 0,nwname,fname,smile,CHA score,CCA score
0,501ak,P156229042.sdf,C([C@H]1[C@@H]([C@H]([C@H]([C@H](O1)OC[C@H]2[C...,0.364,0.24
1,502ak,P156228673.sdf,Cc1cc(=O)oc2c1ccc(c2)O[C@H]3[C@@H]([C@@H]([C@@...,0.364,0.24
2,503ak,P156228672.sdf,Cc1cc(=O)oc2c1ccc(c2)O[C@H]3[C@@H]([C@@H]([C@@...,0.061,0.23
3,504ak,P156228670.sdf,Cc1cc(=O)oc2c1ccc(c2)O[C@H]3[C@@H]([C@@H]([C@@...,0.303,0.24
4,505ak,P156228650.sdf,C([C@H]1[C@H]([C@H]([C@H]([C@H](O1)O[C@H]2[C@@...,0.364,0.24


In [4]:
smidata = list(data["smile"])
ls = []
for smi in smidata:
    mw,logp,hbd,hba,psa,rob,qed,chiral_center=cal_mol_props(smi)
    line=[mw,logp,hbd,hba,psa,rob,qed,chiral_center]
    ls.append(line)
ndf=pd.DataFrame(data=ls,columns=["mws","logps","hbds","hbas","psas","robs","qeds","chiral_centes"])
ndf.head()

Unnamed: 0,mws,logps,hbds,hbas,psas,robs,qeds,chiral_centes
0,504.4,-7.57,11,16,268.7,8,0.15,14
1,582.6,-2.61,7,15,226.5,8,0.16,10
2,582.6,-2.61,7,15,226.5,8,0.16,10
3,582.6,-2.61,7,15,226.5,8,0.16,10
4,504.4,-7.57,11,16,268.7,8,0.15,14


In [36]:
mdf = pd.concat([data,ndf],axis=1) #add the ro5 properties
data = mdf
mws=data["logps"]
logps=data["logps"]
hbds=data["hbds"]
hbas=data["hbas"]
robs=data["robs"]
scores=[]
for i in range(len(mws)):
    score = ""
    mw,logp,hbd,hba,rob,qed = mws[i],logps[i],hbds[i],hbas[i],robs[i],qeds[i]
    if mw >= 500:
        score += "+"
    else:
        score += "-"
    if logp >= 5:
        score += "+"
    else:
        score += "-"
    if hbd >= 5:
        score += "+"
    else:
        score += "-"
    if hba >= 10:
        score += "+"
    else:
        score += "-"
    if rob >= 10:
        score += "+"
    else:
        score += "-"
    if qed <= 0.5:
        score += "+"
    else:
        score += "-"
    scores.append(score)
scorenums = []
for i in scores:
    i = i.replace("-","")
    score = len(i)
    scorenums.append(score)

mdf["RO5scores"]=scores
mdf["RO5scoresnum"]=scorenums

In [38]:
mdf.head()

Unnamed: 0,nwname,fname,smile,CHA score,CCA score,mws,logps,hbds,hbas,psas,robs,qeds,chiral_centes,RO5scores,RO5scoresnum
0,501ak,P156229042.sdf,C([C@H]1[C@@H]([C@H]([C@H]([C@H](O1)OC[C@H]2[C...,0.364,0.24,504.4,-7.57,11,16,268.7,8,0.15,14,--++-+,3
1,502ak,P156228673.sdf,Cc1cc(=O)oc2c1ccc(c2)O[C@H]3[C@@H]([C@@H]([C@@...,0.364,0.24,582.6,-2.61,7,15,226.5,8,0.16,10,--++-+,3
2,503ak,P156228672.sdf,Cc1cc(=O)oc2c1ccc(c2)O[C@H]3[C@@H]([C@@H]([C@@...,0.061,0.23,582.6,-2.61,7,15,226.5,8,0.16,10,--++-+,3
3,504ak,P156228670.sdf,Cc1cc(=O)oc2c1ccc(c2)O[C@H]3[C@@H]([C@@H]([C@@...,0.303,0.24,582.6,-2.61,7,15,226.5,8,0.16,10,--++-+,3
4,505ak,P156228650.sdf,C([C@H]1[C@H]([C@H]([C@H]([C@H](O1)O[C@H]2[C@@...,0.364,0.24,504.4,-7.57,11,16,268.7,8,0.15,14,--++-+,3


### Step 2: Export the CSV:

In [39]:
mdf.to_csv("decoy_ro5.csv",index=False) #export the produce data