In [1]:
from rdkit import Chem
from rdkit.Chem import Descriptors
import pandas as pd

In [2]:
def calc_mol_properties(smiles):
    """Compute eight RDKit descriptors for a molecule given as a SMILES string.

    Parameters
    ----------
    smiles : str
        SMILES representation of the molecule.

    Returns
    -------
    tuple
        ``(mol_weight, logp, rotatable_bonds, tpsa, rings, aromatic_rings,
        h_donors, h_acceptors)``, in that fixed order, taken from
        ``Descriptors.MolWt``, ``MolLogP``, ``NumRotatableBonds``, ``TPSA``,
        ``RingCount``, ``NumAromaticRings``, ``NumHDonors`` and
        ``NumHAcceptors`` respectively.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles`` (``Chem.MolFromSmiles`` returns
        ``None`` in that case).
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # Fail with a message that names the offending input instead of
        # letting the descriptor functions choke on None.
        raise ValueError("Could not parse SMILES: {!r}".format(smiles))

    return (
        Descriptors.MolWt(mol),              # molecular weight
        Descriptors.MolLogP(mol),            # lipid-water partition coefficient (logP)
        Descriptors.NumRotatableBonds(mol),  # rotatable bond count
        Descriptors.TPSA(mol),               # topological polar surface area
        Descriptors.RingCount(mol),          # total ring count
        Descriptors.NumAromaticRings(mol),   # aromatic ring count
        Descriptors.NumHDonors(mol),         # hydrogen-bond donors
        Descriptors.NumHAcceptors(mol),      # hydrogen-bond acceptors
    )

In [4]:
# Agonist SMILES: the 'stan_smiles' column of the first 609 rows
# (positions 0-608) of stan_data.csv, as a plain Python list.
# NOTE(review): the 609 cut-off is hard-coded; presumably the file lists
# agonists first - confirm against the data source.
smiles_agonist = (
    pd.read_csv('stan_data.csv')
    .iloc[:609]
    .loc[:, 'stan_smiles']
    .tolist()
)

In [5]:
# Display the full list of agonist SMILES strings as this cell's output.
smiles_agonist

['O=S(=O)(c1cccc(-c2cn3cc(Cl)ccc3n2)c1)N1CCCCCC1',
 'Cc1cccn2cc(-c3cccc(S(=O)(=O)N4CCCCCC4)c3)nc12',
 'COc1cc(-c2nn3c(-c4ccc(Cl)cc4)nnc3s2)cc(OC)c1OC',
 'O=C(O)Cc1c(-c2ccc(Cl)cc2)nc2ccccn12',
 'Cc1ccc(Cl)cc1-n1ncc2c1ncn1c(-c3ccccc3O)nnc21',
 'Cc1[nH]c2ccc(F)cc2c(=O)c1CN1CCc2ccccc2C1',
 'Cc1c(-c2nc(-c3ccccc3F)no2)nnn1-c1ccc(F)c(Cl)c1',
 'Cc1ccccc1-n1ncc2c1ncn1c(-c3ccccc3O)nnc21',
 'Cc1ccc(-c2nnc3c4cnn(-c5ccccc5Cl)c4ncn23)cc1',
 'N#Cc1nc(-c2ccccc2F)oc1S(=O)(=O)c1ccc(Cl)cc1',
 'N#Cc1nc(-c2ccccc2)oc1S(=O)(=O)Cc1ccc(Cl)cc1Cl',
 'Cn1c(=O)c2c(nc3n(-c4cccc(Cl)c4)c(-c4cccc([N+](=O)[O-])c4)cn23)n(C)c1=O',
 'Cc1ccc(Sc2oc(-c3ccccc3F)nc2C#N)cc1',
 'Cc1ccc(-n2ncc3c2ncn2c(-c4ccccc4Cl)nnc32)c(C)c1',
 'Cc1c(-c2nc(-c3ccccc3F)no2)nnn1-c1ccc(Br)cc1',
 'Oc1cc2ccccc2cc1-c1nnc2c3cnn(-c4ccc(F)cc4)c3ncn12',
 'Cc1ccc(S(=O)(=O)c2oc(-c3ccccc3F)nc2C#N)cc1',
 'COc1ccc(-c2nnc3c4cnn(-c5ccc(Cl)cc5)c4ncn23)cc1OC',
 'CCN1CCN(C(=O)c2cc(-c3ccccc3)nc3c2c(C)nn3C)CC1',
 'Cc1ccc(-c2nc3c(C)cccn3c2CC(=O)O)cc1C',
 'CCOC(=O)c1c(N

In [10]:
# Antagonist SMILES: the 'stan_smiles' column of every row from position 609
# onwards in stan_data.csv, as a plain Python list.
# NOTE(review): the 609 cut-off is hard-coded; presumably everything after the
# agonist rows is an antagonist - confirm against the data source.
smiles_antagonist = (
    pd.read_csv('stan_data.csv')
    .iloc[609:]
    .loc[:, 'stan_smiles']
    .tolist()
)

In [11]:
# Display the full list of antagonist SMILES strings as this cell's output.
smiles_antagonist

['Br.O=c1[nH]oc(C2CCNCC2)c1-c1cccc([N+](=O)[O-])c1',
 'Br.O=c1[nH]oc(C2CCNCC2)c1-c1csc(-c2ccccc2)c1',
 'Br.CC(C)(C)c1ccc(-c2c(C3CCNCC3)o[nH]c2=O)cc1',
 'O=c1[nH]oc(C2CCNCC2)c1-c1cccc2ccccc12',
 'NC(=O)C1=CCC(N)C1',
 'NC1CC=C(C(=O)O)C1',
 'CCCCCNC(=O)C1=CCC(N)C1',
 'O=P1(O)CCC(CO)C1',
 'CCONC(=O)C1=CCC(N)C1',
 'N=c1ccc(-c2cccc(OCc3ccccc3)c2)nn1CCCC(=O)O',
 'Br.Cc1nn(O)cc1C1CCNCC1',
 'N=c1ccc(-c2cccc(N)c2)nn1CCCC(=O)O',
 'CCOC(=O)c1ncn2c1CN=C(c1ccc(F)cc1)c1cc(F)ccc1-2',
 'CCCCP(=O)(O)C1=CCC(N)C1',
 'CC1=NN=C(c2cc3c(C(F)(F)F)cccc3s2)c2cc3oc(=O)[nH]c3cc2C1',
 'c1ccc2c(NCCCCCCCNc3c4c(nc5ccccc35)CCCC4)c3c(nc2c1)CCCC3',
 'O=c1[nH]oc(C2CCNCC2)c1Cc1ccc2ccccc2c1-c1ccccc1',
 'Br.Br.Nc1cccc(-c2c(C3CCNCC3)o[nH]c2=O)c1',
 'Br.O=c1[nH]oc(C2CCNCC2)c1-c1cccnc1',
 'O=c1[nH]oc(C2CCNCC2)c1C(Br)c1ccc2ccccc2c1',
 'Cl.O=c1[nH]nc(C2CCNCC2)o1',
 'CCCC(O)CCC=CC=CC#CC#CC=CCO',
 'CCCCP(=O)(O)C1=CCC(NC(=O)CCCCCNc2ccc([N+](=O)[O-])c3nonc23)C1',
 'COc1ccc2cnc3c(C(=O)NCc4ccccc4OC)cnn3c2c1',
 'Br.On1ncc(C2CCNCC2)c1-c1

# agonist

In [12]:
# Column-oriented accumulator for the agonist set: one independent, initially
# empty list per output column, in the order the columns should appear.
data_agonist = {
    column: []
    for column in (
        'smiles',
        'weight',
        'logp',
        'rotatable_bonds',
        'tpsa',
        'rings',
        'AroRings',
        'h_donors',
        'h_acceptors',
    )
}

In [13]:
# Fill data_agonist with one entry per agonist SMILES.
# The column lists are emptied first so that re-running this cell rebuilds the
# table instead of appending a second copy of every row.
for column_values in data_agonist.values():
    column_values.clear()

for smiles in smiles_agonist:
    # Compute all descriptors before appending anything, so a failure on one
    # molecule cannot leave the columns with different lengths.
    (mol_weight, logp, rotatable_bonds, tpsa,
     rings, AroRings, h_donors, h_acceptors) = calc_mol_properties(smiles)
    data_agonist['smiles'].append(smiles)
    data_agonist['weight'].append(mol_weight)
    data_agonist['logp'].append(logp)
    data_agonist['rotatable_bonds'].append(rotatable_bonds)
    data_agonist['tpsa'].append(tpsa)
    data_agonist['rings'].append(rings)
    data_agonist['AroRings'].append(AroRings)
    data_agonist['h_donors'].append(h_donors)
    data_agonist['h_acceptors'].append(h_acceptors)

In [14]:
# Turn the per-column lists into a table (one row per molecule, one column
# per dictionary key) and show it as the cell output.
df_agonist = pd.DataFrame.from_dict(data_agonist)
df_agonist

Unnamed: 0,smiles,weight,logp,rotatable_bonds,tpsa,rings,AroRings,h_donors,h_acceptors
0,O=S(=O)(c1cccc(-c2cn3cc(Cl)ccc3n2)c1)N1CCCCCC1,389.908,4.21940,3,54.68,4,3,0,4
1,Cc1cccn2cc(-c3cccc(S(=O)(=O)N4CCCCCC4)c3)nc12,369.490,3.87442,3,54.68,4,3,0,4
2,COc1cc(-c2nn3c(-c4ccc(Cl)cc4)nnc3s2)cc(OC)c1OC,402.863,4.19900,5,70.77,4,4,0,8
3,O=C(O)Cc1c(-c2ccc(Cl)cc2)nc2ccccn12,286.718,3.28180,3,54.60,3,3,1,3
4,Cc1ccc(Cl)cc1-n1ncc2c1ncn1c(-c3ccccc3O)nnc21,376.807,3.79762,2,81.13,5,5,1,7
...,...,...,...,...,...,...,...,...,...
604,NCCCS(=O)(=O)O,139.176,-0.77700,3,80.39,0,0,2,3
605,O=C(O)CCC(=O)OC1N=C(c2ccccc2Cl)c2cc(Br)ccc2NC1=O,465.687,3.62610,5,105.06,3,2,2,5
606,CC1N=C(c2ccccc2F)c2cc(Br)ccc2NC1=O,347.187,3.76620,1,41.46,3,2,1,2
607,CC(C)(C)OC(=O)NCc1n[nH]c(=O)o1.NCc1n[nH]c(=O)o1,330.301,-0.79080,3,182.13,2,2,4,9


In [15]:
# Write the agonist descriptor table to disk. With the default settings the
# DataFrame index is written too, as the first column.
df_agonist.to_csv(path_or_buf='agonist_MolecularProperties.csv')

# antagonist (non_agonist)

In [16]:
# Column-oriented accumulator for the antagonist set: one independent,
# initially empty list per output column, in the order the columns should appear.
data_antagonist = {
    column: []
    for column in (
        'smiles',
        'weight',
        'logp',
        'rotatable_bonds',
        'tpsa',
        'rings',
        'AroRings',
        'h_donors',
        'h_acceptors',
    )
}

In [18]:
# Fill data_antagonist with one entry per antagonist SMILES.
# The column lists are emptied first so that re-running this cell rebuilds the
# table instead of appending a second copy of every row.
for column_values in data_antagonist.values():
    column_values.clear()

for smiles in smiles_antagonist:
    # Compute all descriptors before appending anything, so a failure on one
    # molecule cannot leave the columns with different lengths.
    (mol_weight, logp, rotatable_bonds, tpsa,
     rings, AroRings, h_donors, h_acceptors) = calc_mol_properties(smiles)
    data_antagonist['smiles'].append(smiles)
    data_antagonist['weight'].append(mol_weight)
    data_antagonist['logp'].append(logp)
    data_antagonist['rotatable_bonds'].append(rotatable_bonds)
    data_antagonist['tpsa'].append(tpsa)
    data_antagonist['rings'].append(rings)
    data_antagonist['AroRings'].append(AroRings)
    data_antagonist['h_donors'].append(h_donors)
    data_antagonist['h_acceptors'].append(h_acceptors)

In [20]:
# Turn the per-column lists into a table (one row per molecule, one column
# per dictionary key) and show it as the cell output.
df_antagonist = pd.DataFrame.from_dict(data_antagonist)
df_antagonist

Unnamed: 0,smiles,weight,logp,rotatable_bonds,tpsa,rings,AroRings,h_donors,h_acceptors
0,Br.O=c1[nH]oc(C2CCNCC2)c1-c1cccc([N+](=O)[O-])c1,370.203,2.58800,3,101.17,3,2,2,5
1,Br.O=c1[nH]oc(C2CCNCC2)c1-c1csc(-c2ccccc2)c1,407.333,4.40830,3,58.03,4,3,2,4
2,Br.CC(C)(C)c1ccc(-c2c(C3CCNCC3)o[nH]c2=O)cc1,381.314,3.97730,2,58.03,3,2,2,3
3,O=c1[nH]oc(C2CCNCC2)c1-c1cccc2ccccc12,294.354,3.25510,2,58.03,4,3,2,3
4,NC(=O)C1=CCC(N)C1,126.159,-0.48080,1,69.11,1,0,2,2
...,...,...,...,...,...,...,...,...,...
637,NC1=NC(O)C2C3OC4(O)OC(C(O)C2(N1)C4O)C3(O)CO,319.270,-5.51980,1,190.25,5,0,8,11
638,C1CCc2nnnn2CC1,138.174,0.39950,0,43.60,2,1,0,4
639,O=C(NN=Cc1ccccc1O)c1ccccc1O,256.261,1.86170,3,81.92,2,2,3,4
640,CC12CC(=O)C3C(CCC4CC(O)CCC43C)C1CC(=N)N2,304.434,2.49817,0,73.18,4,0,3,3


In [21]:
# Write the antagonist descriptor table to disk. With the default settings the
# DataFrame index is written too, as the first column.
df_antagonist.to_csv(path_or_buf='antagonist_MolecularProperties.csv')