In [1]:
from rdkit import Chem
from rdkit.Chem import Descriptors
import pandas as pd

In [2]:
def calc_mol_properties(smiles):
    """Compute eight RDKit descriptors for a molecule given as a SMILES string.

    Args:
        smiles: SMILES string describing the molecule.

    Returns:
        A tuple, in this order:
        ``(mol_weight, logp, rotatable_bonds, tpsa, rings, aromatic_rings,
        h_donors, h_acceptors)`` -- molecular weight, lipid-water partition
        coefficient (logP), number of rotatable bonds, topological polar
        surface area, ring count, aromatic ring count, hydrogen-bond donor
        count and hydrogen-bond acceptor count.

    Raises:
        ValueError: if RDKit cannot parse ``smiles`` into a molecule.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles reports a parse failure by returning None; without this
        # check the descriptor calls below fail with an opaque argument error
        # that does not say which SMILES was at fault.
        raise ValueError('Could not parse SMILES: {!r}'.format(smiles))

    mol_weight = Descriptors.MolWt(mol)
    logp = Descriptors.MolLogP(mol)
    rotatable_bonds = Descriptors.NumRotatableBonds(mol)
    tpsa = Descriptors.TPSA(mol)
    rings = Descriptors.RingCount(mol)
    aromatic_rings = Descriptors.NumAromaticRings(mol)
    h_donors = Descriptors.NumHDonors(mol)
    h_acceptors = Descriptors.NumHAcceptors(mol)
    return (mol_weight, logp, rotatable_bonds, tpsa,
            rings, aromatic_rings, h_donors, h_acceptors)

In [20]:
# Rows 0-193 of GABAA.csv are taken as the agonist set (the split index 194 is
# hard-coded); only their 'smiles' column is kept, as a plain Python list.
smiles_agonist = (
    pd.read_csv('GABAA.csv', encoding='gb18030')
    .iloc[:194]['smiles']
    .tolist()
)

In [21]:
# Show the agonist SMILES list as the cell output for a visual check.
smiles_agonist

['C(CC(=O)O)CN',
 'CC1C(C(C(C(O1)OC2C(C(COC2OC3CCC4(C5CCC6C7C(CC(OC78CC6(C5(CCC4C3(C)C)C)CO8)C=C(C)C)(C)O)C)O)OC9C(C(C(C(O9)COC1C(C(C(C(O1)CO)O)O)O)O)O)OC1C(C(C(CO1)O)O)O)O)O)O',
 'C1CNCC2C1(O2)C(=O)O',
 'CC1=C(CCCl)SC=N1',
 'CC(=CCCC(C)(C1CCC2(C1C(CC3C2(CCC4C3(CCC(C4(C)C)OC5C(C(C(C(O5)CO)O)O)OC6C(C(C(C(O6)CO)O)O)O)C)C)O)C)OC7C(C(C(C(O7)COC8C(C(C(O8)CO)O)O)O)O)O)C',
 'COC1=C(OC)C(OC)=CC(/C=C/C(O)=O)=C1',
 'CCOC(=O)C1=CN=CN1C(C)C2=CC=CC=C2.Cl',
 'O=C(O)/C=C/CN',
 'Cl/C(C1=C(C=C(Cl)C=C1)Cl)=C\\N2C=NC=N2',
 'CC(C)C1=C(C(=CC=C1)C(C)C)O',
 'CCC#CC(C)C1(C(=O)NC(=O)N(C1=O)C)CC=C',
 'CCCC(C)C1(C(=O)NC(=S)NC1=O)CC',
 'C(OC(C(F)(F)F)C(F)(F)F)F',
 'COC1=CC(=CC(=C1OC)OC)/C=C/C(=O)O',
 'O=C(N)CCC/N=C(C1=CC=C(Cl)C=C1)\\C2=CC(F)=CC=C2O',
 'FC1=CC=CC=C1C2=NN=C3C=C(C(C)(C)C)C(OCC4=NC=NN4CC)=NN32',
 '[2H]C(CCN)([2H])C(O)=O',
 'CN1C=NC2=C1C=CN=C2OCC3=CC=CC=C3Cl',
 'C1=C(ONC1=O)CN',
 'CC(C)C12CCC(O1)(C(CCC3(C(O3)CCC4(C(C2)O4)C)C)OC(=O)C)C',
 'O=C(OCC)C1=C(C2=C(NC3=C2C=C(C=C3)OCC4=CC=CC=C4)C=N1)COC',
 'CC(

In [22]:
# Every row of GABAA.csv from position 194 onward is taken as the antagonist
# set (the split index is hard-coded); only the 'smiles' column is kept.
smiles_antagonist = (
    pd.read_csv('GABAA.csv', encoding='gb18030')
    .iloc[194:]['smiles']
    .tolist()
)

In [24]:
# Show the antagonist SMILES list as the cell output for a visual check.
smiles_antagonist

['CC1=CC(=NO1)C2=NN=CC3=C(C(=NN32)OCC4=NC=NN4C)C(C)(C)C',
 'C[N+]1(CCC2=CC3=C(C=C2C1C4C5=C(C6=C(C=C5)OCO6)C(=O)O4)OCO3)C.[Br-]',
 'COC1=CC=C(C=C1)C2=NN(C(=N)C=C2)CCCC(=O)O.Br',
 'CC1=C(SC(=C1)C2=NO[C@@](C2)(C3=CC(=C(C(=C3)Cl)Cl)Cl)C(F)(F)F)C(=O)NCC(=O)NCC(F)(F)F',
 'CC1=NN=C(C2=CC3=C(C=C2C1)NC(=O)O3)C4=CC5=C(C=CC=C5S4)F',
 'C1=COC(=C1)CNC2=CC(=C(C=C2C(=O)O)S(=O)(=O)N)Cl',
 'CC(=C)C1C2C3C4(C(C1C(=O)O2)(CC5C4(O5)C(=O)O3)O)C',
 'CN1CCC2=CC3=C(C=C2C1C4C5=C(C6=C(C=C5)OCO6)C(=O)O4)OCO3',
 'CC(=O)NC1CCC2=CC(=C(C(=C2C3=CC=C(C(=O)C=C13)SC)OC)OC)OC4C(C(C(C(O4)CO)O)O)O',
 'C1CCN2C(C1)C34CC2C=CC3=CC(=O)O4',
 'CCN1CC2(CCC(C34C2CC(C31)C56C4CC(=O)C(C5)C(=C)C6O)O)C',
 'C1=CC=C(C(=C1)C2=CC(=O)C3=C(O2)C=CC(=C3)O)O',
 'C1CCC2=NN=NN2CC1',
 'C1=CC=C(C(=C1)/C=N/NC(=O)C2=CC=CC=C2O)O',
 'C1=CC(=CC=C1C=O)O',
 'C1C2=C(N=CN2C3=C(C=C(C=C3)Br)C4=NC=NN41)C(F)F',
 'C1CCC2=NC3=CC=CC=C3C(=C2C1)NCCCCCCCNC4=C5CCCCC5=NC6=CC=CC=C64.Cl.Cl',
 'CCC12COP(=O)(OC1)OC2',
 'CC12CCC(CC1CCC3C2CCC4(C3CC=C4C5=CC=CC=C5)C)O',
 'C[N+]1(

# agonist

In [25]:
# Column-oriented accumulator: one independent, initially empty list per output column.
data_agonist = {
    column: []
    for column in (
        'smiles', 'weight', 'logp', 'rotatable_bonds', 'tpsa',
        'rings', 'AroRings', 'h_donors', 'h_acceptors',
    )
}

In [26]:
# Column names listed in the same order as the tuple returned by calc_mol_properties.
descriptor_columns = (
    'weight', 'logp', 'rotatable_bonds', 'tpsa',
    'rings', 'AroRings', 'h_donors', 'h_acceptors',
)
for smiles in smiles_agonist:
    # Compute the descriptors first so that nothing is appended if the call raises.
    descriptor_values = calc_mol_properties(smiles)
    data_agonist['smiles'].append(smiles)
    for column, value in zip(descriptor_columns, descriptor_values):
        data_agonist[column].append(value)

In [27]:
# Turn the per-column lists into a table (one row per agonist SMILES) and display it.
df_agonist = pd.DataFrame.from_dict(data_agonist)
df_agonist

Unnamed: 0,smiles,weight,logp,rotatable_bonds,tpsa,rings,AroRings,h_donors,h_acceptors
0,C(CC(=O)O)CN,103.121,-0.19010,3,63.32,0,0,2,2
1,CC1C(C(C(C(O1)OC2C(C(COC2OC3CCC4(C5CCC6C7C(CC(...,1207.364,-2.71820,13,393.98,11,0,14,26
2,C1CNCC2C1(O2)C(=O)O,143.142,-0.79810,1,61.86,2,0,2,3
3,CC1=C(CCCl)SC=N1,161.657,2.23282,2,12.89,1,1,0,2
4,CC(=CCCC(C)(C1CCC2(C1C(CC3C2(CCC4C3(CCC(C4(C)C...,1079.281,-1.56350,16,357.06,8,0,14,22
...,...,...,...,...,...,...,...,...,...
189,CC(C)C1=NOC(=N1)C2=C3CN=C(C4=C(N3C=N2)C=CC(=C4...,411.440,4.51710,3,69.10,5,4,0,6
190,C(C1=NNC(=O)O1)N.Cl,151.553,-0.75660,1,84.91,1,1,2,4
191,CC(C)C1=NOC(=N1)C2=C3CN=C(C4=C(N3C=N2)C=CC(=C4...,411.440,4.51710,3,69.10,5,4,0,6
192,CNC(=O)C1=C2CN=C(C3=C(N2C=N1)C=CC(=C3)C#C)C4=C...,341.374,1.95920,2,72.17,4,3,1,5


In [28]:
# Persist the agonist descriptor table; with to_csv's defaults the row index is
# written as the first (unnamed) column.
df_agonist.to_csv(path_or_buf='agonist_MolecularProperties.csv')

# antagonist (non-agonist)

In [29]:
# Column-oriented accumulator: one independent, initially empty list per output column.
data_antagonist = {
    column: []
    for column in (
        'smiles', 'weight', 'logp', 'rotatable_bonds', 'tpsa',
        'rings', 'AroRings', 'h_donors', 'h_acceptors',
    )
}

In [30]:
# Column names listed in the same order as the tuple returned by calc_mol_properties.
descriptor_columns = (
    'weight', 'logp', 'rotatable_bonds', 'tpsa',
    'rings', 'AroRings', 'h_donors', 'h_acceptors',
)
for smiles in smiles_antagonist:
    # Compute the descriptors first so that nothing is appended if the call raises.
    descriptor_values = calc_mol_properties(smiles)
    data_antagonist['smiles'].append(smiles)
    for column, value in zip(descriptor_columns, descriptor_values):
        data_antagonist[column].append(value)

In [31]:
# Turn the per-column lists into a table (one row per antagonist SMILES) and display it.
df_antagonist = pd.DataFrame.from_dict(data_antagonist)
df_antagonist

Unnamed: 0,smiles,weight,logp,rotatable_bonds,tpsa,rings,AroRings,h_donors,h_acceptors
0,CC1=CC(=NO1)C2=NN=CC3=C(C(=NN32)OCC4=NC=NN4C)C...,368.401,2.09272,4,109.05,4,4,0,10
1,C[N+]1(CCC2=CC3=C(C=C2C1C4C5=C(C6=C(C=C5)OCO6)...,462.296,-0.26670,1,63.22,6,2,0,6
2,COC1=CC=C(C=C1)C2=NN(C(=N)C=C2)CCCC(=O)O.Br,368.231,2.48087,6,88.20,2,2,2,5
3,CC1=C(SC(=C1)C2=NO[C@@](C2)(C3=CC(=C(C(=C3)Cl)...,596.764,6.00722,6,79.79,3,2,2,5
4,CC1=NN=C(C2=CC3=C(C=C2C1)NC(=O)O3)C4=CC5=C(C=C...,365.389,4.24430,1,70.72,5,4,1,5
...,...,...,...,...,...,...,...,...,...
190,CSC1=NN(C(=C1C#N)N)C2=C(C=C(C=C2Cl)C(F)(F)F)Cl,367.183,4.37368,2,67.63,2,2,1,5
191,CC1=C(C(=C2C=NN(C2=N1)CCCCC#C)N)C(=O)OCC=C,312.373,2.46822,7,83.03,2,2,1,6
192,CCOC(=O)C1=NC=C2C(=C1)C3=C(N2)C=CC(=C3)OCC4=CC...,346.386,4.47180,5,64.21,4,4,1,4
193,CCC1=CC2=C(C=C1)N3C=C(N=C3C=C2OC)C(=O)C4=CC=CC=N4,331.375,3.68450,4,56.49,4,4,0,5


In [32]:
# Persist the antagonist descriptor table; with to_csv's defaults the row index
# is written as the first (unnamed) column.
df_antagonist.to_csv(path_or_buf='antagonist_MolecularProperties.csv')