In [1]:
from rdkit import Chem
from rdkit.Chem import Descriptors
import pandas as pd

In [2]:
# Compute a molecule's molecular weight, logP, rotatable-bond count, topological
# polar surface area, ring count, aromatic-ring count, and H-bond donor/acceptor counts.
def calc_mol_properties(smiles):
    """Compute eight RDKit descriptors for the molecule given as a SMILES string.

    Parameters
    ----------
    smiles : str
        SMILES representation of the molecule.

    Returns
    -------
    tuple
        ``(mol_weight, logp, rotatable_bonds, tpsa, rings, aromatic_rings,
        h_donors, h_acceptors)``, in that order, as returned by
        ``Descriptors.MolWt``, ``MolLogP``, ``NumRotatableBonds``, ``TPSA``,
        ``RingCount``, ``NumAromaticRings``, ``NumHDonors`` and
        ``NumHAcceptors`` respectively.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles`` into a molecule.
    """
    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None rather than raising;
    # fail here with the offending string instead of inside a descriptor call.
    if mol is None:
        raise ValueError(f"RDKit could not parse SMILES: {smiles!r}")

    return (
        Descriptors.MolWt(mol),
        Descriptors.MolLogP(mol),
        Descriptors.NumRotatableBonds(mol),
        Descriptors.TPSA(mol),
        Descriptors.RingCount(mol),
        Descriptors.NumAromaticRings(mol),
        Descriptors.NumHDonors(mol),
        Descriptors.NumHAcceptors(mol),
    )

In [3]:
# Read the dataset once and split its 'smiles' column by row position.
# NOTE(review): the split point is hard-coded; it presumably relies on the CSV
# listing all agonists first — confirm against how refined_gabaa.csv is built.
N_AGONIST_ROWS = 827
gabaa_smiles = pd.read_csv('refined_gabaa.csv')['smiles']
smiles_agonist = gabaa_smiles.iloc[:N_AGONIST_ROWS].tolist()
smiles_non_agonist = gabaa_smiles.iloc[N_AGONIST_ROWS:].tolist()

# Agonist descriptors

In [4]:
# Column-wise accumulator for the agonist set: one independent empty list per output column.
data_agonist = {
    column: []
    for column in (
        'smiles', 'weight', 'logp', 'rotatable_bonds', 'tpsa',
        'rings', 'AroRings', 'h_donors', 'h_acceptors',
    )
}

In [5]:
# Append one row per agonist SMILES to the column lists in data_agonist.
for smiles in smiles_agonist:
    # Compute the descriptors before touching any list, so a failing molecule
    # leaves no partially written row behind.
    descriptor_values = calc_mol_properties(smiles)
    data_agonist['smiles'].append(smiles)
    # Column names are listed in the same order as the tuple returned by calc_mol_properties.
    for column, value in zip(
        ('weight', 'logp', 'rotatable_bonds', 'tpsa',
         'rings', 'AroRings', 'h_donors', 'h_acceptors'),
        descriptor_values,
    ):
        data_agonist[column].append(value)

In [6]:
# Turn the accumulated column lists into a table; the bare name on the last
# line makes the notebook render it.
df_agonist = pd.DataFrame.from_dict(data_agonist)
df_agonist

Unnamed: 0,smiles,weight,logp,rotatable_bonds,tpsa,rings,AroRings,h_donors,h_acceptors
0,Cc1ccc(-c2nc(C#N)c(S(=O)(=O)N(C)CCO)o2)cc1,321.358,1.13450,5,107.43,2,2,1,6
1,CCOC(=O)c1cn2c(n1)CCC2,180.207,1.00600,2,44.12,2,1,0,4
2,N#Cc1ccc(CSc2nc(=O)cc(O)[nH]2)cc1,259.290,1.63948,3,89.77,2,2,2,5
3,Fc1ccc(-c2nnnn2-c2ccc(Br)cn2)cc1,320.125,2.62590,2,56.49,3,3,0,5
4,CCc1ccc(NC(=O)c2c(C(=O)Nc3ccc(C(=O)OC)cc3)sc3n...,490.541,3.91802,6,118.87,4,4,2,8
...,...,...,...,...,...,...,...,...,...
822,Cc1cc(-c2nnc3c4ccccc4c(OCc4cn(C)nn4)nn23)no1,362.353,1.94842,4,109.05,5,5,0,10
823,O=C(Nc1c(-c2cccs2)nc2cc(Br)ccn12)c1ccc(Cl)cc1,432.730,5.73100,3,46.40,4,4,1,4
824,O=C(Nc1c(C2CCCC2)nc2ccc(Br)cn12)c1ccccc1,384.277,5.00670,3,46.40,4,3,1,3
825,COc1ccc(Nc2cc3c(cn2)ncn3CC2CC2)c(OC)c1,324.384,3.60210,6,61.20,4,3,1,6


In [7]:
# Save the agonist table. to_csv is left at its defaults, so the row index is
# written as the first (unnamed) column.
agonist_csv_path = 'agonist_MolecularProperties.csv'
df_agonist.to_csv(agonist_csv_path)

# Non-agonist descriptors

In [8]:
# Column-wise accumulator for the non-agonist set: one independent empty list per output column.
data_non_agonist = {
    column: []
    for column in (
        'smiles', 'weight', 'logp', 'rotatable_bonds', 'tpsa',
        'rings', 'AroRings', 'h_donors', 'h_acceptors',
    )
}

In [9]:
# Append one row per non-agonist SMILES to the column lists in data_non_agonist.
for smiles in smiles_non_agonist:
    # Compute the descriptors before touching any list, so a failing molecule
    # leaves no partially written row behind.
    descriptor_values = calc_mol_properties(smiles)
    data_non_agonist['smiles'].append(smiles)
    # Column names are listed in the same order as the tuple returned by calc_mol_properties.
    for column, value in zip(
        ('weight', 'logp', 'rotatable_bonds', 'tpsa',
         'rings', 'AroRings', 'h_donors', 'h_acceptors'),
        descriptor_values,
    ):
        data_non_agonist[column].append(value)

In [10]:
# Turn the accumulated column lists into a table; the bare name on the last
# line makes the notebook render it.
df_non_agonist = pd.DataFrame.from_dict(data_non_agonist)
df_non_agonist

Unnamed: 0,smiles,weight,logp,rotatable_bonds,tpsa,rings,AroRings,h_donors,h_acceptors
0,CSc1nn(-c2c(Cl)cc(C(F)(F)F)cc2Cl)c(N)c1-c1ccccc1C,432.298,6.47742,3,43.84,3,3,1,4
1,[O-][n+]1nc2c(-c3nc[nH]n3)cnn2c2cc(Cl)ccc21,287.670,0.95440,1,98.70,4,4,1,6
2,CC(=O)C1CCC2C3CCC4CC(O)CCC4(C)C3CCC12C,318.501,4.59520,1,37.30,4,0,1,2
3,O=C(NC1CCCCC1)C1Cc2c(sc3ccccc23)CN1,314.454,3.36450,2,41.13,4,2,2,3
4,CCCCNc1c(C(=O)OCC)c(C)nc2c1cnn2CC,304.394,3.14832,7,69.04,2,2,1,6
...,...,...,...,...,...,...,...,...,...
1077,On1ncc(C2CCNCC2)c1-c1cccc(-c2ccccc2)c1,319.408,3.92140,3,50.08,4,3,2,4
1078,CC1SC(=N)NC1CC(=O)O,174.225,0.48937,2,73.18,1,0,3,3
1079,COc1ccc2cnc3c(C(=O)NC(C)C)cnn3c2c1,284.319,2.02930,3,68.52,3,3,1,5
1080,COc1ccc2cnc3c(C(N)=O)cnn3c2c1,242.238,0.99000,2,82.51,3,3,1,5


In [11]:
# Save the non-agonist table. to_csv is left at its defaults, so the row index
# is written as the first (unnamed) column.
non_agonist_csv_path = 'non_agonist_MolecularProperties.csv'
df_non_agonist.to_csv(non_agonist_csv_path)