# 读写分子

In [3]:
from rdkit import Chem

# --- Reading a molecule ---------------------------------------------------
# Parse toluene from a SMILES string.
m = Chem.MolFromSmiles('Cc1ccccc1')
# Alternative inputs (kept for reference, not executed here):
# m = Chem.MolFromMolFile('data/input.mol')    # read from a .mol file
# stringWithMolData=open('data/input.mol','r').read()
# m = Chem.MolFromMolBlock(stringWithMolData)  # read from a mol block held in a string

# --- Writing a molecule ---------------------------------------------------
# m = Chem.MolFromMolFile('data/chiral.mol')
# NOTE: with the SMILES input above this cell evaluates to 'Cc1ccccc1'.
# The chiral string 'C[C@H](O)c1ccccc1' is what the RDKit getting-started
# guide shows for the commented-out data/chiral.mol variant instead.
Chem.MolToSmiles(m)

'Cc1ccccc1'

# 化学描述符生成
大多数分子描述符都可以通过 `rdkit.Chem.Descriptors` 模块在 Python 中直接计算。

In [4]:
from rdkit.Chem import Descriptors

# Parse the molecule whose descriptors are computed below.
m = Chem.MolFromSmiles('c1ccccc1C(=O)O')

# A notebook cell only echoes its final expression, so a bare
# `Descriptors.TPSA(m)` line would be computed and then thrown away.
# Print it explicitly so the topological polar surface area is visible.
print('TPSA:', Descriptors.TPSA(m))

# logP estimate; left as the last expression so it remains the cell's output.
Descriptors.MolLogP(m)

1.3848

# 分子指纹提取 & 分子相似性计算
化合物分子指纹主要分为三种，分别是:
1. Topological FP

In [9]:
from rdkit import DataStructs

# Build the two molecules to compare.
m1 = Chem.MolFromSmiles('CCOC')
m2 = Chem.MolFromSmiles('CCO')

# Compute the RDKit topological fingerprint of each molecule.
# Default fingerprint parameters (as listed in the RDKit documentation):
# - minimum path size: 1 bond
# - maximum path size: 7 bonds
# - fingerprint size: 2048 bits
# - number of bits set per hash: 2
# - minimum fingerprint size: 64 bits
# - target on-bit density 0.0
fps1 = Chem.RDKFingerprint(m1)
fps2 = Chem.RDKFingerprint(m2)

# Similarity between the two fingerprints (DataStructs is already imported
# at the top of this cell, so it is not imported a second time here).
# No metric is passed, so the library default is used -- Tanimoto per the
# RDKit documentation.
DataStructs.FingerprintSimilarity(fps1, fps2)

0.6

2. MACCS Keys

In [11]:
from rdkit import DataStructs
from rdkit.Chem import MACCSkeys

# Parse three small molecules, then derive the MACCS keys fingerprint of each.
ms = [Chem.MolFromSmiles(smiles) for smiles in ('CCOC', 'CCO', 'COC')]
fps = list(map(MACCSkeys.GenMACCSKeys, ms))

# Similarity between the first two molecules' MACCS fingerprints.
DataStructs.FingerprintSimilarity(fps[0], fps[1])

0.5

3. Atom Pairs and Topological Torsions

In [12]:
from rdkit.Chem.AtomPairs import Pairs

# Compute atom-pair fingerprints (not MACCS keys) for three small molecules.
ms = [Chem.MolFromSmiles('CCOC'), Chem.MolFromSmiles('CCO'),
    Chem.MolFromSmiles('COC')]
pairFps = [Pairs.GetAtomPairFingerprint(x) for x in ms]
# Alternative kept for reference: the bit-vector variant of the same fingerprint.
#pairFps = [Pairs.GetAtomPairFingerprintAsBitVect(x) for x in ms]

# Dice similarity between the atom-pair fingerprints of the first two molecules.
from rdkit import DataStructs
DataStructs.DiceSimilarity(pairFps[0],pairFps[1])

0.2222222222222222