# (1)从SMILES/SMARTS直接读取

In [6]:
from rdkit import Chem

# SMILES string to parse; the SMARTS pattern is given inline below.
smi = 'C[C@H](O)c1ccccc1'

# MolFromSmiles parses a molecule, MolFromSmarts parses a query pattern.
mol, sms = Chem.MolFromSmiles(smi), Chem.MolFromSmarts('Cc1ccccc1')

# Print each parsed object followed by its Python type, one item per line.
print(mol, type(mol), sms, type(sms), sep='\n')

<rdkit.Chem.rdchem.Mol object at 0x000001B0E98540B0>
<class 'rdkit.Chem.rdchem.Mol'>
<rdkit.Chem.rdchem.Mol object at 0x000001B0E9854120>
<class 'rdkit.Chem.rdchem.Mol'>


# (2)批量读取SMILES
- 从.smi批量读取

In [8]:
# Open the tab-delimited .smi file with RDKit's SMILES supplier.
suppl = Chem.SmilesMolSupplier('data/batch_smiles.smi', delimiter='\t')
print(suppl)
# The supplier yields None for a record it cannot parse, and MolToSmiles
# raises on None. Skip such records so one bad line does not abort the
# whole cell (same guard as the SDF cell further down).
# NOTE: despite its name, `mols` holds SMILES strings, not Mol objects.
mols = [Chem.MolToSmiles(mol) for mol in suppl if mol is not None]
print(mols)

<rdkit.Chem.rdmolfiles.SmilesMolSupplier object at 0x000001B0E983C860>
['C1=CC=CC=CC=C1', 'c1ccccc1', 'c1ccoc1']


- 从文本批量读取


In [12]:
# Read the whole .smi file into a string, then let RDKit parse that text.
with open('data/batch_smiles.smi', 'r') as f:
    mols_text = f.read()
suppl = Chem.SmilesMolSupplierFromText(mols_text, delimiter='\t')
# Records that fail to parse come back as None, and MolToSmiles raises on
# None. Filter them out so one bad line does not abort the whole cell.
# NOTE: despite its name, `mols` holds SMILES strings, not Mol objects.
mols = [Chem.MolToSmiles(mol) for mol in suppl if mol is not None]
print(mols)

['C1=CC=CC=CC=C1', 'c1ccccc1', 'c1ccoc1']


- 读取DataFrame中的SMILES:

`PandasTools.AddMoleculeColumnToFrame(frame, smilesCol, molCol, includeFingerprints)`


In [15]:
import pandas as pd
from rdkit.Chem import PandasTools, Descriptors
# Load the example compound table; its SMILES column is parsed in a later cell.
df = pd.read_csv(filepath_or_buffer='data/smiles_df.csv')
# A bare trailing expression lets the notebook render the frame.
df

Unnamed: 0,Name,SMILES
0,Lanreotide,c1(c2c(cccc2)[nH]c1)C[C@@H]1NC(=O)[C@@H](Cc2cc...
1,Lansoprazole,Cc1c(OCC(F)(F)F)ccnc1CS(=O)c2nc3ccccc3[nH]2
2,Laromustine,CS(=O)(=O)[N@@](N(S(=O)(=O)C)C(=O)NC)CCCl
3,Laropiprant,CS(=O)(=O)c1cc(cc2c1n(c3c2CC[C@@H]3CC(=O)O)Cc4...
4,Larotaxel,O=C1[C@@]23[C@@H]([C@@]4(OC(=O)C)[C@@H](C[C@@H...


In [18]:
# Parse the 'SMILES' column into RDKit molecules stored in a new 'mol'
# column; `df` is modified in place. includeFingerprints=True additionally
# asks RDKit to attach a fingerprint to each molecule (per the PandasTools
# docs this speeds up later substructure searches).
PandasTools.AddMoleculeColumnToFrame(df, 'SMILES', 'mol', includeFingerprints=True)
# A row whose SMILES could not be parsed holds None in 'mol', and
# Descriptors.MolWt raises on None. Report NaN for such rows instead of
# aborting the whole column; valid rows are computed exactly as before.
df['MW'] = df['mol'].apply(
    lambda m: Descriptors.MolWt(m) if m is not None else float('nan')
)
print(df.head())

           Name                                             SMILES  \
0    Lanreotide  c1(c2c(cccc2)[nH]c1)C[C@@H]1NC(=O)[C@@H](Cc2cc...   
1  Lansoprazole        Cc1c(OCC(F)(F)F)ccnc1CS(=O)c2nc3ccccc3[nH]2   
2   Laromustine          CS(=O)(=O)[N@@](N(S(=O)(=O)C)C(=O)NC)CCCl   
3   Laropiprant  CS(=O)(=O)c1cc(cc2c1n(c3c2CC[C@@H]3CC(=O)O)Cc4...   
4     Larotaxel  O=C1[C@@]23[C@@H]([C@@]4(OC(=O)C)[C@@H](C[C@@H...   

                                                 mol        MW  
0  <rdkit.Chem.rdchem.Mol object at 0x000001B0EBD...  1096.347  
1  <rdkit.Chem.rdchem.Mol object at 0x000001B0EBD...   369.368  
2  <rdkit.Chem.rdchem.Mol object at 0x000001B0EBC...   307.781  
3  <rdkit.Chem.rdchem.Mol object at 0x000001B0EBC...   435.904  
4  <rdkit.Chem.rdchem.Mol object at 0x000001B0EBC...   831.912  


2. 读入sdf文件
- 从.sdf里批量读取

In [20]:
from rdkit import Chem
# Iterate over the molecule records stored in the SD file.
suppl = Chem.SDMolSupplier('data/batch.sdf')
# filter(None, ...) drops falsy entries (the None placeholders the supplier
# gives for records it could not read) before converting to SMILES.
mols = list(map(Chem.MolToSmiles, filter(None, suppl)))
print(mols)

['C1=C\\C=C/C=C\\C=C/1', 'c1ccccc1', 'c1ccoc1']


3. 读入mol文件
- 从.mol里读取

In [31]:
# Load a single molecule from an MDL .mol file and print it as SMILES.
# NOTE(review): MolFromMolFile presumably returns None when the file content
# cannot be parsed, in which case MolToSmiles below would raise — confirm and
# add a guard if this path can receive untrusted files.
mol = Chem.MolFromMolFile('data/output.mol')
print(Chem.MolToSmiles(mol))

C1CCC1


4. 读入多肽字符串

In [25]:
# Build a molecule from a peptide sequence string and print its SMILES.
# The recorded output for 'GGGGG' is a chain of five glycine units.
seq = 'GGGGG'
mol = Chem.MolFromSequence(seq)
smi = Chem.MolToSmiles(mol)
# Prints the literal label "smi" followed by the SMILES text.
print("smi", smi)

smi NCC(=O)NCC(=O)NCC(=O)NCC(=O)NCC(=O)O


5. 读入inchi

In [26]:
# TODO: add an InChI reading example (e.g. Chem.MolFromInchi)

6. 其他格式

In [27]:
# TODO: add examples for other input formats (e.g. Chem.MolFromPDBFile, Chem.MolFromMol2File)