In [7]:
import os

import pandas as pd
from meeko import MoleculePreparation
from rdkit import Chem
from rdkit.Chem import AllChem

class LigandPrep:
    """Turn a SMILES string into a 3D ligand written as a MOL/SDF file and a PDBQT file."""

    def __init__(self, smiles: str):
        """
        Store the SMILES string to be prepared.

        :param smiles: SMILES string of the molecule
        """
        self.smiles = smiles

    def smile2sdf(self, output_file: str):
        """
        Build a 3D conformer for ``self.smiles`` and write it to disk.

        Two files are produced: ``output_file`` (written with
        ``Chem.MolToMolFile``) and a sibling file with the same stem and a
        ``.pdbqt`` extension (written through meeko's ``MoleculePreparation``).
        Both files hold the same MMFF-optimized conformer. The prepared RDKit
        molecule is kept on ``self.rdkit_mol``.

        :param output_file: path of the structure file to write
        :raises ValueError: if the SMILES is not a non-empty string, cannot be
            parsed by RDKit, or no 3D coordinates can be generated for it
        """
        # Reject NaN / None / blank values (e.g. empty CSV cells) up front so the
        # caller sees a ValueError instead of an RDKit argument error.
        if not isinstance(self.smiles, str) or not self.smiles.strip():
            raise ValueError(f"无法解析SMILES: {self.smiles}")

        # splitext only strips the final extension, so dots in directory names
        # (or a leading "./") do not truncate the output path.
        pdbqt_path = os.path.splitext(output_file)[0] + '.pdbqt'

        # Use the instance's SMILES, not whatever global happens to be named `smiles`.
        parsed_mol = Chem.MolFromSmiles(self.smiles)
        if parsed_mol is None:
            raise ValueError(f"无法解析SMILES: {self.smiles}")

        # Explicit hydrogens are added before embedding.
        self.rdkit_mol = Chem.AddHs(parsed_mol)

        # A non-zero return code from EmbedMolecule is treated as failure.
        if AllChem.EmbedMolecule(self.rdkit_mol) != 0:
            raise ValueError(f"无法为分子生成3D坐标: {self.smiles}")

        # Optimize the conformer with MMFF (UFF would be the alternative:
        # AllChem.UFFOptimizeMolecule).
        AllChem.MMFFOptimizeMolecule(self.rdkit_mol)

        Chem.MolToMolFile(self.rdkit_mol, output_file)

        # The molecule is NOT embedded a second time here: doing so would throw
        # away the optimized geometry and make the PDBQT disagree with the
        # structure file written above.
        pdbqt_preparer = MoleculePreparation()
        pdbqt_preparer.prepare(self.rdkit_mol)
        pdbqt_preparer.write_pdbqt_file(pdbqt_path)

In [8]:

# Load the table of predicted ligands; the loop below reads its 'SMILES' and 'ID' columns.
df = pd.read_csv("datasets/DL_pred/top.csv")

# Prepare every row; a failure on one molecule is reported and the loop moves on.
for index, row in df.iterrows():
    smiles = row['SMILES']
    ligand_id = row['ID']
    output_file = f"ligand_{ligand_id}.sdf"

    try:
        LigandPrep(smiles).smile2sdf(output_file)
    except ValueError as e:
        # Report the real reason: a ValueError can also mean that 3D embedding
        # failed, not only that the SMILES string is invalid.
        print(f"行 {index} 处理失败 (SMILES: {smiles}): {e}")
    else:
        print(f"分子已保存为 {output_file}")




分子已保存为 ligand_200493.sdf
分子已保存为 ligand_201017.sdf
分子已保存为 ligand_200937.sdf
分子已保存为 ligand_200259.sdf
