In [2]:
# 导入必要的库
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, PandasTools
from psikit import Psikit

# 加载 SMILES 数据为 DataFrame
smiles_file = "smiles.csv"  # 替换为你的文件路径
smiles_data = pd.read_csv(smiles_file)
print(f"Loaded {len(smiles_data)} SMILES from {smiles_file}.")

# 添加 ROMol 列 (分子对象)
PandasTools.AddMoleculeColumnToFrame(smiles_data, smilesCol='smiles')
print("Added ROMol column.")

# 添加 n_atoms 列 (每个分子的原子数)
smiles_data['n_atoms'] = smiles_data['ROMol'].map(lambda x: x.GetNumAtoms() if x is not None else 0)
print("Added n_atoms column.")

# 查看数据结构
smiles_data.head()


Loaded 11 SMILES from smiles.csv.
Added ROMol column.
Added n_atoms column.


Unnamed: 0,smiles,ROMol,n_atoms
0,C=C,<rdkit.Chem.rdchem.Mol object at 0x145efccf74a0>,2
1,C=O,<rdkit.Chem.rdchem.Mol object at 0x145efca2c7b0>,2
2,C#N,<rdkit.Chem.rdchem.Mol object at 0x145efca2c6d0>,2
3,CCO,<rdkit.Chem.rdchem.Mol object at 0x145efca2c890>,3
4,CCC(=O)O,<rdkit.Chem.rdchem.Mol object at 0x145efca2c900>,5


In [3]:
from rdkit import Chem
import pandas as pd

# 读取 SMILES 数据
smiles_list = [
    'C=C', 'C=O', 'C#N', 'CCO', 'CCC(=O)O', 'CN=C=O',
    'C1=CCC=CC1', 'C1=CC=CC=C1', 'C1=CC=CC=C1',
    'C1CCC2CCCCC2C1', 'C1CCC(C2CCCCC2)CC1'
]

# 创建 DataFrame
df = pd.DataFrame(smiles_list, columns=['smiles'])

# 将 SMILES 转换为分子对象
df['ROMol'] = df['smiles'].apply(Chem.MolFromSmiles)

# 检查是否有无效的分子
invalid_mols = df[df['ROMol'].isnull()]
if not invalid_mols.empty:
    print("以下 SMILES 无法转换为分子对象：")
    print(invalid_mols['smiles'])


In [4]:
from rdkit.Chem import AllChem

def prepare_molecule(mol):
    mol_with_h = Chem.AddHs(mol)  # 添加显式氢原子
    if AllChem.EmbedMolecule(mol_with_h, AllChem.ETKDG()) != 0:
        print("3D 嵌入失败")
        return None
    if AllChem.UFFOptimizeMolecule(mol_with_h) != 0:
        print("UFF 优化失败")
        return None
    return mol_with_h

df['PreparedMol'] = df['ROMol'].apply(prepare_molecule)

# 检查是否有准备失败的分子
failed_preparations = df[df['PreparedMol'].isnull()]
if not failed_preparations.empty:
    print("以下分子准备失败：")
    print(failed_preparations['smiles'])


In [5]:
from psikit import Psikit

# 初始化 Psikit
pk = Psikit(debug=True, threads=4, memory=12)

# 修改 Psi4 的选项以加速优化
pk.psi4.set_options({
    'basis': 'sto-3g',  # 使用最小基组
    'scf_type': 'df',   # 密度拟合加速
    'reference': 'rhf', # Hartree-Fock 参考
    'geom_maxiter': 50, # 限制几何优化迭代次数
    'maxiter': 50       # 限制 SCF 迭代次数
})

# 定义优化函数
def optimize_molecule(mol):
    if mol is None:
        return None
    pk.mol = mol
    try:
        # 调用 Psikit 的优化方法
        energy = pk.optimize()
        return energy
    except Exception as e:
        print(f"优化失败：{e}")
        return None

# 应用优化到 DataFrame 中的分子
df['OptimizedEnergy'] = df['PreparedMol'].apply(optimize_molecule)

# 检查是否有优化失败的分子
failed_optimizations = df[df['OptimizedEnergy'].isnull()]
if not failed_optimizations.empty:
    print("以下分子优化失败：")
    print(failed_optimizations['smiles'])



  Memory set to  11.176 GiB by Python driver.
  Threads set to 4 by Python driver.
Optimizer: Optimization complete!
Optimizer: Optimization complete!
Optimizer: Optimization complete!
Optimizer: Optimization complete!
Optimizer: Optimization complete!
Optimizer: Optimization complete!
Optimizer: Optimization complete!
Optimizer: Optimization complete!
Optimizer: Optimization complete!
Optimizer: Optimization complete!
Optimizer: Optimization complete!


In [7]:
import pandas as pd

# 设置显示所有行
pd.set_option('display.max_rows', None)

# 显示完整的 DataFrame
display(df)


Unnamed: 0,smiles,ROMol,PreparedMol,OptimizedEnergy
0,C=C,<rdkit.Chem.rdchem.Mol object at 0x145efc9daf20>,<rdkit.Chem.rdchem.Mol object at 0x145efc9fb970>,-78.038769
1,C=O,<rdkit.Chem.rdchem.Mol object at 0x145efc9daf90>,<rdkit.Chem.rdchem.Mol object at 0x145efc9daa50>,-113.869536
2,C#N,<rdkit.Chem.rdchem.Mol object at 0x145efc9fb040>,<rdkit.Chem.rdchem.Mol object at 0x145efc9fbb30>,-92.876942
3,CCO,<rdkit.Chem.rdchem.Mol object at 0x145efc9fb0b0>,<rdkit.Chem.rdchem.Mol object at 0x145efc9fb9e0>,-154.089919
4,CCC(=O)O,<rdkit.Chem.rdchem.Mol object at 0x145efc9fb120>,<rdkit.Chem.rdchem.Mol object at 0x145efc9fb820>,-266.858985
5,CN=C=O,<rdkit.Chem.rdchem.Mol object at 0x145efc9fb190>,<rdkit.Chem.rdchem.Mol object at 0x145efc9fbd60>,-206.795593
6,C1=CCC=CC1,<rdkit.Chem.rdchem.Mol object at 0x145efc9fb200>,<rdkit.Chem.rdchem.Mol object at 0x145efc9fbe40>,-231.846762
7,C1=CC=CC=C1,<rdkit.Chem.rdchem.Mol object at 0x145efc9fb5f0>,<rdkit.Chem.rdchem.Mol object at 0x145efc9fbeb0>,-230.713524
8,C1=CC=CC=C1,<rdkit.Chem.rdchem.Mol object at 0x145efc9fb660>,<rdkit.Chem.rdchem.Mol object at 0x145efc9fbf20>,-230.713524
9,C1CCC2CCCCC2C1,<rdkit.Chem.rdchem.Mol object at 0x145efc9fb6d0>,<rdkit.Chem.rdchem.Mol object at 0x145efc9fbf90>,-389.200656
