In [1]:
import alfabet

In [9]:
from alfabet.fragment import canonicalize_smiles

# Hand-written SMILES (with stereo annotations) for the molecule under study.
s_raw = 'C1CC([C@H]3[C@@](C1)(C)[C@H]2CC[C@H](C)[C@H]([C@@]2(CC3)C)CCCC)(C)C'
# Canonicalised form of the same SMILES; the later cells pass this string
# to both alfabet and RDKit so they all work from one representation.
s_can = canonicalize_smiles(s_raw)
print(s_can)

CCCC[C@@H]1[C@@H](C)CC[C@H]2[C@@]1(C)CC[C@H]1C(C)(C)CCC[C@]21C


In [None]:
from alfabet.prediction import check_input

# check_input yields three values for the SMILES, unpacked in order below.
is_outlier, missing_atom, missing_bond = check_input(s_can)

# Nothing here raises: the flags only indicate whether this molecule lies
# inside alfabet's "comfort zone".
print(
    "outlier?", is_outlier,
    "missing_atom?", missing_atom,
    "missing_bond?", missing_bond,
)


In [None]:
from alfabet.prediction import predict_bdes

# Run the BDE prediction on the canonical SMILES with drawing switched off.
df = predict_bdes(s_can, draw=False)

# List the column labels first, then preview the first three rows.
# Columns such as start_atom / end_atom / bde_pred are expected to show up.
print(list(df.columns))
df.head(3)


In [None]:
# Index check against the molecule parsed WITHOUT explicit hydrogens.
from rdkit import Chem

mol = Chem.MolFromSmiles(s_can)  # deliberately no Chem.AddHs() here
N = mol.GetNumAtoms()

# Both bond-endpoint columns are asserted to hold indices inside [0, N-1].
# The generator short-circuits, so start_atom is checked before end_atom.
# NOTE(review): if alfabet numbers atoms on an H-added molecule, bonds to
# hydrogen would carry indices >= N and this assertion would fail -- confirm
# which atom numbering the start_atom / end_atom columns actually use.
assert all(
    df[column].between(0, N - 1).all()
    for column in ('start_atom', 'end_atom')
)
print("Index alignment ✓  (alfabet indices match RDKit non-H atoms)")


In [None]:
# Columns the rest of the notebook relies on from the prediction table.
required = {'start_atom', 'end_atom', 'bde_pred'}

# Fail with the list of absent columns if any required one is not in df.
assert required <= set(df.columns), f"missing columns: {required - set(df.columns)}"
print("Columns ✓  (alfabet outputs satisfy the contract)")


In [None]:
# Add-H verification experiment: compare atom counts and alfabet's bond
# indices for methane with and without explicit hydrogens.
from rdkit import Chem
from alfabet.fragment import canonicalize_smiles
from alfabet.prediction import predict_bdes

# Methane is written "C" in SMILES. The formula-style "CH4" is not valid
# SMILES (H may only appear inside brackets, e.g. "[CH4]"), so RDKit cannot
# parse it and this cell would fail before reaching the prediction.
s = canonicalize_smiles("C")
mol_noH = Chem.MolFromSmiles(s)   # hydrogens left implicit
mol_H = Chem.AddHs(mol_noH)       # same molecule with explicit hydrogens

# The printed labels read "without added H:" / "with added H:".
print("不加氢:", mol_noH.GetNumAtoms())  # 1
print("加氢:", mol_H.GetNumAtoms())      # 5

bde_df = predict_bdes(s)
print(bde_df[['start_atom','end_atom','bde_pred']].head())
# Expectation to verify against the printed table: end_atom may fall outside
# 0..N-1 (N = atom count without explicit H) or be -1; such rows would be the
# bonds to hydrogen atoms (X-H bonds), not "hydrogen bonds" in the
# intermolecular sense.
