In [None]:
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
from rdkit import Chem
import os
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdMolAlign

In [39]:
base_dir = '/home/shaeo/cadd_training/20250906_unimol-docking2'
unimol_dir = '/home/shaeo/opt/Uni-Mol'

## インプット用のリガンドファイル作成

In [90]:
list_ligs = [
    '5HG8', '5UG8', '5UG9', '5UGC'
]
for lig in list_ligs:
    sup = Chem.SDMolSupplier(base_dir + f'/data/ligand_{lig}.sdf', removeHs=False)
    mol = sup[0]
    mol.RemoveAllConformers()   # 配座を削除

    # 3D埋め込み（ETKDGv3）
    #params = AllChem.ETKDGv3()
    #AllChem.EmbedMolecule(mol, params)
    #AllChem.UFFOptimizeMolecule(mol, maxIters=500) # リラクゼーション

    Chem.MolToMolFile(mol, base_dir + f'/data/input_{lig}.sdf')


## Gridファイルの作成
- リドッキング、クロスドッキング用に5HG8をベースとして作成

In [85]:
def calculated_docking_grid_sdf(ligand_path, json_path, add_size=10):
    mol = Chem.MolFromMolFile(str(ligand_path), sanitize=False)
    coords = mol.GetConformer(0).GetPositions().astype(np.float32)
    min_xyz = [min(coord[i] for coord in coords) for i in range(3)]
    max_xyz = [max(coord[i] for coord in coords) for i in range(3)]
    center = np.mean(coords, axis=0)
    size = [abs(max_xyz[i] - min_xyz[i]) for i in range(3)]
    center_x, center_y, center_z = center
    size_x, size_y, size_z = size
    size_x = size_x + add_size
    size_y = size_y + add_size
    size_z = size_z + add_size
    grid_info = {
        "center_x": float(center_x),
        "center_y": float(center_y),
        "center_z": float(center_z),
        "size_x": float(size_x),
        "size_y": float(size_y),
        "size_z": float(size_z)
    }
    with open(json_path, 'w') as f:
        json.dump(grid_info, f, indent=4)
    print('Center: ({:.6f}, {:.6f}, {:.6f})'.format(center_x, center_y, center_z))
    print('Size: ({:.6f}, {:.6f}, {:.6f})'.format(size_x, size_y, size_z))

In [86]:
ligand_path = base_dir + '/data/ligand_5HG8.sdf'
grid_path = base_dir + f'/data/grid_5HG8_as{add_size}.json'
calculated_docking_grid_sdf(ligand_path, grid_path, add_size=10)

Center: (-14.236317, 15.683612, -26.085293)
Size: (19.967600, 18.967199, 21.901600)


## メタ情報のファイル作成

In [87]:
result_dir = base_dir + '/result'
os.makedirs(result_dir, exist_ok=True)
df = pd.DataFrame(columns=['input_protein', 'input_ligand', 'input_docking_grid', 'output_ligand_name'])
meta_info_file = result_dir + '/test_one2one.csv'
protein_path = base_dir + '/data/receptor_5HG8.pdb'
for i, lig in tqdm(enumerate(list_ligs)):
    ligand_path = base_dir + f'/data/input_{lig}.sdf'
    predict_name = f'docked_{lig}'
    df.loc[i] = [protein_path, ligand_path, grid_path, predict_name]
print(df.info())
print(df.head(3))
df.to_csv(meta_info_file, index= False)

4it [00:00, 1243.03it/s]

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   input_protein       4 non-null      object
 1   input_ligand        4 non-null      object
 2   input_docking_grid  4 non-null      object
 3   output_ligand_name  4 non-null      object
dtypes: object(4)
memory usage: 160.0+ bytes
None
                                       input_protein  \
0  /home/stake/cadd_training/20250906_unimol-dock...   
1  /home/stake/cadd_training/20250906_unimol-dock...   
2  /home/stake/cadd_training/20250906_unimol-dock...   

                                        input_ligand  \
0  /home/stake/cadd_training/20250906_unimol-dock...   
1  /home/stake/cadd_training/20250906_unimol-dock...   
2  /home/stake/cadd_training/20250906_unimol-dock...   

                                  input_docking_grid output_ligand_name  
0  /home/stake/cadd_training/20250906_unimo




## ドッキングシミュレーション

In [91]:
%run $unimol_dir/unimol_docking_v2/interface/demo.py \
    --mode batch_one2one \
    --batch-size 8 \
    --steric-clash-fix \
    --conf-size 10 \
    --cluster \
    --input-batch-file $meta_info_file \
    --output-ligand-dir $result_dir \
    --model-dir $unimol_dir/unimol_docking_v2_240517.pt

Namespace(model_dir='/home/stake/opt/Uni-Mol/unimol_docking_v2_240517.pt', input_protein='protein.pdb', input_ligand='ligand.sdf', input_batch_file='/home/stake/cadd_training/20250906_unimol-docking2/result/test_one2one.csv', input_docking_grid='docking_grid.json', output_ligand_name='ligand_predict', output_ligand_dir='/home/stake/cadd_training/20250906_unimol-docking2/result', mode='batch_one2one', batch_size=8, nthreads=8, conf_size=10, cluster=True, use_current_ligand_conf=False, steric_clash_fix=True)
Start preprocessing data...
Number of ligands: 4


4it [00:07,  2.00s/it]


Total num: 4, Success: 4, Failed: 0
Done!
fused_multi_tensor is not installed corrected
fused_rounding is not installed corrected
fused_layer_norm is not installed corrected
fused_rms_norm is not installed corrected
fused_softmax is not installed corrected
2025-09-06 14:37:02 | INFO | unimol.inference | loading model(s) from /home/stake/opt/Uni-Mol/unimol_docking_v2_240517.pt
2025-09-06 14:37:03 | INFO | unimol.tasks.docking_pose_v2 | ligand dictionary: 30 types
2025-09-06 14:37:03 | INFO | unimol.tasks.docking_pose_v2 | pocket dictionary: 9 types
2025-09-06 14:37:04 | INFO | unimol.inference | Namespace(no_progress_bar=False, log_interval=50, log_format='simple', tensorboard_logdir='', wandb_project='', wandb_name='', seed=1, cpu=False, fp16=True, bf16=False, bf16_sr=False, allreduce_fp32_grad=False, fp16_no_flatten_grads=False, fp16_init_scale=4, fp16_scale_window=256, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir='/home/stake/opt/Uni-Mol/unimol

Consider using tensor.detach() first. (Triggered internally at /pytorch/torch/csrc/autograd/generated/python_variable_methods.cpp:835.)
  loss = float(closure())


2025-09-06 14:37:15 | INFO | unimol.inference | Done inference! 
Start converting model predictions into sdf files...


100%|██████████| 4/4 [00:00<00:00, 1861.86it/s]

Done!



  0%|          | 0/4 [00:00<?, ?it/s]

/home/stake/cadd_training/20250906_unimol-docking2/result/docked_5HG8.sdf-CCC(=O)Nc1cccc(Oc2nc(Nc3cnn(C)c3)nc3[nH]ccc23)c1-RMSD:35.386
/home/stake/cadd_training/20250906_unimol-docking2/result/docked_5UG8.sdf-CCC(=O)N[C@@H]1CN(c2nc(Nc3cnn(C)c3)c3ncn(C(C)C)c3n2)C[C@@H]1F-RMSD:35.6295
/home/stake/cadd_training/20250906_unimol-docking2/result/docked_5UGC.sdf-CCC(=O)N[C@H]1CN(c2nc(Nc3cn(C)nc3OC)c3ncn(C)c3n2)C[C@@H]1F-RMSD:36.3474


 25%|██▌       | 1/4 [00:05<00:16,  5.49s/it]

/home/stake/cadd_training/20250906_unimol-docking2/result/docked_5UG9.sdf-CCC(=O)N[C@H]1CN(c2nc(Nc3cn(C)nc3OC)c3ncn(C(C)C)c3n2)C[C@@H]1F-RMSD:35.5306


100%|██████████| 4/4 [00:06<00:00,  1.61s/it]

output ligands path:
 ['/home/stake/cadd_training/20250906_unimol-docking2/result/docked_5HG8.sdf', '/home/stake/cadd_training/20250906_unimol-docking2/result/docked_5UG8.sdf', '/home/stake/cadd_training/20250906_unimol-docking2/result/docked_5UG9.sdf', '/home/stake/cadd_training/20250906_unimol-docking2/result/docked_5UGC.sdf']
Average time:  7.528137445449829 sec.
Total time:  30.112549781799316 sec.
All processes done!





## RMSD算出

In [None]:
def substructure_rmsd(ref_mol, prb_mol, smarts):
    ref = Chem.RemoveHs(ref_mol, sanitize=False)
    prb = Chem.RemoveHs(prb_mol, sanitize=False)
    rms = rdMolAlign.GetBestRMS(prb, ref)
    return rms

In [144]:
for lig in list_ligs:
    ref_sup = Chem.SDMolSupplier(base_dir + f'/data/ligand_{lig}.sdf', removeHs=False, sanitize=False)
    prb_sup = Chem.SDMolSupplier(result_dir + f'/docked_{lig}.sdf', removeHs=False, sanitize=False)
    ref_mol = ref_sup[0]
    prb_mol = prb_sup[0]
    smarts = Chem.MolToSmarts(prb_mol)
    rms = substructure_rmsd(ref_mol, prb_mol, smarts)
    print(lig, f'RMSD {rms:.3f}Å')

5HG8 RMSD 0.545Å
5UG8 RMSD 1.160Å
5UG9 RMSD 0.928Å
5UGC RMSD 1.158Å
