In [1]:
import os
import pandas as pd
from rdkit import Chem
import subprocess
from rdkit.Chem import AllChem
from tqdm import tqdm


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\ichan\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "C:\Users\ichan\anaconda3\Lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
    app.start()
  File "C:\Users\ichan\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 701, in start
    self.io_loop.start()
  File "C:\Users\ichan\anaconda3\Lib\site-packa

AttributeError: _ARRAY_API not found

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [None]:
# 1) Environment: locations of the Vina executable, the prepared receptor,
#    and the CSV file that lists the ligands to dock.
vina_path = r"C:\Program Files (x86)\The Scripps Research Institute\vina.exe"
receptor_pdbqt = r"C:\Users\ichan\Desktop\data\receptor.pdbqt"
csv_path = r"C:\Users\ichan\Desktop\data\train.csv"

# 2) Working folders: Vina outputs/logs and the intermediate ligand files.
#    Both are created up front; already-existing folders are left untouched.
output_folder = "./vina_output"
ligand_folder = "./ligands"
for folder in (output_folder, ligand_folder):
    os.makedirs(folder, exist_ok=True)

# 3) Docking grid box (example values): centre coordinates and edge lengths,
#    each given as an (x, y, z) tuple and passed to Vina unchanged.
center = (22.57, 22.77, 11.81)
size = (5.58, 13.38, 10.50)

# Input table (read once) and the list that collects one record per ligand.
df = pd.read_csv(csv_path)
results = []

# Fixed seed used for both RDKit conformer embedding and Vina's search so that
# re-running the notebook reproduces the same poses and scores.
RANDOM_SEED = 42


def largest_fragment(mol):
    """Return the fragment of ``mol`` that has the most heavy atoms.

    Molecules made of a single fragment (or no atoms at all) are returned
    unchanged. Counter-ions and solvent in a salt SMILES are dropped so that a
    single connected ligand is handed to the PDBQT conversion.

    NOTE(review): disconnected inputs are a likely cause of Vina's
    "Unknown or inappropriate tag" ligand parse error — confirm against a
    ligand that fails.
    """
    fragments = Chem.GetMolFrags(mol, asMols=True)
    if len(fragments) <= 1:
        return mol
    return max(fragments, key=lambda frag: frag.GetNumHeavyAtoms())


def build_ligand_3d(smiles):
    """Build a hydrogen-complete, UFF-optimised 3D molecule from a SMILES.

    Raises:
        ValueError: if ``smiles`` is not a string, cannot be parsed by RDKit,
            or no 3D conformer could be embedded.
    """
    # Non-string cells (e.g. NaN from an empty CSV field) are treated like an
    # unparsable SMILES instead of triggering an opaque binding error.
    mol = Chem.MolFromSmiles(smiles) if isinstance(smiles, str) else None
    if mol is None:
        raise ValueError(f"SMILES 파싱 실패: {smiles!r}")

    mol = Chem.AddHs(largest_fragment(mol))

    params = AllChem.ETKDG()
    params.randomSeed = RANDOM_SEED
    # EmbedMolecule reports failure with a negative conformer id rather than
    # an exception, so it has to be checked before optimising.
    if AllChem.EmbedMolecule(mol, params) < 0:
        raise ValueError(f"3D 구조 생성 실패: {smiles!r}")

    AllChem.UFFOptimizeMolecule(mol)
    return mol


def parse_vina_affinity(log_path):
    """Return the affinity of mode 1 from a Vina log file, or ``None``.

    A result row is recognised by its first whitespace-separated field being
    exactly ``"1"`` and its second field being a number. Lines that merely
    start with the character "1" (for example mode 10) are ignored.
    """
    with open(log_path) as handle:
        for line in handle:
            fields = line.split()
            if len(fields) >= 2 and fields[0] == "1":
                try:
                    return float(fields[1])
                except ValueError:
                    continue
    return None


def dock_ligand(idx, smiles):
    """Prepare one ligand, dock it with Vina and return its best affinity.

    Files are written to ``ligand_folder`` (PDB / PDBQT input) and
    ``output_folder`` (docked poses and log), named after ``idx``.

    Returns:
        The affinity parsed from the Vina log, or ``None`` if the log contains
        no result row.

    Raises:
        ValueError: if the 3D structure cannot be built.
        subprocess.CalledProcessError: if obabel or Vina exits with an error.
    """
    # 1) Generate the 3D structure with RDKit.
    mol = build_ligand_3d(smiles)

    # 2) Save it as a plain PDB file.
    pdb_file = os.path.join(ligand_folder, f"ligand_{idx}.pdb")
    pdbqt_file = os.path.join(ligand_folder, f"ligand_{idx}.pdbqt")
    out_pdbqt = os.path.join(output_folder, f"ligand_{idx}_out.pdbqt")
    log_file = os.path.join(output_folder, f"ligand_{idx}.log")
    Chem.MolToPDBFile(mol, pdb_file)

    # 3) Convert PDB -> PDBQT with Open Babel.
    subprocess.run(
        [
            "obabel", pdb_file,
            "-O", pdbqt_file,
            "--partialcharge", "gasteiger",
        ],
        check=True,
        capture_output=True,
    )

    # 4) Dock with Vina.
    cmd = [
        vina_path,
        "--receptor", receptor_pdbqt,
        "--ligand", pdbqt_file,
        "--out", out_pdbqt,
        "--log", log_file,
        "--center_x", str(center[0]),
        "--center_y", str(center[1]),
        "--center_z", str(center[2]),
        "--size_x", str(size[0]),
        "--size_y", str(size[1]),
        "--size_z", str(size[2]),
        "--exhaustiveness", "4",
        "--seed", str(RANDOM_SEED),
    ]
    subprocess.run(cmd, check=True, capture_output=True)

    # 5) Extract the affinity of the best pose from the log.
    return parse_vina_affinity(log_file)


for idx, row in tqdm(df.iterrows(), total=len(df)):
    smiles = row["Canonical_SMILES"]
    # A failed ligand is still recorded, with an empty affinity, so the result
    # table keeps one row per input row.
    affinity = None
    try:
        affinity = dock_ligand(idx, smiles)
    except subprocess.CalledProcessError as e:
        # Tool output may not be UTF-8 (e.g. a localized Windows console);
        # decoding must never raise here or the whole batch would abort.
        stderr_text = e.stderr.decode(errors="replace") if e.stderr else ""
        print(f"[오류 {idx}] OBabel/Vina 실패\n", stderr_text)
    except Exception as e:
        # Deliberate best-effort: report the ligand and move on to the next.
        print(f"[예외 {idx}]: {e}")

    results.append({
        "Index": idx,
        "SMILES": smiles,
        "Affinity": affinity,
    })

# Persist the collected records (one row per ligand) as CSV without the
# DataFrame index column, then announce completion.
pd.DataFrame(data=results).to_csv(
    "docking_results.csv",
    index=False,
)
print("✅ 도킹 파이프라인 완료 – docking_results.csv 생성됨")


  0%|          | 1/1681 [00:00<25:59,  1.08it/s]

[Vina/OBabel 에러 0]
 

Parse error on line 22 in file ".\ligands\ligand_0.pdbqt": Unknown or inappropriate tag



  0%|          | 3/1681 [00:26<5:07:33, 11.00s/it]