In [1]:
from pathlib import Path
import numpy as np

PROJECT = Path(r"C:\Users\ashak\ad-quantum-repurpose")
RAW = PROJECT / "data" / "raw"
PROC = PROJECT / "data" / "processed"
PROT_RAW = RAW / "proteins"
PROT_PROC = PROC / "proteins"
PROT_PROC.mkdir(parents=True, exist_ok=True)

pdb_path = PROT_RAW / "4DVF.pdb"
print("PDB exists:", pdb_path.exists(), pdb_path)


PDB exists: True C:\Users\ashak\ad-quantum-repurpose\data\raw\proteins\4DVF.pdb


In [2]:
import re

het_coords = []
with open(pdb_path, "r") as f:
    for line in f:
        if line.startswith("HETATM"):
            resname = line[17:20].strip()
            if resname not in {"HOH","WAT"}:
                x = float(line[30:38]); y = float(line[38:46]); z = float(line[46:54])
                het_coords.append([x,y,z])

if not het_coords:
    raise RuntimeError("No non-water HETATM found in 4DVF; cannot auto-center grid.")

centroid = np.mean(np.array(het_coords), axis=0)
print("Docking box center (x,y,z):", centroid.round(3).tolist())
# Save for later steps
np.savetxt(PROT_PROC / "grid_center_xyz.txt", centroid.reshape(1,3), fmt="%.3f")


Docking box center (x,y,z): [3.061, -0.128, 17.532]


In [5]:
from pdbfixer import PDBFixer
from openmm.app import PDBFile

fixer = PDBFixer(filename=str(pdb_path))

# Remove hetero atoms (ligands, ions) and waters for docking
fixer.heterogens = False  # removes ligands/ions
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()

# Add hydrogens at physiological pH
fixer.addMissingHydrogens(pH=7.4)

receptor_pdb = PROT_PROC / "BACE1_prepared.pdb"
with open(receptor_pdb, "w") as out:
    PDBFile.writeFile(fixer.topology, fixer.positions, out, keepIds=True)

print("Wrote receptor:", receptor_pdb, "size:", receptor_pdb.stat().st_size, "bytes")


Wrote receptor: C:\Users\ashak\ad-quantum-repurpose\data\processed\proteins\BACE1_prepared.pdb size: 1251380 bytes


In [6]:
import shutil, subprocess

receptor_pdbqt = PROT_PROC / "BACE1_prepared.pdbqt"
assert shutil.which("obabel"), "OpenBabel 'obabel' not found in PATH."

cmd = [
    "obabel",
    str(receptor_pdb),
    "-O", str(receptor_pdbqt),
    "-xr"    # remove hydrogens from receptor rotatable bonds (common option); can omit if undesired
]
print("Running:", " ".join(cmd))
subprocess.run(cmd, check=True)
print("Wrote receptor PDBQT:", receptor_pdbqt.exists(), receptor_pdbqt)


Running: obabel C:\Users\ashak\ad-quantum-repurpose\data\processed\proteins\BACE1_prepared.pdb -O C:\Users\ashak\ad-quantum-repurpose\data\processed\proteins\BACE1_prepared.pdbqt -xr
Wrote receptor PDBQT: True C:\Users\ashak\ad-quantum-repurpose\data\processed\proteins\BACE1_prepared.pdbqt


In [7]:
box_size = (22.0, 22.0, 22.0)
np.savetxt(PROT_PROC / "grid_size_angs.txt", np.array(box_size).reshape(1,3), fmt="%.1f")
print("Saved grid size:", box_size)


Saved grid size: (22.0, 22.0, 22.0)
