In [None]:
!pip install biopython



In [None]:
import os

# Root folder of the dataset; every other path in this notebook lives below it.
base_path = "/content/drive/MyDrive/Dataset"

# Create the PDB folders and the voxel-grid folders in one pass.
# exist_ok=True makes this cell safe to re-run.
for subfolder in (
    "/binding_pdb",
    "/nonbinding_pdb",
    "/3d_grids/binding",
    "/3d_grids/nonbinding",
):
    os.makedirs(base_path + subfolder, exist_ok=True)

print("Folder structure ready!")


Folder structure ready!


In [None]:
from Bio.PDB import PDBParser
import numpy as np
import os

In [None]:
# Voxelization settings read by pdb_to_voxel.
GRID_SIZE = 32        # voxels per axis, i.e. a 32x32x32 grid
VOXEL_SIZE = 1.0     # 1 Å (angstrom) per voxel
# Offset added to centred coordinates so the grid centre lands at index
# GRID_SIZE / 2. True division makes this a float (16.0), not an int.
HALF_GRID = GRID_SIZE / 2

In [None]:
def pdb_to_voxel(pdb_path):
    """Voxelize a PDB complex into a binary occupancy grid centred on its ligand.

    The structure is parsed with Bio.PDB's ``PDBParser``. The mean position of
    all ligand atoms (hetero residues that are not water) becomes the grid
    centre, and every atom of the structure that falls inside the grid marks
    its voxel with 1.0.

    Parameters
    ----------
    pdb_path : str or path-like
        Location of the PDB file to read.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(GRID_SIZE, GRID_SIZE, GRID_SIZE)`` and dtype
        ``float32`` holding 1.0 in occupied voxels and 0.0 elsewhere.

    Raises
    ------
    ValueError
        If the structure contains no ligand atoms.
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("complex", pdb_path)

    # Step 1: single pass over the structure, recording every atom position
    # and whether the atom belongs to a ligand.
    coords = []
    is_ligand = []
    for atom in structure.get_atoms():
        residue = atom.get_parent()
        het_flag = residue.get_id()[0]
        coords.append(atom.get_coord())
        # Bio.PDB marks standard residues with " " and waters with "W"
        # (covers both HOH and WAT records); anything else is a hetero
        # residue. The explicit HOH name check is kept as a safety net.
        is_ligand.append(
            het_flag not in (" ", "W") and residue.get_resname() != "HOH"
        )

    # Safety check (f-string so path-like objects work as well as str)
    if not any(is_ligand):
        raise ValueError(f"No ligand found in {pdb_path}")

    coords = np.asarray(coords)
    ligand_center = coords[np.asarray(is_ligand)].mean(axis=0)

    # Step 2: create empty grid
    grid = np.zeros((GRID_SIZE, GRID_SIZE, GRID_SIZE), dtype=np.float32)

    # Step 3: map atoms to grid.
    # np.floor (not plain int truncation) is required here: truncation rounds
    # toward zero, so atoms up to one voxel *outside* the lower grid boundary
    # (shifted coordinate in (-1, 0)) would be wrongly written into voxel 0.
    voxels = np.floor((coords - ligand_center) / VOXEL_SIZE + HALF_GRID).astype(int)
    inside = np.all((voxels >= 0) & (voxels < GRID_SIZE), axis=1)
    x, y, z = voxels[inside].T
    grid[x, y, z] = 1.0

    return grid

In [None]:
# Convert every binding-complex PDB file into a voxel grid saved as .npy.
binding_pdb_dir = "/content/drive/MyDrive/Dataset/binding_pdb"
binding_out_dir = "/content/drive/MyDrive/Dataset/3d_grids/binding"

# sorted() gives a reproducible processing order; os.listdir order is arbitrary.
for file in sorted(os.listdir(binding_pdb_dir)):
    if file.endswith(".pdb"):
        pdb_path = os.path.join(binding_pdb_dir, file)
        grid = pdb_to_voxel(pdb_path)

        # Swap only the trailing extension. str.replace(".pdb", ".npy") would
        # also rewrite any ".pdb" occurring earlier in the file name.
        out_name = file[: -len(".pdb")] + ".npy"
        np.save(os.path.join(binding_out_dir, out_name), grid)

print("All binding voxel grids generated!")

All binding voxel grids generated!


In [None]:
# Convert every non-binding PDB file into a voxel grid saved as .npy.
nonbinding_pdb_dir = "/content/drive/MyDrive/Dataset/nonbinding_pdb"
nonbinding_out_dir = "/content/drive/MyDrive/Dataset/3d_grids/nonbinding"

# sorted() gives a reproducible processing order; os.listdir order is arbitrary.
for file in sorted(os.listdir(nonbinding_pdb_dir)):
    if file.endswith(".pdb"):
        pdb_path = os.path.join(nonbinding_pdb_dir, file)
        grid = pdb_to_voxel(pdb_path)

        # Swap only the trailing extension. str.replace(".pdb", ".npy") would
        # also rewrite any ".pdb" occurring earlier in the file name.
        out_name = file[: -len(".pdb")] + ".npy"
        np.save(os.path.join(nonbinding_out_dir, out_name), grid)

print("All non-binding voxel grids generated!")

All non-binding voxel grids generated!


In [None]:
# Number of saved grids per class, displayed as (binding, nonbinding).
# The parentheses are required: without them the trailing comma ends the first
# line as its own (discarded) one-element tuple, and only the second count is
# shown as the cell output.
(
    len(os.listdir("/content/drive/MyDrive/Dataset/3d_grids/binding")),
    len(os.listdir("/content/drive/MyDrive/Dataset/3d_grids/nonbinding")),
)

59