In [55]:
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem

def create_grid(size=20, resolution=1):
    """Allocate an empty cubic voxel grid.

    Args:
        size: Edge length of the cube before scaling.
        resolution: Multiplier applied to ``size``; each axis gets
            ``int(size * resolution)`` cells.

    Returns:
        A zero-filled float array of shape ``(n, n, n, 23)`` where ``n`` is
        the number of cells per axis. The 23 channels hold the 14 atom
        features followed by the 9 residue-class features that
        ``map_atoms_to_grid`` concatenates for every placed atom.
    """
    cells_per_axis = int(size * resolution)
    spatial_shape = (cells_per_axis,) * 3
    return np.zeros(spatial_shape + (23,))  # 23 feature channels per voxel

def rotate_molecule(mol_to_rot, rotation_matrix):
    """Rotate every atom of a molecule in place.

    Each atom position of the conformer returned by ``GetConformer()`` is
    left-multiplied by ``rotation_matrix``, so the rotation is about the
    coordinate origin. The conformer is modified directly; no copy is made.

    Args:
        mol_to_rot: Molecule exposing ``GetConformer()`` and ``GetNumAtoms()``.
        rotation_matrix: 3x3 matrix applied to each ``(x, y, z)`` position.

    Returns:
        The same ``mol_to_rot`` object, now holding the rotated coordinates.
    """
    conformer = mol_to_rot.GetConformer()
    for idx in range(mol_to_rot.GetNumAtoms()):
        point = conformer.GetAtomPosition(idx)
        xyz = np.array([point.x, point.y, point.z])
        conformer.SetAtomPosition(idx, np.dot(rotation_matrix, xyz))
    return mol_to_rot

def generate_random_rotation_matrix(rng=None):
    """Build a random 3x3 rotation matrix from three random Euler angles.

    Three angles are drawn independently and uniformly from [0, 360) degrees
    and composed as an 'xyz' Euler rotation.

    NOTE: independent uniform Euler angles do not sample 3D orientations
    uniformly. If unbiased orientation coverage matters for augmentation,
    consider ``scipy.spatial.transform.Rotation.random()`` instead.

    Args:
        rng: Optional random source exposing ``uniform(low, high, size=...)``
            (e.g. ``np.random.default_rng(seed)``) for reproducible draws.
            When omitted, the global ``np.random`` state is used, as before.

    Returns:
        A ``(3, 3)`` numpy array representing the rotation.
    """
    # Imported locally because the notebook's import cell never brings the
    # name `R` into scope; without this the function raises NameError on a
    # fresh kernel.
    from scipy.spatial.transform import Rotation as R

    sampler = np.random if rng is None else rng
    euler_angles_deg = sampler.uniform(0, 360, size=3)
    return R.from_euler('xyz', euler_angles_deg, degrees=True).as_matrix()

# Slot of each supported element inside the atom-type one-hot (indices 0-4);
# every element not listed here is counted under 'other'.
atom_types = {'C': 0, 'N': 1, 'O': 2, 'S': 3, 'other': 4}

def encode_atom_features(atom):
    """Encode a single atom as a 14-element feature vector.

    The atom is only read: unlike the earlier version, its formal charge is
    NOT overwritten, so the molecule it belongs to stays chemically intact.

    Layout of the returned vector:
        0-4   one-hot element type (C, N, O, S, other)
        5-7   one-hot hybridization (SP, SP2, SP3); all zero for anything else
        8     number of bonded neighbours with atomic number > 1
        9     number of bonded neighbours that are neither H nor C
        10    1 if the atom is aromatic
        11    1 if the charge code in slot 12 is non-zero
        12    charge code (see below)
        13    1 if the atom is in a ring

    Charge code: starts from ``atom.GetFormalCharge()``; a formal charge of
    +1 is recoded as 2; the CG atom of ASP and the CD atom of GLU are coded 1
    (the carboxylate charge of the two oxygens is assigned to that carbon).
    Every other formal charge is stored unchanged.
    NOTE(review): the encoding is described as "1 = negative, 2 = positive",
    but an RDKit formal charge of -1 is not remapped to 1 here — confirm
    whether that is intended.

    Args:
        atom: RDKit-style atom. If it carries no PDB residue info, the
            residue-specific charge rule is simply skipped.

    Returns:
        ``np.ndarray`` of shape ``(14,)``.
    """
    features = np.zeros(14)

    # --- element one-hot -------------------------------------------------
    features[atom_types.get(atom.GetSymbol(), atom_types['other'])] = 1

    # --- hybridization one-hot -------------------------------------------
    hybridization = atom.GetHybridization()
    hybrid_slots = (
        (Chem.HybridizationType.SP, 5),
        (Chem.HybridizationType.SP2, 6),
        (Chem.HybridizationType.SP3, 7),
    )
    for kind, slot in hybrid_slots:
        if hybridization == kind:
            features[slot] = 1
            break

    # --- neighbour counts (single pass over the neighbours) --------------
    neighbor_numbers = [neighbor.GetAtomicNum() for neighbor in atom.GetNeighbors()]
    features[8] = sum(1 for number in neighbor_numbers if number > 1)
    features[9] = sum(1 for number in neighbor_numbers if number not in {1, 6})

    features[10] = 1 if atom.GetIsAromatic() else 0

    # --- charge code ------------------------------------------------------
    # Residue info is absent on atoms that did not come from a PDB record;
    # fall back to empty names so the function does not crash on them.
    info = atom.GetPDBResidueInfo()
    residue = info.GetResidueName().strip() if info is not None else ""
    atom_name = info.GetName().strip() if info is not None else ""

    # Work on a local copy of the charge instead of calling SetFormalCharge:
    # the code below is a feature encoding, not a real chemical charge.
    charge_code = atom.GetFormalCharge()
    if charge_code == 1:
        charge_code = 2  # positive charge is coded as 2
    if (residue, atom_name) in (("ASP", "CG"), ("GLU", "CD")):
        charge_code = 1  # carboxylate carbon carries the summed negative charge

    features[11] = 1 if charge_code != 0 else 0  # binary: charged or not
    features[12] = charge_code

    features[13] = 1 if atom.IsInRing() else 0

    return features

def encode_residue_type(residue):
    """One-hot encode a residue name into one of nine residue classes.

    Matching is exact (case- and whitespace-sensitive). A name that belongs
    to none of the classes yields an all-zero vector.

    Args:
        residue: Three-letter residue name, e.g. ``'ASP'``.

    Returns:
        ``np.ndarray`` of shape ``(9,)`` with at most one entry set to 1.
    """
    # Position in this tuple == index of the one-hot slot.
    residue_groups = (
        ('ASP', 'GLU'),
        ('LYS', 'ARG'),
        ('HIS',),
        ('CYS',),
        ('ASN', 'GLN', 'SER', 'THR'),
        ('GLY',),
        ('PRO',),
        ('PHE', 'TYR', 'TRP'),
        ('ALA', 'ILE', 'LEU', 'MET', 'VAL'),
    )
    one_hot = np.zeros(len(residue_groups))
    for slot, members in enumerate(residue_groups):
        if residue in members:
            one_hot[slot] = 1
            break
    return one_hot

def map_atoms_to_grid(mol, grid, grid_center, grid_size=20, resolution=1):
    """Write one feature vector per atom into the voxel grid (in place).

    Atom coordinates are translated so that the per-axis minimum of the
    molecule sits at the origin (a shift only, no rescaling of the molecule),
    multiplied by ``resolution`` (cells per coordinate unit, matching
    ``create_grid``), and rounded to the nearest cell. With the default
    ``resolution=1`` this is exactly the previous behaviour.

    If the nearest cell is already occupied, the all-axes floor and then the
    all-axes ceiling cell are tried. Fallback cells are bounds-checked, so a
    ceiling that lands one past the grid edge is skipped instead of raising
    ``IndexError``.

    Args:
        mol: RDKit-style molecule with a conformer.
        grid: Array of shape ``(n, n, n, 23)``; modified in place.
        grid_center: Accepted for interface compatibility; not used.
        grid_size: Edge length of the grid before scaling by ``resolution``.
        resolution: Cells per coordinate unit.

    Returns:
        The same ``grid`` object, with the atom features written into it.

    Raises:
        Exception: "Atom out of bounds" if an atom's nearest cell lies
            outside the grid; "Overwritten atoms!" if the nearest, floor and
            ceiling cells are all occupied or out of bounds.
    """
    conf = mol.GetConformer()
    atoms = list(mol.GetAtoms())
    if not atoms:
        # np.min/np.max would fail on an empty coordinate array.
        return grid

    all_positions = np.array(
        [[p.x, p.y, p.z] for p in (conf.GetAtomPosition(a.GetIdx()) for a in atoms)]
    )
    min_coords = np.min(all_positions, axis=0)
    max_coords = np.max(all_positions, axis=0)
    scale = max_coords - min_coords
    print(scale, "is scale")

    # A valid index must fit both the requested extent and the real array.
    upper_bound = np.minimum(np.array(grid.shape[:3]), int(grid_size * resolution))

    def in_bounds(coord):
        return bool(np.all(coord >= 0) and np.all(coord < upper_bound))

    for atom, position in zip(atoms, all_positions):
        cell_pos = (position - min_coords) * resolution
        nearest = np.rint(cell_pos).astype(int)

        if not in_bounds(nearest):
            print("Atom didn't go in the grid")
            raise Exception("Atom out of bounds")

        atom_features = encode_atom_features(atom)

        info = atom.GetPDBResidueInfo()
        # Strip padding so the name matches encode_residue_type's exact test.
        residue = info.GetResidueName().strip() if info is not None else ""
        residue_features = encode_residue_type(residue)

        combined_features = np.concatenate((atom_features, residue_features))

        # Preference order: nearest cell, then floor, then ceiling.
        candidates = (
            nearest,
            np.floor(cell_pos).astype(int),
            np.ceil(cell_pos).astype(int),
        )
        for candidate in candidates:
            cell = tuple(candidate)
            if in_bounds(candidate) and not np.any(grid[cell]):
                grid[cell] = combined_features
                break
        else:
            print("Overwritten atoms")
            raise Exception("Overwritten atoms!")

    return grid

def min_max_normalize(grid):
    """Rescale all values of ``grid`` into [0, 1] using one global min/max.

    The minimum and maximum are taken over the whole array (every voxel and
    every feature channel together), not per channel.

    Args:
        grid: Numeric array to rescale.

    Returns:
        A new array ``(grid - min) / (max - min)``. If every value is the
        same, the input object itself is returned unchanged, which avoids a
        division by zero.
    """
    lowest = np.min(grid)
    highest = np.max(grid)

    if highest - lowest != 0:
        return (grid - lowest) / (highest - lowest)

    # Constant input: nothing to rescale.
    return grid

def generate_non_rotated_grid(grid_center, filtered_pdb_path, grid_size=30, resolution=1):
    """Build one normalized feature grid from a PDB file, with no rotation.

    Args:
        grid_center: Forwarded unchanged to ``map_atoms_to_grid``.
        filtered_pdb_path: Path of the PDB file to read (parsed with
            sanitization enabled).
        grid_size: Passed to ``create_grid`` as ``size`` and to
            ``map_atoms_to_grid``.
        resolution: Passed to ``create_grid`` and ``map_atoms_to_grid``.

    Returns:
        The grid after ``min_max_normalize``, or ``None`` when RDKit could
        not produce a molecule from the file.
    """
    molecule = Chem.MolFromPDBFile(filtered_pdb_path, sanitize=True)
    if molecule is None:
        return None

    # Start from an empty grid and place the atoms at their original
    # orientation.
    populated = map_atoms_to_grid(
        molecule,
        create_grid(size=grid_size, resolution=resolution),
        grid_center,
        grid_size,
        resolution,
    )

    # Rescale the filled grid before handing it back.
    return min_max_normalize(populated)
def saving_features(rotated_grids, output_path, protein_name_):
    """Save each grid to ``<output_path>/<protein_name_>_grid_<idx>.npy``.

    The output directory (including missing parents) is created if needed.
    Existing files with the same name are overwritten by ``np.save``.

    Args:
        rotated_grids: Iterable of arrays; the position in the iterable
            becomes the ``<idx>`` part of the file name.
        output_path: Directory that receives the ``.npy`` files.
        protein_name_: Prefix used for every file name.

    Returns:
        None.
    """
    # Imported locally because the notebook's import cell never imports
    # `os`; without this, os.makedirs raises NameError on a fresh kernel.
    import os

    os.makedirs(output_path, exist_ok=True)
    # Save each grid
    for idx, grid in enumerate(rotated_grids):
        np.save(os.path.join(output_path, f'{protein_name_}_grid_{idx}.npy'), grid)
        print(f"Saved rotated grid {idx} successfully.")
    return

In [56]:
def print_features(features):
    """Print a readable breakdown of one voxel's feature vector.

    ``features`` is read as 14 atom features (element one-hot in 0-4,
    hybridization flags in 5-7, heavy/hetero neighbour counts in 8-9,
    aromatic flag in 10, charge flag and charge value in 11-12, ring flag in
    13) followed by the residue-class one-hot starting at index 14.

    Count and charge values are truncated with ``int()`` before printing, so
    fractional values (e.g. from a normalized grid) print as their integer
    part.

    Args:
        features: Indexable, sliceable numeric vector with that layout.

    Returns:
        None; the report is written to stdout, ending with a blank line.
    """
    element_names = ('Carbon', 'Nitrogen', 'Oxygen', 'Sulfur', 'Other')
    residue_class_names = (
        'Aspartic Acid/Glutamic Acid',
        'Lysine/Arginine',
        'Histidine',
        'Cysteine',
        'Asparagine/Glutamine/Serine/Threonine',
        'Glycine',
        'Proline',
        'Phenylalanine/Tyrosine/Tryptophan',
        'Alanine/Isoleucine/Leucine/Methionine/Valine',
    )

    # Element: the largest of the first five entries wins.
    element = element_names[np.argmax(features[:5])]

    # Hybridization: first flag that is set, in SP -> SP2 -> SP3 order.
    hybrid_label = "Unknown"
    for slot, label in ((5, "SP"), (6, "SP2"), (7, "SP3")):
        if features[slot]:
            hybrid_label = label
            break

    # Residue class: only named when the winning entry is actually non-zero.
    residue_slot = np.argmax(features[14:])
    if features[14 + residue_slot] != 0:
        residue_class = residue_class_names[residue_slot]
    else:
        residue_class = "Unknown"

    def yes_no(slot):
        return 'Yes' if bool(features[slot]) else 'No'

    report = (
        f"Atom: {element}",
        f"Hybridization: {hybrid_label}",
        f"Number of heavy atoms bonded: {int(features[8])}",
        f"Number of hetero atoms bonded: {int(features[9])}",
        f"Aromatic: {yes_no(10)}",
        f"Has Charge: {yes_no(11)}",
        f"Formal Charge: {int(features[12])}",
        f"In Ring: {yes_no(13)}",
        f"Residue Type: {residue_class}\n",
    )
    for line in report:
        print(line)


In [57]:
grid_center = np.array([0, 0, 0])  # Grid center at origin

file = "../3DCNN/filtered-pdbs-distinct-5A/positive/3NY9-filtered.pdb"

non_rotated_grid = generate_non_rotated_grid(grid_center, file)

# generate_non_rotated_grid returns None when RDKit cannot build a molecule
# from the file; stop here with a clear message instead of failing later
# with an AttributeError on `.shape`.
if non_rotated_grid is None:
    raise ValueError(f"Failed to build a grid from {file}. Check the path or file content.")

print(f"Grid shape: {non_rotated_grid.shape}")  # Should be (x, y, z, features)

# Compute sum of absolute feature values at each (x, y, z)
non_zero_mask = np.sum(np.abs(non_rotated_grid), axis=3) != 0  # Shape: (x, y, z)

# Find coordinates where at least one feature is non-zero
non_zero_coords = np.argwhere(non_zero_mask)

print(f"Total non-zero grid points: {non_zero_coords.shape[0]}\n")

# Walk the occupied voxels; uncomment the two print lines below to dump the
# coordinates and decoded features of each one.
for coord in non_zero_coords:
    x, y, z = coord
    features = non_rotated_grid[x, y, z, :]
    # print(f"(x={x}, y={y}, z={z})")
    # print_features(features)

[10.532  9.91  25.626] is scale
Grid shape: (30, 30, 30, 23)
Total non-zero grid points: 52



In [58]:
# Load a grid that was saved to disk earlier (file name refers to 3NY9)
grid = np.load("../../../Data/SplitData/Cholesterol/cholesterol-grid-5A_exp1/Test/Positive/3NY9-filtered_grid_0.npy")

print(f"Grid shape: {grid.shape}")  # Expected layout: (x, y, z, features)

# A voxel counts as occupied when its absolute feature values do not sum to zero
non_zero_mask = np.abs(grid).sum(axis=3) != 0  # Shape: (x, y, z)

# One (x, y, z) index row per occupied voxel
non_zero_coords = np.argwhere(non_zero_mask)

print(f"Total non-zero grid points: {len(non_zero_coords)}\n")

# Walk the occupied voxels; uncomment the two print lines below to dump the
# coordinates and decoded features of each one.
for coord in non_zero_coords:
    x, y, z = coord
    features = grid[x, y, z]
    # print(f"(x={x}, y={y}, z={z})")
    # print_features(features)


Grid shape: (30, 30, 30, 23)
Total non-zero grid points: 52



In [None]:
# Read the structure without stripping hydrogens (placeholder path: edit before running)
pdb_path = "path_to_your_file.pdb"
mol = Chem.MolFromPDBFile(pdb_path, removeHs=False)

if mol is None:
    raise ValueError("Failed to load PDB file. Check the path or file content.")

conf = mol.GetConformer()

# Pair every atom's element symbol with its (x, y, z) position
atom_data = []
for atom in mol.GetAtoms():
    pos = conf.GetAtomPosition(atom.GetIdx())
    atom_data.append((atom.GetSymbol(), np.array([pos.x, pos.y, pos.z])))

# Order by x, breaking ties on y and then on z
atom_data.sort(key=lambda entry: tuple(entry[1]))

# Report the atoms in sorted order
print("Atoms sorted by spatial coordinates (x, y, z):\n")
for symbol, coords in atom_data:
    print(f"Atom: {symbol}, x: {coords[0]:.3f}, y: {coords[1]:.3f}, z: {coords[2]:.3f}")
