In [1]:
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import py3Dmol

def parse_pdb(file_path):
    protein_atoms = []
    ligand_atoms = []

    amino_acids = {}
    current_chain = None

    with open(file_path, 'r') as pdb_file:
        for line in pdb_file:
            if line.startswith("ATOM"):
                atom_type = line[12:16].strip()
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
                chain = line[21]
                residue_number = int(line[22:26])
                residue_name = line[17:20]

                if chain != current_chain:
                    amino_acids = {}
                    current_chain = chain

                if chain not in amino_acids:
                    amino_acids[chain] = {}

                amino_acids[chain][residue_number] = residue_name

                protein_atoms.append((atom_type, x, y, z, chain, residue_number))

            elif line.startswith("HETATM"):
                atom_type = line[12:16].strip()
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])

                ligand_atoms.append((atom_type, x, y, z))

    print("Parsed protein atoms:", len(protein_atoms))
    print("Parsed ligand atoms:", len(ligand_atoms))
    print("Parsed amino acids:", amino_acids)

    return protein_atoms, ligand_atoms, amino_acids


def calculate_distances(protein_atoms, ligand_atoms):
    distances = []

    for protein_atom in protein_atoms:
        for ligand_atom in ligand_atoms:
            dist = np.sqrt((protein_atom[1] - ligand_atom[1]) ** 2 +
                           (protein_atom[2] - ligand_atom[2]) ** 2 +
                           (protein_atom[3] - ligand_atom[3]) ** 2)
            distances.append((protein_atom[0], ligand_atom[0], dist, protein_atom[4], protein_atom[5]))

    return distances


def sort_distances(distances):
    return sorted(distances, key=lambda x: x[2])


def find_hydrogen_bonds(sorted_distances):
    hydrogen_bonds = []

    for protein_atom_type, ligand_atom_type, dist, chain, residue_number in sorted_distances:
        if (protein_atom_type[0] in ['N', 'O', 'S'] and ligand_atom_type[0] in ['N', 'O', 'S']) or \
                (ligand_atom_type[0] in ['N', 'O', 'S'] and protein_atom_type[0] in ['N', 'O', 'S']):
            if 2.7 <= dist <= 3.3:  # Adjust the threshold distance for hydrogen bonds
                hydrogen_bonds.append((chain, residue_number, protein_atom_type, ligand_atom_type, dist))

    return hydrogen_bonds

def visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds, amino_acids, protein_atoms):
    view = py3Dmol.view(width=800, height=600)

    # Add stick representation for ligand atoms
    for i, (atom_type, x, y, z) in enumerate(ligand_atoms):
        view.addStyle({'atom': i}, {'stick': {}})

    # Add balls for amino acid atoms involved in hydrogen bonds
    for chain, residue_number, protein_atom_type, ligand_atom_type, dist in hydrogen_bonds:
        amino_acid = amino_acids.get(chain, {}).get(residue_number, "Unknown")
        color = 'red' if dist <= 3.0 else 'blue'

        # Add amino acid atom as a ball
        if len(protein_atom_type) >= 4:
            atom_index = len(ligand_atoms) + len(protein_atoms)
            x_coord, y_coord, z_coord = protein_atom_type[1], protein_atom_type[2], protein_atom_type[3]
            print("Amino acid coordinates:", x_coord, y_coord, z_coord)  # Add print statement to check coordinates
            view.addSphere({'center': {'x': x_coord, 'y': y_coord, 'z': z_coord}, 'radius': 0.4, 'color': color})

            # Label the amino acid
            amino_acid_label = f"{amino_acid}({chain}){residue_number}"
            view.addLabel(amino_acid_label, {'fontSize': 12, 'position': {'x': x_coord, 'y': y_coord, 'z': z_coord + 1}, 'backgroundColor': 'white', 'fontColor': color})

    # Add yellow dotted lines for hydrogen bonds
    for chain, residue_number, protein_atom_type, ligand_atom_type, dist in hydrogen_bonds:
        if len(ligand_atom_type) >= 4 and len(protein_atom_type) >= 4:
            start_index = len(ligand_atoms) + len(protein_atoms)
            end_index = len(ligand_atoms) + len(protein_atoms) + 1
            start_coords = {'x': ligand_atom_type[1], 'y': ligand_atom_type[2], 'z': ligand_atom_type[3]}
            end_coords = {'x': protein_atom_type[1], 'y': protein_atom_type[2], 'z': protein_atom_type[3]}
            print("Start:", start_coords, "End:", end_coords)  # Add print statement to check coordinates
            view.addCylinder({'start': start_coords, 'end': end_coords, 'radius': 0.1, 'color': 'yellow', 'dashed': True})

    view.zoomTo()
    view.show()




pdb_file_path = "E:\\THESIS\\BACK_END\\4MS4-20240302T114112Z-001\\4MS4\\1.pdb"

protein_atoms, ligand_atoms, amino_acids = parse_pdb(pdb_file_path)
distances = calculate_distances(protein_atoms, ligand_atoms)
sorted_distances = sort_distances(distances)

# Finding and filtering possible hydrogen bonds
hydrogen_bonds = find_hydrogen_bonds(sorted_distances)

# Visualizing ligand with interactions
# Visualizing ligand with interactions
visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds, amino_acids, protein_atoms)


Parsed protein atoms: 4304
Parsed ligand atoms: 19
Parsed amino acids: {'A': {50: 'ARG', 51: 'ARG', 52: 'ALA', 53: 'VAL', 54: 'TYR', 55: 'ILE', 56: 'GLY', 57: 'ALA', 58: 'LEU', 59: 'PHE', 60: 'PRO', 61: 'MET', 62: 'SER', 63: 'GLY', 64: 'GLY', 65: 'TRP', 66: 'PRO', 67: 'GLY', 68: 'GLY', 69: 'GLN', 70: 'ALA', 71: 'CYS', 72: 'GLN', 73: 'PRO', 74: 'ALA', 75: 'VAL', 76: 'GLU', 77: 'MET', 78: 'ALA', 79: 'LEU', 80: 'GLU', 81: 'ASP', 82: 'VAL', 83: 'ASN', 84: 'SER', 85: 'ARG', 86: 'ARG', 87: 'ASP', 88: 'ILE', 89: 'LEU', 90: 'PRO', 91: 'ASP', 92: 'TYR', 93: 'GLU', 94: 'LEU', 95: 'LYS', 96: 'LEU', 97: 'ILE', 98: 'HIS', 99: 'HIS', 100: 'ASP', 101: 'SER', 102: 'LYS', 103: 'CYS', 104: 'ASP', 105: 'PRO', 106: 'GLY', 107: 'GLN', 108: 'ALA', 109: 'THR', 110: 'LYS', 111: 'TYR', 112: 'LEU', 113: 'TYR', 114: 'GLU', 115: 'LEU', 116: 'LEU', 117: 'TYR', 118: 'ASN', 119: 'ASP', 120: 'PRO', 121: 'ILE', 122: 'LYS', 123: 'ILE', 124: 'ILE', 125: 'LEU', 126: 'MET', 127: 'PRO', 128: 'GLY', 129: 'CYS', 130: 'SER', 

###TRIAL 2###


In [2]:
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import py3Dmol

def parse_pdb(file_path):
    protein_atoms = []
    ligand_atoms = []
    amino_acids = {}
    current_chain = None

    with open(file_path, 'r') as pdb_file:
        for line in pdb_file:
            if line.startswith("ATOM"):
                atom_type = line[12:16].strip()
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
                chain = line[21]
                residue_number = int(line[22:26])
                residue_name = line[17:20]

                if chain != current_chain:
                    amino_acids[chain] = {}
                    current_chain = chain

                amino_acids[chain][residue_number] = residue_name
                protein_atoms.append((atom_type, x, y, z, chain, residue_number))

            elif line.startswith("HETATM"):
                atom_type = line[12:16].strip()
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
                ligand_atoms.append((atom_type, x, y, z))

    return protein_atoms, ligand_atoms, amino_acids

def calculate_distances(protein_atoms, ligand_atoms):
    distances = []

    for protein_atom in protein_atoms:
        for ligand_atom in ligand_atoms:
            dist = np.sqrt((protein_atom[1] - ligand_atom[1]) ** 2 +
                           (protein_atom[2] - ligand_atom[2]) ** 2 +
                           (protein_atom[3] - ligand_atom[3]) ** 2)
            distances.append((protein_atom, ligand_atom, dist))

    return distances

def find_hydrogen_bonds(distances):
    hydrogen_bonds = []

    for protein_atom, ligand_atom, dist in distances:
        if (protein_atom[0][0] in ['N', 'O', 'S'] and ligand_atom[0][0] in ['N', 'O', 'S']) or \
           (ligand_atom[0][0] in ['N', 'O', 'S'] and protein_atom[0][0] in ['N', 'O', 'S']):
            if 2.7 <= dist <= 3.3:  # Adjust the threshold distance for hydrogen bonds
                hydrogen_bonds.append((protein_atom, ligand_atom, dist))

    return hydrogen_bonds

def visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds):
    view = py3Dmol.view(width=800, height=600)

    for i, (atom_type, x, y, z) in enumerate(ligand_atoms):
        view.addStyle({'atom': i}, {'stick': {}})

    for protein_atom, ligand_atom, dist in hydrogen_bonds:
        view.addSphere({'center': {'x': ligand_atom[1], 'y': ligand_atom[2], 'z': ligand_atom[3]}, 'radius': 0.4})
        view.addLabel(f"Distance: {dist}", {'fontSize': 12,
                                            'position': {'x': ligand_atom[1], 'y': ligand_atom[2], 'z': ligand_atom[3] + 1},
                                            'backgroundColor': 'white'})

        view.addCylinder({'start': {'x': ligand_atom[1], 'y': ligand_atom[2], 'z': ligand_atom[3]},
                          'end': {'x': protein_atom[1], 'y': protein_atom[2], 'z': protein_atom[3]},
                          'radius': 0.1,
                          'color': 'yellow',
                          'dashed': True})

    view.zoomTo()
    view.show()

pdb_file_path = "E:\\THESIS\\BACK_END\\4MS4-20240302T114112Z-001\\4MS4\\1.pdb"

protein_atoms, ligand_atoms, amino_acids = parse_pdb(pdb_file_path)
distances = calculate_distances(protein_atoms, ligand_atoms)
hydrogen_bonds = find_hydrogen_bonds(distances)

visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds)


Trial 3

In [3]:
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import py3Dmol

def parse_pdb(file_path):
    protein_atoms = []
    ligand_atoms = []
    amino_acids = {}
    current_chain = None

    with open(file_path, 'r') as pdb_file:
        for line in pdb_file:
            if line.startswith("ATOM"):
                atom_type = line[12:16].strip()
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
                chain = line[21]
                residue_number = int(line[22:26])
                residue_name = line[17:20]

                if chain != current_chain:
                    amino_acids[chain] = {}
                    current_chain = chain

                amino_acids[chain][residue_number] = residue_name
                protein_atoms.append((atom_type, x, y, z, chain, residue_number))

            elif line.startswith("HETATM"):
                atom_type = line[12:16].strip()
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
                ligand_atoms.append((atom_type, x, y, z))

    return protein_atoms, ligand_atoms, amino_acids

def calculate_distances(protein_atoms, ligand_atoms):
    distances = []

    for protein_atom in protein_atoms:
        for ligand_atom in ligand_atoms:
            dist = np.sqrt((protein_atom[1] - ligand_atom[1]) ** 2 +
                           (protein_atom[2] - ligand_atom[2]) ** 2 +
                           (protein_atom[3] - ligand_atom[3]) ** 2)
            distances.append((protein_atom, ligand_atom, dist))

    return distances

def find_hydrogen_bonds(distances):
    hydrogen_bonds = []

    for protein_atom, ligand_atom, dist in distances:
        if (protein_atom[0][0] in ['N', 'O', 'S'] and ligand_atom[0][0] in ['N', 'O', 'S']) or \
           (ligand_atom[0][0] in ['N', 'O', 'S'] and protein_atom[0][0] in ['N', 'O', 'S']):
            if 2.7 <= dist <= 3.3:  # Adjust the threshold distance for hydrogen bonds
                hydrogen_bonds.append((protein_atom, ligand_atom, dist))

    return hydrogen_bonds

def visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds):
    view = py3Dmol.view(width=800, height=600)

    for i, (atom_type, x, y, z) in enumerate(ligand_atoms):
        view.addStyle({'atom': i}, {'stick': {}})

    for protein_atom, ligand_atom, dist in hydrogen_bonds:
        view.addSphere({'center': {'x': ligand_atom[1], 'y': ligand_atom[2], 'z': ligand_atom[3]}, 'radius': 0.4})
        view.addLabel(f"Distance: {dist}", {'fontSize': 12,
                                            'position': {'x': ligand_atom[1], 'y': ligand_atom[2], 'z': ligand_atom[3] + 1},
                                            'backgroundColor': 'white'})

        view.addCylinder({'start': {'x': ligand_atom[1], 'y': ligand_atom[2], 'z': ligand_atom[3]},
                          'end': {'x': protein_atom[1], 'y': protein_atom[2], 'z': protein_atom[3]},
                          'radius': 0.1,
                          'color': 'yellow',
                          'dashed': True})

    # Set background color to black
    view.setBackgroundColor('black')

    view.zoomTo()
    view.show()

pdb_file_path = "E:\\THESIS\\BACK_END\\4MS4-20240302T114112Z-001\\4MS4\\1.pdb"

protein_atoms, ligand_atoms, amino_acids = parse_pdb(pdb_file_path)
distances = calculate_distances(protein_atoms, ligand_atoms)
hydrogen_bonds = find_hydrogen_bonds(distances)

visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds)


TRIAL 4

In [4]:
import numpy as np
import pandas as pd
import py3Dmol
from rdkit import Chem
from rdkit.Chem import AllChem

# Function to extract PDB-like information from a PDB file
def extract_pdb_info(file_path):
    pdb_info = ""
    with open(file_path, "r") as f:
        for line in f:
            if line.startswith("HETATM"):
                pdb_info += line
    return pdb_info

# Function to convert PDB-like information to an RDKit molecule with 3D coordinates
def pdb_to_rdkit_3d(pdb_text):
    mol = Chem.MolFromPDBBlock(pdb_text)
    if mol is not None:
        mol = Chem.AddHs(mol)  # Add explicit hydrogen atoms
        AllChem.EmbedMolecule(mol, randomSeed=42)
    else:
        print("Error: Unable to create RDKit molecule from PDB block.")
    return mol

def visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds):
    view = py3Dmol.view(width=800, height=600)

    # Add ligand atoms
    for atom_type, x, y, z in ligand_atoms:
        view.addAtom({'x': x, 'y': y, 'z': z, 'elem': atom_type})

    # Add yellow dotted lines for hydrogen bonds and label relevant amino acid atoms
    for chain, residue_number, protein_atom_type, ligand_atom_type, _ in hydrogen_bonds:
        amino_acid = amino_acids.get(chain, {}).get(residue_number)
        if amino_acid and len(amino_acid) == 5:
            residue_name, atom_type, x1, y1, z1 = amino_acid
            if len(ligand_atom_type) == 3:
                x2, y2, z2 = ligand_atom_type  # Ligand atom coordinates
                view.addAtom({'x': x1, 'y': y1, 'z': z1, 'elem': protein_atom_type})
                view.addLine({'start': {'x': x1, 'y': y1, 'z': z1}, 'end': {'x': x2, 'y': y2, 'z': z2}, 'dashed': True, 'color': 'yellow'})
                view.addLabel(f"{residue_name} {residue_number}({chain})", {'x': x1, 'y': y1, 'z': z1})
                view.addAtom({'x': x1, 'y': y1, 'z': z1, 'elem': protein_atom_type, 'style': 'sphere', 'radius': 0.5})

    # Style and zoom to fit the structure
    view.setStyle({'stick': {}})
    view.setBackgroundColor('white')
    view.zoomTo()

    # Show the interactive 3D view
    view.show()


# Path to your docked PDB file
pdb_file_path = "E:\\THESIS\\BACK_END\\4MS4-20240302T114112Z-001\\4MS4\\1.pdb"

# Extract PDB-like information from the PDB file
pdb_info = extract_pdb_info(pdb_file_path)

# Convert PDB-like information to RDKit molecule with 3D coordinates
mol_3d = pdb_to_rdkit_3d(pdb_info)

if mol_3d is not None:
    # Create a Py3Dmol view
    view = py3Dmol.view(width=400, height=400)

    # Convert RDKit molecule to PDB format for Py3Dmol
    pdb_block = Chem.MolToPDBBlock(mol_3d)
    view.addModel(pdb_block, 'pdb')

    # Style and zoom to fit the structure
    view.setStyle({'stick': {}})
    view.setBackgroundColor('white')
    view.zoomTo()

    # Show the interactive 3D view
    view.show()

    # Predict hydrogen bonds and visualize ligand with interactions
    protein_atoms, ligand_atoms, amino_acids = parse_pdb(pdb_file_path)
    distances = calculate_distances(protein_atoms, ligand_atoms)
    sorted_distances = sort_distances(distances)
    hydrogen_bonds = find_hydrogen_bonds(sorted_distances)

    # Visualize ligand with interactions (hydrogen bonds)
    visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds)


ValueError: not enough values to unpack (expected 5, got 3)

FINAL NOW  

In [12]:
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import py3Dmol

def parse_pdb(file_path):
    protein_atoms = []
    ligand_atoms = []
    amino_acids = {}
    current_chain = None

    with open(file_path, 'r') as pdb_file:
        for line in pdb_file:
            if line.startswith("ATOM"):
                atom_type = line[12:16].strip()
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
                chain = line[21]
                residue_number = int(line[22:26])
                residue_name = line[17:20]

                if chain != current_chain:
                    amino_acids[chain] = {}
                    current_chain = chain

                amino_acids[chain][residue_number] = residue_name
                protein_atoms.append((atom_type, x, y, z, chain, residue_number))

            elif line.startswith("HETATM"):
                atom_type = line[12:16].strip()
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
                ligand_atoms.append((atom_type, x, y, z))

    return protein_atoms, ligand_atoms, amino_acids

def calculate_distances(protein_atoms, ligand_atoms):
    distances = []

    for protein_atom in protein_atoms:
        for ligand_atom in ligand_atoms:
            dist = np.sqrt((protein_atom[1] - ligand_atom[1]) ** 2 +
                           (protein_atom[2] - ligand_atom[2]) ** 2 +
                           (protein_atom[3] - ligand_atom[3]) ** 2)
            distances.append((protein_atom, ligand_atom, dist))

    return distances

def find_hydrogen_bonds(distances):
    hydrogen_bonds = []

    for protein_atom, ligand_atom, dist in distances:
        if (protein_atom[0][0] in ['N', 'O', 'S'] and ligand_atom[0][0] in ['N', 'O', 'S']) or \
           (ligand_atom[0][0] in ['N', 'O', 'S'] and protein_atom[0][0] in ['N', 'O', 'S']):
            if 2.7 <= dist <= 3.3:  # Adjust the threshold distance for hydrogen bonds
                hydrogen_bonds.append((protein_atom, ligand_atom, dist))

    return hydrogen_bonds

def visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds, amino_acids):
    view = py3Dmol.view(width=800, height=600)

    # Show ligand atoms in stick view
    for i, (atom_type, x, y, z) in enumerate(ligand_atoms):
        view.addStyle({'atom': i}, {'stick': {}})

    # Show amino acid atoms involved in hydrogen bonds as blue spheres
    for protein_atom, ligand_atom, dist in hydrogen_bonds:
        if len(ligand_atom) > 4:  # Check if the tuple has enough elements
            protein_atom_index = ligand_atom[4]  # Index of the protein atom in ligand_atoms
            amino_acid = amino_acids.get(protein_atom[4], {}).get(protein_atom[5])
            if amino_acid:
                residue_name, residue_number = amino_acid
                view.addStyle({'atom': protein_atom_index}, {'sphere': {'color': 'blue'}})
                view.addLabel(f"{residue_name} {residue_number}", {'fontSize': 12,
                                                                   'position': {'x': protein_atom[1], 'y': protein_atom[2], 'z': protein_atom[3] + 1},
                                                                   'color': 'blue'})

            # Show yellow dotted lines for hydrogen bonds
            view.addCylinder({'start': {'x': ligand_atom[1], 'y': ligand_atom[2], 'z': ligand_atom[3]},
                              'end': {'x': protein_atom[1], 'y': protein_atom[2], 'z': protein_atom[3]},
                              'radius': 0.1,
                              'color': 'yellow',
                              'dashed': True})

    view.zoomTo()
    view.show()

pdb_file_path = "E:\\THESIS\\BACK_END\\4MS4-20240302T114112Z-001\\4MS4\\1.pdb"

protein_atoms, ligand_atoms, amino_acids = parse_pdb(pdb_file_path)
distances = calculate_distances(protein_atoms, ligand_atoms)
hydrogen_bonds = find_hydrogen_bonds(distances)

# Call the function with ligand_atoms, hydrogen_bonds, and amino_acids
visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds, amino_acids)