#### Import the ligand structure from a docked PDB file

In [16]:


from rdkit import Chem
from rdkit.Chem import AllChem
import py3Dmol

# function to extract PDB-like information from a PDB file
def extract_pdb_info(file_path):
    """Return the HETATM records of a PDB file as a single block of text.

    Lines are kept verbatim (line endings included) and in file order; every
    line that does not begin with "HETATM" is dropped.
    """
    with open(file_path, "r") as handle:
        hetatm_records = [record for record in handle if record.startswith("HETATM")]
    return "".join(hetatm_records)

# function to convert PDB-like information to an RDKit molecule with 3D coordinates
def pdb_to_rdkit_3d(pdb_text):
    """Build an RDKit molecule from PDB text, keeping the coordinates in the text.

    Parameters
    ----------
    pdb_text : str
        PDB-format records, e.g. the HETATM lines of a docked ligand.

    Returns
    -------
    rdkit.Chem.Mol or None
        The parsed molecule with explicit hydrogens added, or None when RDKit
        cannot parse the block (an error message is printed in that case).
    """
    mol = Chem.MolFromPDBBlock(pdb_text)
    if mol is None:
        print("Error: Unable to create RDKit molecule from PDB block.")
        return None

    # The conformer read from the PDB block already holds the docked pose.
    # Calling AllChem.EmbedMolecule here would clear it and generate a new,
    # unrelated conformer, so the pose shown would no longer be the docked one.
    # Only hydrogens are added; addCoords=True places them relative to the
    # existing heavy-atom coordinates instead of leaving them without positions.
    return Chem.AddHs(mol, addCoords=True)

# Path to your docked PDB file
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# only runs where this exact file exists.

pdb_file_path = "E:\\THESIS\\BACK_END\\4MS4-20240302T114112Z-001\\4MS4\\1.pdb"


# Extract PDB-like information from the PDB file
# (only the HETATM lines are returned by extract_pdb_info)
pdb_info = extract_pdb_info(pdb_file_path)

# Convert PDB-like information to RDKit molecule with 3D coordinates
# pdb_to_rdkit_3d prints an error and returns None when RDKit cannot parse the text.
mol_3d = pdb_to_rdkit_3d(pdb_info)

# Nothing is rendered when parsing failed.
if mol_3d is not None:
    # Create a Py3Dmol view
    view = py3Dmol.view(width=400, height=400)

    # Convert RDKit molecule to PDB format for Py3Dmol
    pdb_block = Chem.MolToPDBBlock(mol_3d)
    view.addModel(pdb_block, 'pdb')

    # Style and zoom to fit the structure
    view.setStyle({'stick': {}})
    view.setBackgroundColor('white')
    view.zoomTo()

    # Show the interactive 3D view
    view.show()


Hydrogen bond predictor (distance-based)

In [17]:
import numpy as np
import pandas as pd


def parse_pdb(file_path):
    """Read ATOM and HETATM records from a PDB file using fixed column slices.

    Parameters
    ----------
    file_path : str
        Path of the PDB file to read.

    Returns
    -------
    protein_atoms : list of tuple
        One ``(atom_name, x, y, z, chain, residue_number)`` per ATOM record.
    ligand_atoms : list of tuple
        One ``(atom_name, x, y, z)`` per HETATM record; every HETATM line is
        kept, whatever residue it belongs to.
    amino_acids : dict
        ``{chain: {residue_number: residue_name}}`` built from the ATOM
        records of every chain in the file.

    Raises
    ------
    ValueError
        If a coordinate or residue-number column cannot be converted.
    """
    def _name_and_xyz(record):
        # Atom name and coordinates sit in the same columns for ATOM and HETATM.
        return (record[12:16].strip(),
                float(record[30:38]),
                float(record[38:46]),
                float(record[46:54]))

    protein_atoms = []
    ligand_atoms = []
    amino_acids = {}

    with open(file_path, 'r') as pdb_file:
        for line in pdb_file:
            if line.startswith("ATOM"):
                atom_type, x, y, z = _name_and_xyz(line)
                chain = line[21]
                residue_number = int(line[22:26])
                residue_name = line[17:20]

                # Accumulate per chain. The lookup must not be re-created when
                # the chain id changes, otherwise only the last chain in the
                # file keeps its residue names.
                amino_acids.setdefault(chain, {})[residue_number] = residue_name

                protein_atoms.append((atom_type, x, y, z, chain, residue_number))

            elif line.startswith("HETATM"):
                ligand_atoms.append(_name_and_xyz(line))

    return protein_atoms, ligand_atoms, amino_acids


def calculate_distances(protein_atoms, ligand_atoms):
    """Compute the Euclidean distance for every protein-atom / ligand-atom pair.

    Returns a list of ``(protein_atom_name, ligand_atom_name, distance, chain,
    residue_number)`` tuples, ordered protein atom first, ligand atom second.
    """
    def _separation(p, q):
        # Indices 1..3 hold x, y, z in both kinds of atom tuple.
        return np.sqrt((p[1] - q[1]) ** 2 +
                       (p[2] - q[2]) ** 2 +
                       (p[3] - q[3]) ** 2)

    return [
        (p[0], q[0], _separation(p, q), p[4], p[5])
        for p in protein_atoms
        for q in ligand_atoms
    ]


def sort_distances(distances):
    """Return a new list of distance tuples ordered by ascending distance (index 2)."""
    ordered = list(distances)
    ordered.sort(key=lambda record: record[2])
    return ordered


def write_to_excel(sorted_distances, output_file='distances.xlsx'):
    """Write the distance tuples to an Excel sheet and print a confirmation.

    Each tuple becomes one row under the columns Protein Atom, Ligand Atom,
    Distance, Chain and Residue Number; the DataFrame index is not written.
    """
    column_names = ['Protein Atom', 'Ligand Atom', 'Distance', 'Chain', 'Residue Number']
    table = pd.DataFrame(sorted_distances, columns=column_names)
    table.to_excel(output_file, index=False)
    print(f"Distances written to {output_file}")


def find_hydrogen_bonds(sorted_distances, min_dist=2.7, max_dist=3.3):
    """Select protein/ligand atom pairs that pass a distance-only hydrogen-bond test.

    A pair is kept when both atom names start with N, O or S and the distance
    lies in ``[min_dist, max_dist]``. No angle or hydrogen position is checked
    and no donor/acceptor role is assigned.

    Parameters
    ----------
    sorted_distances : iterable of tuple
        ``(protein_atom_name, ligand_atom_name, distance, chain, residue_number)``.
    min_dist, max_dist : float, optional
        Inclusive distance window; the defaults are the original 2.7 and 3.3.

    Returns
    -------
    list of tuple
        ``(chain, residue_number, protein_atom_name, ligand_atom_name, distance)``
        in the order the pairs were supplied.
    """
    polar_initials = ('N', 'O', 'S')

    def _is_polar(atom_name):
        # An empty name (blank atom-name columns) counts as non-polar rather
        # than raising IndexError on atom_name[0].
        return bool(atom_name) and atom_name[0] in polar_initials

    hydrogen_bonds = []
    for protein_atom_type, ligand_atom_type, dist, chain, residue_number in sorted_distances:
        if not (min_dist <= dist <= max_dist):
            continue
        # The original tested this same condition twice, joined by `or`.
        if _is_polar(protein_atom_type) and _is_polar(ligand_atom_type):
            hydrogen_bonds.append((chain, residue_number, protein_atom_type, ligand_atom_type, dist))

    return hydrogen_bonds


# NOTE(review): absolute, machine-specific Windows path; the cell only runs
# where this exact file exists.
pdb_file_path = "E:\\THESIS\\BACK_END\\4MS4-20240302T114112Z-001\\4MS4\\1.pdb"

# Parse the file, compute every protein-atom / ligand-atom distance, then sort
# the pairs by ascending distance.
protein_atoms, ligand_atoms, amino_acids = parse_pdb(pdb_file_path)
distances = calculate_distances(protein_atoms, ligand_atoms)
sorted_distances = sort_distances(distances)

# sorted distances to an Excel file
# (written to the current working directory, one row per atom pair)
write_to_excel(sorted_distances, output_file='sorted_distances.xlsx')

# Finding and printing possible hydrogen bonds
hydrogen_bonds = find_hydrogen_bonds(sorted_distances)
if hydrogen_bonds:
    print("\nPossible Hydrogen Bond Interactions:")
    for chain, residue_number, protein_atom_type, ligand_atom_type, dist in hydrogen_bonds:
        # Residue name lookup; "Unknown" is printed when the chain/residue is
        # missing from amino_acids.
        amino_acid = amino_acids.get(chain, {}).get(residue_number, "Unknown")
        # NOTE(review): "(donor)" and "(acceptor)" are fixed text. The protein
        # atom is always labelled donor and the ligand atom acceptor; nothing
        # above determines the actual roles.
        print(f"{amino_acid} {residue_number}({chain}){protein_atom_type}(donor) ------ {ligand_atom_type}(acceptor)   {dist}")
else:
    print("\nNo possible Hydrogen Bond Interactions found.")


Distances written to sorted_distances.xlsx

Possible Hydrogen Bond Interactions:
GLY 151(A)O(donor) ------ N1(acceptor)   2.718914673173837
SER 153(A)N(donor) ------ O2(acceptor)   3.078475109530692
SER 153(A)OG(donor) ------ O2(acceptor)   3.162595611202927


Combining both to visualise hydrogen bonds

In [18]:
import numpy as np
import pandas as pd
import py3Dmol


def parse_pdb(file_path):
    """Read ATOM and HETATM records from a PDB file using fixed column slices.

    Parameters
    ----------
    file_path : str
        Path of the PDB file to read.

    Returns
    -------
    protein_atoms : list of tuple
        One ``(atom_name, x, y, z, chain, residue_number)`` per ATOM record.
    ligand_atoms : list of tuple
        One ``(atom_name, x, y, z)`` per HETATM record; every HETATM line is
        kept, whatever residue it belongs to.
    amino_acids : dict
        ``{chain: {residue_number: (residue_name, atom_name, x, y, z)}}`` for
        every chain in the file. The entry of a residue is overwritten by each
        of its ATOM records, so it ends up describing the last atom read for
        that residue.

    Raises
    ------
    ValueError
        If a coordinate or residue-number column cannot be converted.
    """
    def _name_and_xyz(record):
        # Atom name and coordinates sit in the same columns for ATOM and HETATM.
        return (record[12:16].strip(),
                float(record[30:38]),
                float(record[38:46]),
                float(record[46:54]))

    protein_atoms = []
    ligand_atoms = []
    amino_acids = {}

    with open(file_path, 'r') as pdb_file:
        for line in pdb_file:
            if line.startswith("ATOM"):
                atom_type, x, y, z = _name_and_xyz(line)
                chain = line[21]
                residue_number = int(line[22:26])
                residue_name = line[17:20]

                # Accumulate per chain. The lookup must not be re-created when
                # the chain id changes, otherwise only the last chain in the
                # file keeps its residues.
                amino_acids.setdefault(chain, {})[residue_number] = (residue_name, atom_type, x, y, z)

                protein_atoms.append((atom_type, x, y, z, chain, residue_number))

            elif line.startswith("HETATM"):
                ligand_atoms.append(_name_and_xyz(line))

    return protein_atoms, ligand_atoms, amino_acids


def calculate_distances(protein_atoms, ligand_atoms):
    """Pair every protein atom with every ligand atom and measure their separation.

    Each result is ``(protein_atom_name, ligand_atom_name, distance, chain,
    residue_number)``; results follow the protein-major, ligand-minor order of
    the inputs.
    """
    records = []

    for prot in protein_atoms:
        for lig in ligand_atoms:
            # Indices 1..3 are x, y, z in both tuples.
            separation = np.sqrt((prot[1] - lig[1]) ** 2 +
                                 (prot[2] - lig[2]) ** 2 +
                                 (prot[3] - lig[3]) ** 2)
            record = (prot[0], lig[0], separation, prot[4], prot[5])
            records.append(record)

    return records


def sort_distances(distances):
    """Return the distance tuples as a new list, smallest distance first."""
    def _distance_of(record):
        # The distance is the third field of each tuple.
        return record[2]

    return sorted(distances, key=_distance_of)


def find_hydrogen_bonds(sorted_distances, min_dist=2.7, max_dist=3.3):
    """Select protein/ligand atom pairs that pass a distance-only hydrogen-bond test.

    A pair is kept when both atom names start with N, O or S and the distance
    lies in ``[min_dist, max_dist]``. No angle or hydrogen position is checked.

    Parameters
    ----------
    sorted_distances : iterable of tuple
        ``(protein_atom_name, ligand_atom_name, distance, chain, residue_number)``.
    min_dist, max_dist : float, optional
        Inclusive distance window; the defaults are the original 2.7 and 3.3.

    Returns
    -------
    list of tuple
        ``(chain, residue_number, protein_atom_name, ligand_atom_name, distance)``
        in the order the pairs were supplied.
    """
    polar_initials = ('N', 'O', 'S')

    def _is_polar(atom_name):
        # An empty name counts as non-polar rather than raising IndexError.
        return bool(atom_name) and atom_name[0] in polar_initials

    hydrogen_bonds = []
    for protein_atom_type, ligand_atom_type, dist, chain, residue_number in sorted_distances:
        if not (min_dist <= dist <= max_dist):
            continue
        # One test is enough: the original repeated this condition on both
        # sides of an `or`.
        if _is_polar(protein_atom_type) and _is_polar(ligand_atom_type):
            hydrogen_bonds.append((chain, residue_number, protein_atom_type, ligand_atom_type, dist))

    return hydrogen_bonds


def visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds, amino_acids=None, protein_atoms=None):
    """Show the ligand atoms and dashed hydrogen-bond lines in a py3Dmol viewer.

    Parameters
    ----------
    ligand_atoms : iterable of tuple
        ``(atom_name, x, y, z)`` for each ligand atom.
    hydrogen_bonds : iterable of tuple
        ``(chain, residue_number, protein_atom_name, ligand_atom_name, distance)``.
    amino_acids : dict, optional
        ``{chain: {residue_number: (residue_name, atom_name, x, y, z)}}``. Used
        only as a residue-level fallback for the protein end of a bond. When
        omitted, the notebook-level ``amino_acids`` variable is used, which is
        what this function read implicitly before.
    protein_atoms : iterable of tuple, optional
        ``(atom_name, x, y, z, chain, residue_number)``. Gives the exact
        coordinates of the bonded protein atom. When omitted, the
        notebook-level ``protein_atoms`` variable is used if it exists.

    Notes
    -----
    Bonds whose end points cannot be located are skipped. When several atoms
    share a name, the pair whose separation is closest to the recorded
    distance is drawn.
    """
    ligand_atoms = list(ligand_atoms)
    if amino_acids is None:
        amino_acids = globals().get('amino_acids') or {}
    if protein_atoms is None:
        protein_atoms = globals().get('protein_atoms') or []

    def _as_point(xyz):
        return {'x': xyz[0], 'y': xyz[1], 'z': xyz[2]}

    def _best_pair(protein_candidates, ligand_candidates, dist):
        # Pick the (protein, ligand) coordinates whose separation matches the
        # distance stored with the hydrogen bond.
        def _mismatch(pair):
            (px, py, pz), (lx, ly, lz) = pair
            separation = ((px - lx) ** 2 + (py - ly) ** 2 + (pz - lz) ** 2) ** 0.5
            return abs(separation - dist)

        candidates = [(p, l) for p in protein_candidates for l in ligand_candidates]
        return min(candidates, key=_mismatch)

    # Coordinate lookups keyed by the identifiers a hydrogen-bond tuple carries.
    ligand_xyz = {}
    for name, x, y, z in ligand_atoms:
        ligand_xyz.setdefault(name, []).append((x, y, z))

    protein_xyz = {}
    for name, x, y, z, atom_chain, atom_residue in protein_atoms:
        protein_xyz.setdefault((atom_chain, atom_residue, name), []).append((x, y, z))

    view = py3Dmol.view(width=800, height=600)

    # Ligand atoms are drawn as spheres. The 3Dmol.js viewer offers shape calls
    # (addSphere, addLine, ...) but no per-atom addAtom call.
    for _, x, y, z in ligand_atoms:
        view.addSphere({'center': {'x': x, 'y': y, 'z': z}, 'radius': 0.3, 'color': 'green'})

    # Dashed line from the protein atom to the ligand atom of each bond.
    for chain, residue_number, protein_atom_type, ligand_atom_type, dist in hydrogen_bonds:
        protein_candidates = protein_xyz.get((chain, residue_number, protein_atom_type))
        if not protein_candidates:
            # Fallback: the single coordinate stored for the residue.
            residue = amino_acids.get(chain, {}).get(residue_number)
            if isinstance(residue, (tuple, list)) and len(residue) == 5:
                protein_candidates = [tuple(residue[2:5])]

        # The ligand atom name is a string such as "N1"; its coordinates have
        # to be looked up, they are not contained in the name.
        ligand_candidates = ligand_xyz.get(ligand_atom_type)
        if not protein_candidates or not ligand_candidates:
            continue

        protein_point, ligand_point = _best_pair(protein_candidates, ligand_candidates, dist)
        view.addSphere({'center': _as_point(protein_point), 'radius': 0.3, 'color': 'blue'})
        # An explicit colour keeps the line visible on the white background.
        view.addLine({'start': _as_point(protein_point), 'end': _as_point(ligand_point),
                      'dashed': True, 'color': 'black'})

    # Style and zoom to fit the structure
    view.setStyle({'stick': {}})
    view.setBackgroundColor('white')
    view.zoomTo()

    # Show the interactive 3D view
    view.show()



# NOTE(review): absolute, machine-specific Windows path; the cell only runs
# where this exact file exists.
pdb_file_path = "E:\\THESIS\\BACK_END\\4MS4-20240302T114112Z-001\\4MS4\\1.pdb"

# Parse the file, compute every protein-atom / ligand-atom distance, then sort
# the pairs by ascending distance.
protein_atoms, ligand_atoms, amino_acids = parse_pdb(pdb_file_path)
distances = calculate_distances(protein_atoms, ligand_atoms)
sorted_distances = sort_distances(distances)

# Finding possible hydrogen bonds
hydrogen_bonds = find_hydrogen_bonds(sorted_distances)

# Visualizing ligand with interactions
# amino_acids is not passed explicitly here; the function picks it up from the
# notebook-level variable assigned above.
visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds)


TRIAL 2


### Trial 3


In [19]:
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import py3Dmol

def parse_pdb(file_path):
    """Read ATOM and HETATM records from a PDB file using fixed column slices.

    Parameters
    ----------
    file_path : str
        Path of the PDB file to read.

    Returns
    -------
    protein_atoms : list of tuple
        One ``(atom_name, x, y, z, chain, residue_number)`` per ATOM record.
    ligand_atoms : list of tuple
        One ``(atom_name, x, y, z)`` per HETATM record; every HETATM line is
        kept, whatever residue it belongs to.
    amino_acids : dict
        ``{chain: {residue_number: residue_name}}`` built from the ATOM
        records of every chain in the file.

    Raises
    ------
    ValueError
        If a coordinate or residue-number column cannot be converted.
    """
    protein_atoms = []
    ligand_atoms = []
    amino_acids = {}

    with open(file_path, 'r') as pdb_file:
        for line in pdb_file:
            is_protein = line.startswith("ATOM")
            if not is_protein and not line.startswith("HETATM"):
                continue

            # Atom name and coordinates share the same columns in both record types.
            atom_type = line[12:16].strip()
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])

            if not is_protein:
                ligand_atoms.append((atom_type, x, y, z))
                continue

            chain = line[21]
            residue_number = int(line[22:26])
            residue_name = line[17:20]

            # Accumulate per chain. The lookup must not be re-created when the
            # chain id changes, otherwise only the last chain in the file keeps
            # its residue names.
            amino_acids.setdefault(chain, {})[residue_number] = residue_name

            protein_atoms.append((atom_type, x, y, z, chain, residue_number))

    return protein_atoms, ligand_atoms, amino_acids


def calculate_distances(protein_atoms, ligand_atoms):
    """Measure the distance between each protein atom and each ligand atom.

    The result lists ``(protein_atom_name, ligand_atom_name, distance, chain,
    residue_number)`` for all pairs, iterating ligand atoms inside protein atoms.
    """
    results = []

    for p_atom in protein_atoms:
        for l_atom in ligand_atoms:
            dx_sq = (p_atom[1] - l_atom[1]) ** 2
            dy_sq = (p_atom[2] - l_atom[2]) ** 2
            dz_sq = (p_atom[3] - l_atom[3]) ** 2
            gap = np.sqrt(dx_sq + dy_sq + dz_sq)
            results.append((p_atom[0], l_atom[0], gap, p_atom[4], p_atom[5]))

    return results


def sort_distances(distances):
    """Return a new list with the distance tuples in ascending order of distance."""
    by_distance = lambda entry: entry[2]  # third field holds the distance
    ranked = sorted(distances, key=by_distance)
    return ranked


def find_hydrogen_bonds(sorted_distances, min_dist=2.7, max_dist=3.3):
    """Select protein/ligand atom pairs that pass a distance-only hydrogen-bond test.

    A pair is kept when both atom names start with N, O or S and the distance
    lies in ``[min_dist, max_dist]``. No angle or hydrogen position is checked.

    Parameters
    ----------
    sorted_distances : iterable of tuple
        ``(protein_atom_name, ligand_atom_name, distance, chain, residue_number)``.
    min_dist, max_dist : float, optional
        Inclusive distance window; the defaults are the original 2.7 and 3.3.

    Returns
    -------
    list of tuple
        ``(chain, residue_number, protein_atom_name, ligand_atom_name, distance)``
        in the order the pairs were supplied.
    """
    polar_initials = ('N', 'O', 'S')

    def _is_polar(atom_name):
        # An empty name counts as non-polar rather than raising IndexError.
        return bool(atom_name) and atom_name[0] in polar_initials

    hydrogen_bonds = []
    for protein_atom_type, ligand_atom_type, dist, chain, residue_number in sorted_distances:
        if not (min_dist <= dist <= max_dist):
            continue
        # The original wrote this test twice, joined by `or`; once is equivalent.
        if _is_polar(protein_atom_type) and _is_polar(ligand_atom_type):
            hydrogen_bonds.append((chain, residue_number, protein_atom_type, ligand_atom_type, dist))

    return hydrogen_bonds


def visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds, amino_acids, protein_atoms=None):
    """Show ligand atoms as spheres and hydrogen bonds as labelled dashed cylinders.

    Parameters
    ----------
    ligand_atoms : iterable of tuple
        ``(atom_name, x, y, z)`` for each ligand atom.
    hydrogen_bonds : iterable of tuple
        ``(chain, residue_number, protein_atom_name, ligand_atom_name, distance)``.
    amino_acids : dict
        ``{chain: {residue_number: residue_name}}``; used for the label text,
        with "Unknown" for residues that are not present.
    protein_atoms : iterable of tuple, optional
        ``(atom_name, x, y, z, chain, residue_number)``. Gives the coordinates
        of the bonded protein atom. When omitted, the notebook-level
        ``protein_atoms`` variable is used if it exists.

    Notes
    -----
    Bonds whose end points cannot be located are skipped. When several atoms
    share a name, the pair whose separation is closest to the recorded
    distance is drawn. Label text is red for distances up to 3.0 and blue
    above that.
    """
    ligand_atoms = list(ligand_atoms)
    if protein_atoms is None:
        protein_atoms = globals().get('protein_atoms') or []

    def _as_point(xyz):
        return {'x': xyz[0], 'y': xyz[1], 'z': xyz[2]}

    def _best_pair(protein_candidates, ligand_candidates, dist):
        # Pick the (protein, ligand) coordinates whose separation matches the
        # distance stored with the hydrogen bond.
        def _mismatch(pair):
            (px, py, pz), (lx, ly, lz) = pair
            separation = ((px - lx) ** 2 + (py - ly) ** 2 + (pz - lz) ** 2) ** 0.5
            return abs(separation - dist)

        candidates = [(p, l) for p in protein_candidates for l in ligand_candidates]
        return min(candidates, key=_mismatch)

    # Coordinate lookups keyed by the identifiers a hydrogen-bond tuple carries.
    ligand_xyz = {}
    for name, x, y, z in ligand_atoms:
        ligand_xyz.setdefault(name, []).append((x, y, z))

    protein_xyz = {}
    for name, x, y, z, atom_chain, atom_residue in protein_atoms:
        protein_xyz.setdefault((atom_chain, atom_residue, name), []).append((x, y, z))

    view = py3Dmol.view(width=800, height=600)

    # One small green sphere per ligand atom.
    for _, x, y, z in ligand_atoms:
        view.addSphere({'center': {'x': x, 'y': y, 'z': z}, 'radius': 0.2, 'color': 'green'})

    for chain, residue_number, protein_atom_type, ligand_atom_type, dist in hydrogen_bonds:
        amino_acid = amino_acids.get(chain, {}).get(residue_number, "Unknown")
        color = 'red' if dist <= 3.0 else 'blue'

        # Both atom identifiers are name strings such as "OG" or "N1"; their
        # coordinates have to be looked up, they are not contained in the names.
        protein_candidates = protein_xyz.get((chain, residue_number, protein_atom_type))
        ligand_candidates = ligand_xyz.get(ligand_atom_type)
        if not protein_candidates or not ligand_candidates:
            continue

        protein_point, ligand_point = _best_pair(protein_candidates, ligand_candidates, dist)
        start_coords = _as_point(ligand_point)
        end_coords = _as_point(protein_point)

        # The label is anchored at the protein atom so labels of different
        # bonds do not all share one fixed position.
        label = f"{amino_acid}({chain}){residue_number}-{protein_atom_type}"
        view.addLabel(label, {'fontSize': 12, 'position': end_coords, 'backgroundColor': 'white', 'fontColor': color})
        view.addCylinder({'start': start_coords, 'end': end_coords, 'radius': 0.1, 'color': 'black', 'dashed': True})

    view.zoomTo()
    view.show()



# NOTE(review): absolute, machine-specific Windows path; the cell only runs
# where this exact file exists.
pdb_file_path = "E:\\THESIS\\BACK_END\\4MS4-20240302T114112Z-001\\4MS4\\1.pdb"

# Parse the file, compute every protein-atom / ligand-atom distance, then sort
# the pairs by ascending distance.
protein_atoms, ligand_atoms, amino_acids = parse_pdb(pdb_file_path)
distances = calculate_distances(protein_atoms, ligand_atoms)
sorted_distances = sort_distances(distances)

# Finding and filtering possible hydrogen bonds
hydrogen_bonds = find_hydrogen_bonds(sorted_distances)

# Visualizing ligand with interactions
# amino_acids (chain -> residue number -> residue name) is passed for the labels.
visualize_ligand_with_interactions(ligand_atoms, hydrogen_bonds, amino_acids)