In [None]:
# Mount Google Drive so later cells can read/write under /content/gdrive.
from google.colab import drive
drive.mount("/content/gdrive")

# Install condacolab with pip, then use it to set up conda in this runtime.
!pip install -q condacolab
import condacolab

# NOTE(review): condacolab's documentation says install() restarts the kernel;
# if so, the statements below only take effect when this cell is run again — confirm.
condacolab.install()

# Repeated import of condacolab (it is already imported above).
import condacolab

# check() is called both before and after the conda install below.
condacolab.check()
# NOTE(review): no -y flag is passed here, unlike the mamba installs in a later
# cell — confirm this command does not wait for interactive confirmation.
!conda install -c conda-forge openbabel

condacolab.check()

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
✨🍰✨ Everything looks OK!
✨🍰✨ Everything looks OK!
Channels:
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | 

In [None]:
import sys
import time
import contextlib

# Append this cell's Python-level stdout to an install log instead of the cell output.
# NOTE(review): whether the output of the `!` shell commands below is also
# redirected by contextlib.redirect_stdout is not visible from here — confirm.
with open('/content/labodock_install.log', 'a') as inpt:
    with contextlib.redirect_stdout(inpt):

        # -- Start installation --
        start = time.time()
        # Remove Colab's bundled sample data folder.
        !rm -r /content/sample_data
        # Download the AutoDock Vina 1.2.5 Linux binary as ./vina and make it executable.
        !wget https://github.com/ccsb-scripps/AutoDock-Vina/releases/download/v1.2.5/vina_1.2.5_linux_x86_64 -O vina
        !chmod u+x vina

        # Version-pinned Python packages.
        !pip install py3Dmol==2.0.3
        !pip install rdkit-pypi==2022.9.5
        !pip install meeko==0.5.0
        !pip install condacolab==0.1.7

        # Set up mambaforge through condacolab, then install the conda-forge packages.
        import condacolab
        condacolab.install_mambaforge()
        !mamba install -c conda-forge spyrmsd=0.6.0 openbabel=3.1.1 plip=2.3.0
        end = time.time()
        # -- End installation --

        # This print runs inside redirect_stdout, so the elapsed time goes to the log file.
        print(f'+ Time elapsed: ' + time.strftime('%Mm %Ss', time.gmtime(end - start)))

In [None]:
#@title **Install dependencies**
#@markdown It will take a few minutes, please, drink a coffee and wait. ;-)
# install dependencies
# NOTE(review): IPython documents cell magics as having to be the first line of
# a cell; confirm that %%capture still takes effect below these comment lines.
%%capture

# Biopython (this same install command is repeated at the end of this block).
!pip install biopython
from Bio import PDB

# NOTE(review): apt-get is invoked without -y here — confirm it does not wait
# for confirmation in this environment.
! apt-get install pymol
from pymol import cmd


import sys
# Unpinned pip / mamba installs for the visualisation and analysis toolchain.
!pip -q install py3Dmol
!pip install git+https://github.com/pablo-arantes/biopandas
!mamba install openmmforcefields -c conda-forge -y
!pip install prolif
!mamba install -c conda-forge openbabel -y
!mamba install -c conda-forge ambertools -y
!mamba install -c conda-forge parmed -y
!pip install --upgrade MDAnalysis
# Second biopython install (duplicate of the first command in this block).
!pip install biopython

from openff.toolkit.topology import Molecule, Topology
from openff.toolkit.typing.engines.smirnoff import ForceField
from openff.toolkit.utils import get_data_file_path

import parmed as pmd
from biopandas.pdb import PandasPdb
import os
import urllib.request
import numpy as np
import MDAnalysis as mda
import py3Dmol
import pytraj as pt
import platform
import scipy.cluster.hierarchy
from scipy.spatial.distance import squareform
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd
from scipy.interpolate import griddata
import seaborn as sb
from statistics import mean, stdev
from pytraj import matrix
from matplotlib import colors
from IPython.display import set_matplotlib_formats

import rdkit
import mdtraj as md
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdMolTransforms
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem import rdDepictor
from rdkit.Chem import rdForceFieldHelpers
from IPython.display import SVG
import ipywidgets as widgets
import rdkit
from rdkit.Chem.Draw import IPythonConsole
AllChem.SetPreferCoordGen(True)
from IPython.display import Image
import openbabel
from openbabel import pybel

import os
import subprocess

# Added new from PoseBusters
import subprocess
from pathlib import Path
from tempfile import TemporaryDirectory
import logging

from meeko import MoleculePreparation, PDBQTMolecule, RDKitMolCreate
from pymol import cmd
from rdkit.Chem import AddHs, MolFromMolFile, SDWriter

In [None]:
# @title **This part is taken from LaboDock: https://github.com/RyanZR/labodock**
# @markdown This creates important custom functions and methods for later
# @markdown docking and binding interaction study.

# Register an IPython alias so that `vina` invokes the binary at /content/vina.
%alias vina /content/vina

#############################################
# Suppress Warnings

#RDLogger.DisableLog('rdApp.warning')

#############################################
# Grid Box Calculation Methods

class GridBox:
    """Derive docking grid-box centres and sizes from a PDB structure.

    The structure is parsed with RDKit (`Chem.MolFromPDBBlock`); all box
    calculations use the atom positions of the currently loaded molecule's
    conformer. `defined_by_res` replaces the loaded molecule with a subset
    of residues before computing the box.
    """

    # Type aliases used by the method annotations below.
    ranges = tuple[list[float], list[float], list[float]]
    coords = tuple[float, float, float]
    center_bxsize = tuple[tuple[float, float, float], tuple[float, float, float]]

    def __init__(self, inpt_file: str) -> None:
        """Read `inpt_file` and parse its whole content as a PDB block.

        Raises:
            ValueError: if RDKit cannot build a molecule from the file.
        """
        # Context manager guarantees the handle is closed even if read() fails.
        with open(inpt_file, 'r') as inpt:
            self.data = inpt.read()
        # Closed handle kept as an attribute for backward compatibility.
        self.inpt = inpt
        self.update_gridbox(self.data)

    def update_gridbox(self, mol_block: str) -> None:
        """Replace the working molecule with the one parsed from `mol_block`.

        State is only overwritten once parsing has succeeded, so a failed
        update leaves the previously loaded molecule intact.

        Raises:
            ValueError: if `mol_block` yields no molecule or no atoms.
        """
        cmol = Chem.MolFromPDBBlock(mol_block)
        if cmol is None or cmol.GetNumAtoms() == 0:
            raise ValueError('No atoms could be parsed from the supplied PDB block')
        self.cmol = cmol
        self.conf = cmol.GetConformer()
        self.ntom = cmol.GetNumAtoms()

    def compute_coords(self) -> ranges:
        """Return three lists with the x, y and z coordinate of every atom."""
        positions = [self.conf.GetAtomPosition(c) for c in range(self.ntom)]
        x_coord = [pos.x for pos in positions]
        y_coord = [pos.y for pos in positions]
        z_coord = [pos.z for pos in positions]
        return x_coord, y_coord, z_coord

    def compute_ranges(self) -> ranges:
        """Return `[min, max]` of the atom coordinates along x, y and z."""
        x, y, z = self.compute_coords()
        x_range = [min(x), max(x)]
        y_range = [min(y), max(y)]
        z_range = [min(z), max(z)]
        return x_range, y_range, z_range

    def compute_center(self, use_range: bool = True) -> coords:
        """Return the centre rounded to 3 decimals.

        With `use_range=True` the centre is the midpoint of the min/max
        range on each axis; otherwise it is the mean of all atom coordinates.
        """
        x, y, z = self.compute_ranges() if use_range else self.compute_coords()
        x_center = round(np.mean(x), 3)
        y_center = round(np.mean(y), 3)
        z_center = round(np.mean(z), 3)
        return x_center, y_center, z_center

    def generate_res_molblock(self, residues_list: list[str]) -> str:
        """Return the ATOM/HETATM lines of `self.data` for the given residues.

        Residue numbers are read from columns 23-26 of each line and compared
        as strings against `residues_list`. Only coordinate records are
        considered, so records such as CONECT or TER that merely have matching
        digits in the same columns are not picked up.
        """
        wanted = set(residues_list)
        res_lines = [line for line in self.data.split('\n')
                     if line.startswith(('ATOM', 'HETATM'))
                     and line[22:26].strip() in wanted]
        return '\n'.join(res_lines)

    def labox(self, scale: float = 2.0) -> center_bxsize:
        """Return `(center, bxsize)` where each box edge is the coordinate
        range on that axis multiplied by `scale` (rounded to 3 decimals)."""
        xr, yr, zr = self.compute_ranges()
        center = self.compute_center()
        bxsize = (round(abs(xr[0] - xr[1]) * scale, 3),
                  round(abs(yr[0] - yr[1]) * scale, 3),
                  round(abs(zr[0] - zr[1]) * scale, 3))
        return center, bxsize

    def eboxsize(self, gy_box_ratio: float = 0.23, modified: bool = False) -> center_bxsize:
        """Return `(center, bxsize)` with a cubic box.

        The edge length is the root-mean-square distance of the atoms from
        `center`, divided by `gy_box_ratio`. `modified` is forwarded to
        `compute_center` as `use_range`.
        """
        xc, yc, zc = self.compute_coords()
        center = self.compute_center(modified)
        distsq = [(x-center[0])**2 + (y-center[1])**2 + (z-center[2])**2
                  for x, y, z in zip(xc, yc, zc)]
        bxsize = (round(np.sqrt(sum(distsq) / len(xc)) / gy_box_ratio, 3),) * 3
        return center, bxsize

    def autodock_grid(self) -> center_bxsize:
        """Return `(center, bxsize)` with a fixed 22.5 x 22.5 x 22.5 box."""
        center = self.compute_center()
        bxsize = (22.5, 22.5, 22.5)
        return center, bxsize

    def defined_by_res(self, residue_number: str, scale: float = 1.25) -> center_bxsize:
        """Return a `labox` around the residues listed in `residue_number`.

        `residue_number` is a comma- and/or whitespace-separated string.
        Note that this replaces the object's working molecule with the
        selected residues (via `update_gridbox`).

        Raises:
            ValueError: if no atoms match the requested residues.
        """
        res_list = residue_number.replace(',', ' ').split()
        res_block = self.generate_res_molblock(res_list)
        self.update_gridbox(res_block)
        return self.labox(scale=scale)

#############################################
# RMSD Calculation Methods

class ComputeRMSD:
    """RMSD helpers for comparing a reference ligand pose with docked poses.

    Molecules are passed around as the 5-tuple produced by `load_molecule`:
    `(name, coordinates, atomic numbers, adjacency matrix, RDKit mol)`.
    The three RMSD methods return a float rounded to 3 decimals, or the
    string 'ERROR' when the computation fails.
    """

    def __init__(self) -> None:
        # Cached maximum common substructure (set on the first find_MCS call).
        self.MCS_mol = None
        self.MCS_png = None

    def load_molecule(self, inpt_file: str, remove_Hs: bool = True) -> tuple:
        """Load `inpt_file` and return `(name, coor, anum, mtrx, cmol)`.

        `name` is the file's base name up to its first '.'; `cmol` is the
        RDKit molecule read from the same file with `Chem.MolFromPDBFile`.
        """
        # Imported here because the notebook's import cell does not bring
        # spyrmsd's `io` into scope (spyrmsd is installed by the setup cell).
        from spyrmsd import io

        molecule = io.loadmol(inpt_file)
        if remove_Hs:
            molecule.strip()
        name = os.path.basename(inpt_file).split('.')[0]
        coor = molecule.coordinates
        anum = molecule.atomicnums
        mtrx = molecule.adjacency_matrix
        cmol = Chem.MolFromPDBFile(inpt_file)
        return name, coor, anum, mtrx, cmol

    def mol_to_png(self, mol: object) -> object:
        """Return an image of `mol` with a fixed legend."""
        legend = 'Maximum Common Substructure'
        png = Draw.MolToImage(mol, legend=legend)
        return png

    def find_MCS(self, ref: tuple, lig: tuple) -> object:
        """Return the maximum common substructure of `ref` and `lig`.

        The result is computed once and cached on the instance; later calls
        return the cached molecule regardless of their arguments.
        """
        if self.MCS_mol is None:
            # Local import: rdFMCS is not imported by the notebook's import cell.
            from rdkit.Chem import rdFMCS

            MCS_obj = rdFMCS.FindMCS([ref[4], lig[4]])
            MCS_mol = Chem.MolFromSmarts(MCS_obj.smartsString)
            MCS_png = self.mol_to_png(MCS_mol)
            self.MCS_mol = MCS_mol
            self.MCS_png = MCS_png
        return self.MCS_mol

    def hung_RMSD(self, ref: tuple, lig: tuple) -> float:
        """Return `rmsd.hrmsd` of the two molecules, or 'ERROR' on failure."""
        # Import outside the try block so a missing dependency is reported
        # instead of being silently turned into 'ERROR'.
        from spyrmsd import rmsd

        try:
            hRMSD = round(rmsd.hrmsd(ref[1], lig[1], ref[2], lig[2]), 3)
        except Exception:
            hRMSD = 'ERROR'
        return hRMSD

    def symm_RMSD(self, ref: tuple, lig: tuple, minimise: bool = False) -> float:
        """Return `rmsd.symmrmsd` of the two molecules, or 'ERROR' on failure."""
        from spyrmsd import rmsd

        try:
            sRMSD = round(rmsd.symmrmsd(ref[1], lig[1], ref[2], lig[2], ref[3], lig[3], minimize=minimise), 3)
        except Exception:
            sRMSD = 'ERROR'
        return sRMSD

    def labo_RMSD(self, ref: tuple, lig: tuple) -> float:
        """Return the RMSD over the atoms matched by the common substructure.

        Atoms are paired in the order returned by `GetSubstructMatch` for each
        molecule. Returns 'ERROR' when no atom pairs are available.
        """
        mol_substr = self.find_MCS(ref, lig)
        ref_substr = ref[4].GetSubstructMatch(mol_substr)
        lig_substr = lig[4].GetSubstructMatch(mol_substr)

        distsq = []
        for ref_atom, lig_atom in zip(ref_substr, lig_substr):
            ref_pos = ref[4].GetConformer().GetAtomPosition(ref_atom)
            lig_pos = lig[4].GetConformer().GetAtomPosition(lig_atom)
            ref_coord = np.array((ref_pos.x, ref_pos.y, ref_pos.z))
            lig_coord = np.array((lig_pos.x, lig_pos.y, lig_pos.z))
            coo_dist = np.linalg.norm(ref_coord - lig_coord)
            distsq.append(coo_dist ** 2)

        # No matched atoms means there is nothing to average over.
        if not distsq:
            return 'ERROR'
        try:
            lRMSD = round(np.sqrt(sum(distsq)/len(distsq)), 3)
        except Exception:
            lRMSD = 'ERROR'
        return lRMSD

    def rmsd_report(self,
                    ref: tuple,
                    lig: tuple,
                    lRMSD: bool = True,
                    hRMSD: bool = True,
                    sRMSD: bool = True
                    ) -> dict[str, list]:
        """Return a one-row report dict for `lig` against `ref`.

        The dict always has 'NAME' and additionally 'LABO_RMSD', 'HUNG_RMSD'
        and 'SYMM_RMSD' for each metric whose flag is true. Every value is a
        single-element list.
        """
        report = {'NAME': [lig[0]]}
        if lRMSD:
            report['LABO_RMSD'] = [self.labo_RMSD(ref, lig)]
        if hRMSD:
            report['HUNG_RMSD'] = [self.hung_RMSD(ref, lig)]
        if sRMSD:
            report['SYMM_RMSD'] = [self.symm_RMSD(ref, lig)]
        return report

#############################################
# AA Constants and Bond Colour Dictionary

# Kyte and Doolittle Hydropathy Scale (1982)
# Maps three-letter residue names to a hydropathy value.
AA_HB = {'ALA':  1.8, 'ARG': -4.5, 'ASN': -3.5, 'ASP': -3.5, 'CYS':  2.5,
         'GLN': -3.5, 'GLU': -3.5, 'GLY': -0.4, 'HIS': -3.2, 'ILE':  4.5,
         'LEU':  3.8, 'LYS': -3.9, 'MET':  1.9, 'PHE':  2.8, 'PRO': -1.6,
         'SER': -0.8, 'THR': -0.7, 'TRP': -0.9, 'TYR': -1.3, 'VAL':  4.2}

# University of Calgary PI Scale
# Maps three-letter residue names to a pI value; unlike AA_HB this table
# also contains 'SEC'.
AA_PI = {'ALA':  6.0, 'ARG': 10.76, 'ASN': 5.41, 'ASP': 2.77, 'CYS': 5.07,
         'GLN': 5.65, 'GLU':  3.22, 'GLY': 5.97, 'HIS': 7.59, 'ILE': 6.02,
         'LEU': 5.98, 'LYS':  9.74, 'MET': 5.74, 'PHE': 5.48, 'PRO':  6.3,
         'SEC': 5.68, 'SER':  5.68, 'THR':  5.6, 'TRP': 5.89, 'TYR': 5.66,
         'VAL': 5.96}

# Interaction type -> [hex colour code, colour name].
# interaction_dict() selects index 0 for usage 'lbsp' and index 1 for 'view'.
# Note: 'HBOND' and 'HALOGEN' share the same hex code and name.
BOND_COL = {'HYDROPHOBIC': ['0x59e382', 'GREEN'],
            'HBOND': ['0x59bee3', 'LIGHT BLUE'],
            'WATERBRIDGE': ['0x4c4cff', 'BLUE'],
            'SALTBRIDGE': ['0xefd033', 'YELLOW'],
            'PISTACKING': ['0xb559e3', 'PURPLE'],
            'PICATION': ['0xe359d8', 'VIOLET'],
            'HALOGEN': ['0x59bee3', 'LIGHT BLUE'],
            'METAL':['0xe35959', 'ORANGE']}

#############################################
# AA-to-Colour Converter Function

def sequential_gradient(value: float,
                        min_value: float,
                        max_value: float,
                        targ_colour: str = '00ff00',
                        interpolation: float = 0.0
                        ) -> str:
    """Map `value` onto a white -> `targ_colour` gradient.

    Args:
        value: the value to colour.
        min_value: value mapped to white.
        max_value: value mapped to the (possibly softened) target colour.
        targ_colour: six-digit hex colour without a leading '#'.
        interpolation: fraction by which the gradient is pulled back
            towards white (0.0 reaches the full target colour).

    Returns:
        A '#rrggbb' hex colour string.

    Values outside `[min_value, max_value]` are clamped to the nearest end
    so the result is always a valid colour; a zero-width range maps to white.
    """
    span = max_value - min_value
    # Guard against division by zero when every value is identical.
    norm_val = (value - min_value) / span if span else 0.0
    # Clamp so out-of-range input cannot produce negative or >255 channels.
    norm_val = min(max(norm_val, 0.0), 1.0)

    rgb = tuple(int(targ_colour[d:d+2], 16) for d in (0, 2, 4))
    strength = (1 - interpolation) * norm_val
    r, g, b = (int(255 - (255 - channel) * strength) for channel in rgb)

    return f'#{r:02x}{g:02x}{b:02x}'

def diverging_gradient(value: float,
                       min_value: float,
                       max_value: float,
                       base_colour: str = 'ff0000',
                       targ_colour: str = '0000ff',
                       interpolation: float = 0.3
                       ) -> str:
    """Map `value` onto a `base_colour` -> white -> `targ_colour` gradient.

    Args:
        value: the value to colour.
        min_value: value mapped to `base_colour`.
        max_value: value mapped to `targ_colour`.
        base_colour: six-digit hex colour (no '#') for the low end.
        targ_colour: six-digit hex colour (no '#') for the high end.
        interpolation: width of the central band (as a fraction of the
            normalised range) that is rendered pure white.

    Returns:
        A '#rrggbb' hex colour string.

    Values outside `[min_value, max_value]` are clamped to the nearest end;
    a zero-width range maps to the white midpoint.
    """
    span = max_value - min_value
    # Guard against division by zero when every value is identical.
    norm_val = (value - min_value) / span if span else 0.5
    # Clamp so out-of-range input cannot produce invalid channel values.
    norm_val = min(max(norm_val, 0.0), 1.0)

    white = (255, 255, 255)
    rgb_A = tuple(int(base_colour[d:d+2], 16) for d in (0, 2, 4))
    rgb_B = tuple(int(targ_colour[d:d+2], 16) for d in (0, 2, 4))

    if norm_val < 0.5 - interpolation / 2:
        # Low side: blend from the base colour towards white.
        factor = norm_val / (0.5 - interpolation / 2)
        r, g, b = (int(start + (end - start) * factor)
                   for start, end in zip(rgb_A, white))
    elif norm_val > 0.5 + interpolation / 2:
        # High side: blend from white towards the target colour.
        factor = (norm_val - 0.5 - interpolation / 2) / (0.5 - interpolation / 2)
        r, g, b = (int(start + (end - start) * factor)
                   for start, end in zip(white, rgb_B))
    else:
        # Central band stays white.
        r, g, b = white

    return f'#{r:02x}{g:02x}{b:02x}'

def a2c_converter(aa_map: dict, grad_func: 'function') -> dict:
    """Return a dict mapping each key of `aa_map` to a colour.

    `grad_func` is called as `grad_func(value, lowest, highest)` for every
    entry, where `lowest`/`highest` are the extremes of `aa_map`'s values.
    """
    scores = aa_map.values()
    lowest, highest = min(scores), max(scores)

    colour_by_residue = {}
    for residue, score in aa_map.items():
        colour_by_residue[residue] = grad_func(score, lowest, highest)
    return colour_by_residue

#############################################
# Built-in Styling Function

def builtin_style(style: str, opacity: float = 1.0) -> dict:
    """Translate a style name into a style dict.

    Names containing 'Carbon', 'chain', 'ssJmol' or 'ssPyMol' are used as a
    colour scheme as-is; 'hydrophobicity', 'isoelectric points' and
    'b factor' get a property-based colour scheme; anything else is treated
    as a plain colour. `opacity` and `singleBonds: False` are always added.
    """
    scheme_keywords = ('Carbon', 'chain', 'ssJmol', 'ssPyMol')

    if any(keyword in style for keyword in scheme_keywords):
        result = {'colorscheme': style}
    elif style == 'hydrophobicity':
        residue_colours = a2c_converter(AA_HB, sequential_gradient)
        result = {'colorscheme': {'prop': 'resn', 'map': residue_colours}}
    elif style == 'isoelectric points':
        residue_colours = a2c_converter(AA_PI, diverging_gradient)
        result = {'colorscheme': {'prop': 'resn', 'map': residue_colours}}
    elif style == 'b factor':
        result = {'colorscheme': {
            'prop': 'b', 'gradient': 'rwb', 'min': 90, 'max': 50}}
    else:
        result = {'color': style}

    result['opacity'] = opacity
    result['singleBonds'] = False
    return result

#############################################
# Built-in Colour Scale Function

def colour_scale(aa_map: dict, grad_func: 'function') -> None:
    """Draw a horizontal colour bar for the value range of `aa_map`.

    The bar is built from 100 evenly spaced samples between the smallest and
    largest value of `aa_map`, each coloured with
    `grad_func(sample, lowest, highest)`; only the two extremes get ticks.
    """
    lowest = min(aa_map.values())
    highest = max(aa_map.values())

    swatches = []
    for sample in np.linspace(lowest, highest, 100):
        swatches.append(grad_func(sample, lowest, highest))

    _, bar_axes = plt.subplots(figsize=(4.85, 0.25))
    mappable = plt.cm.ScalarMappable(
        plt.Normalize(lowest, highest),
        plt.cm.colors.ListedColormap(swatches))
    mappable.set_array([])

    colour_bar = plt.colorbar(mappable, bar_axes, orientation='horizontal')
    colour_bar.set_ticks([lowest, highest])

def show_cscale(rept_info: dict, surf_info: dict) -> None:
    """Print a header and draw the colour scale for the active style.

    The style is the first value of `surf_info` if it has any truthy key,
    otherwise the first value of `rept_info` under the same condition.
    Only 'hydrophobicity' and 'isoelectric points' produce output.
    """

    def label_title(text: str, low_label: str, high_label: str) -> None:
        # 55-character rule above and below the centred, upper-cased title.
        rule = '-' * 55
        print(rule)
        print(f'{low_label}{text.upper():^47}{high_label}')
        print(rule)

    # Surface styling takes precedence over the representation styling.
    if any(surf_info):
        style = list(surf_info.values())[0]
    elif any(rept_info):
        style = list(rept_info.values())[0]
    else:
        style = None

    if style == 'hydrophobicity':
        label_title(style, 'Less', 'More')
        colour_scale(AA_HB, sequential_gradient)
    elif style == 'isoelectric points':
        label_title(style, 'Acid', 'Base')
        colour_scale(AA_PI, diverging_gradient)

#############################################
# Other Functions

def extract_config(inpt_file: str) -> tuple:
    """Read the grid-box centre and size from a `key = value` config file.

    The values are looked up by the keys `center_x/y/z` and `size_x/y/z`,
    so the result does not depend on line order, blank lines, `#` comments
    or additional settings. If any of those keys is missing, the function
    falls back to the fixed layout it originally required: the third
    whitespace-separated token of lines 1-3 (centre) and 5-7 (size).

    Returns:
        `(center, bxsize)`, each a tuple of three floats.
    """
    with open(inpt_file, 'r') as inpt:
        lines = inpt.readlines()

    params = {}
    for line in lines:
        # Drop trailing comments, then split on the first '='.
        key, sep, raw_value = line.split('#', 1)[0].partition('=')
        if sep and raw_value.strip():
            params[key.strip()] = raw_value.strip()

    keys = ('center_x', 'center_y', 'center_z', 'size_x', 'size_y', 'size_z')
    if all(key in params for key in keys):
        values = [float(params[key]) for key in keys]
    else:
        # Legacy positional layout (line index 3 is skipped as a separator).
        data = [line.split() for line in lines]
        values = [float(data[idx][2]) for idx in (0, 1, 2, 4, 5, 6)]

    center = tuple(values[:3])
    bxsize = tuple(values[3:])
    return center, bxsize

def interaction_dict(inpt_file: str, interactions: str = '', usage: str = 'view') -> dict:
    """Load an interaction CSV into a column-oriented dict with colours.

    Args:
        inpt_file: CSV file with at least the columns 'BOND', 'LIGCOO' and
            'PROTCOO'.
        interactions: interaction types to keep. Either a comma- and/or
            whitespace-separated string or a list/tuple of such strings;
            empty keeps every row.
        usage: 'lbsp' selects the hex colour code from BOND_COL, 'view'
            selects the colour name.

    Returns:
        `DataFrame.to_dict()` of the (filtered) table, where 'LIGCOO' and
        'PROTCOO' strings such as '(1.0, 2.0, 3.0)' are converted to float
        tuples and a 'COLOR' column derived from 'BOND' is added.
    """
    usg_map = {'lbsp': 0, 'view': 1}

    def coord_tuples(column: dict) -> dict:
        # '(x, y, z)' -> (x, y, z): strip the brackets, split on commas.
        return {key: tuple(float(val) for val in value[1:-1].split(','))
                for key, value in column.items()}

    # LaboSpace.add_interaction passes a list by default, so accept both a
    # delimited string and an iterable of names.
    if not isinstance(interactions, str):
        interactions = ' '.join(interactions or ())
    wanted = interactions.replace(',', ' ').split()

    inter_df = pd.read_csv(inpt_file)
    if wanted:
        inter_df = inter_df[inter_df['BOND'].isin(wanted)]

    int_dict = inter_df.to_dict()
    int_dict['LIGCOO'] = coord_tuples(int_dict['LIGCOO'])
    int_dict['PROTCOO'] = coord_tuples(int_dict['PROTCOO'])
    int_dict['COLOR'] = {key: BOND_COL[bond][usg_map[usage]]
                         for key, bond in int_dict['BOND'].items()}

    return int_dict

def find_midpoint(coords: list) -> tuple[float, float, float]:
    """Return the per-axis mean of `coords`, each component rounded to 3 decimals."""
    centroid = np.mean(coords, axis=0)
    rounded = [round(component, 3) for component in centroid]
    return tuple(rounded)

#############################################
# LaboSpace Viewer

class LaboSpace:
    """Chainable builder around a `py3Dmol.view` for receptor/ligand scenes.

    Every `load_*`, `set_*`, `add_*` and `label_*` method returns `self` so
    calls can be chained. `self.count` is the index of the most recently
    loaded model (-1 before anything is loaded) and is what the styling
    methods target; `self.residues` accumulates highlighted residue ids.
    """

    # Stick style applied to highlighted residues.
    residue_style = {
        'stick':
         {'colorscheme': 'orangeCarbon', 'radius': 0.15}}
    # Label options for residue labels.
    residue_label = {
        'alignment': 'bottomLeft',
        'showBackground': False,
        'inFront': True,
        'fontSize': 14,
        'fontColor': '0x000000',
        'screenOffset': {'x': 25, 'y': 25}}
    # Label options for per-atom labels.
    atom_label = {
        'alignment': 'bottomLeft',
        'showBackground': False,
        'inFront': True,
        'fontSize': 14,
        'fontColor': '0x000000',
        'screenOffset': {'x': 10, 'y': 10}}

    def __init__(self, vw: int = 500, vh: int = 500) -> None:
        """Create a view of `vw` x `vh` with no models loaded."""
        self.mview = py3Dmol.view(width=vw, height=vh)
        self.count = -1
        self.residues = []

    def read_moldata(self, inpt_file: str) -> str:
        """Return the full text content of `inpt_file`."""
        # Context manager closes the handle even if read() raises.
        with open(inpt_file, 'r') as inpt:
            return inpt.read()

    def load_receptor(self, inpt_file: str) -> object:
        """Add `inpt_file` to the view as a 'pdb' model and make it current."""
        data = self.read_moldata(inpt_file)
        self.mview.addModel(data, 'pdb')
        self.count += 1
        return self

    def load_ligand(self, inpt_file: str) -> object:
        """Add `inpt_file` to the view (no explicit format) and make it current."""
        data = self.read_moldata(inpt_file)
        self.mview.addModel(data)
        self.count += 1
        return self

    def set_style(self,
                  show_represent: bool = True,
                  represent_type: str = 'cartoon',
                  represent_style: dict = {}
                  ) -> object:
        """Set the current model's style, or clear it when `show_represent` is false."""
        style = {represent_type: represent_style} if show_represent else {}
        self.mview.setStyle({'model': self.count}, style)
        return self

    def add_style(self,
                  show_represent: bool = True,
                  represent_style: dict = {}
                  ) -> object:
        """Add `represent_style` on top of the current model's style."""
        if show_represent:
            self.mview.addStyle(
                {'model': self.count},
                represent_style)
        return self

    def add_residues(self,
                     show_residues: bool = True,
                     residue_number: str = ''
                     ) -> object:
        """Highlight and label residues of the current model.

        `residue_number` is a comma- and/or whitespace-separated string of
        residue ids. Ids are remembered in `self.residues` (without
        duplicates) and the whole remembered list is styled and labelled.
        """
        if show_residues and residue_number:
            for resi in residue_number.replace(',', ' ').split():
                if resi not in self.residues:
                    self.residues.append(resi)
            self.mview.addStyle(
                {'and': [{'model': self.count}, {'resi': self.residues}]},
                self.residue_style)
            self.mview.addResLabels(
                {'and': [{'model': self.count}, {'resi': self.residues}]},
                self.residue_label)
        return self

    def add_surface(self,
                    show_surface: bool = True,
                    surface_type: str = 'SES',
                    surface_style: dict = {}
                    ) -> object:
        """Add a surface of `surface_type` for the current model."""
        if show_surface:
            self.mview.addSurface(
                surface_type,
                surface_style,
                {'model': self.count})
        return self

    def add_gridbox(self,
                    show_gridbox: bool,
                    center: list[float],
                    bxsize: list[float]
                    ) -> object:
        """Draw the grid box and two screen labels showing its centre and size."""
        if show_gridbox:
            bxi, byi, bzi = center
            bxf, byf, bzf = bxsize
            self.mview.addBox({
                'center': {'x': bxi, 'y': byi, 'z': bzi},
                'dimensions': {'w': bxf, 'h': byf, 'd': bzf},
                'color': 'skyBlue',
                'opacity': 0.6})
            self.mview.addLabel(
                f'center: {bxi:>8}, {byi:>8}, {bzi:>8}',
                {'showBackground': False,
                 'fontSize': 14,
                 'fontColor': '0x000000',
                 'useScreen': True,
                 'screenOffset': {'x': 15, 'y': 0}})
            self.mview.addLabel(
                f'bxsize: {bxf:>8}, {byf:>8}, {bzf:>8}',
                {'showBackground': False,
                 'fontSize': 14,
                 'fontColor': '0x000000',
                 'useScreen': True,
                 'screenOffset': {'x': 15, 'y': -20}})
        return self

    def add_interaction(self,
                        interaction_file: str,
                        show_interaction: bool = True,
                        select_interaction: list = []
                        ) -> object:
        """Draw the interactions listed in `interaction_file`.

        `select_interaction` may be a list of interaction types or a
        delimited string; empty selects everything. Interacting residues
        that are not yet highlighted are styled and labelled on model 0, and
        each interaction is drawn as a dashed cylinder with a distance label
        at its midpoint.
        """
        if show_interaction:
            # interaction_dict() parses a delimited string, so a list (the
            # default here) is joined before being passed on.
            if isinstance(select_interaction, str):
                selection = select_interaction
            else:
                selection = ' '.join(select_interaction or ())
            int_dict = interaction_dict(interaction_file, selection, 'lbsp')
            dist = int_dict['DIST'].values()
            resn = int_dict['RESNR'].values()
            ligcoo = int_dict['LIGCOO'].values()
            prtcoo = int_dict['PROTCOO'].values()
            color = int_dict['COLOR'].values()

            # add_residues() stores ids as strings while the CSV column is
            # presumably numeric, so compare on the string form to avoid
            # styling and labelling the same residue twice.
            known = {str(resi) for resi in self.residues}
            int_res = [resi for resi in dict.fromkeys(resn)
                       if str(resi) not in known]
            self.residues.extend(int_res)
            self.mview.addStyle(
                {'and': [{'model': 0}, {'resi': int_res}]},
                self.residue_style)
            self.mview.addResLabels(
                {'and': [{'model': 0}, {'resi': int_res}]},
                self.residue_label)

            for dis, col, lig, prt in zip(dist, color, ligcoo, prtcoo):
                mid = find_midpoint([lig, prt])
                self.mview.addCylinder(
                    {'start': {'x': lig[0], 'y': lig[1], 'z': lig[2]},
                     'end': {'x': prt[0], 'y': prt[1], 'z': prt[2]},
                     'radius': 0.05,
                     'fromCap': 1,
                     'toCap': 1,
                     'color': col,
                     'dashed': True})
                self.mview.addLabel(
                    str(dis) + ' Å',
                    {'position': {'x': mid[0], 'y': mid[1], 'z': mid[2]},
                     'alignment': 'bottomLeft',
                     'inFront': False,
                     'backgroundColor': col,
                     'fontSize': 10,
                     'screenOffset': {'x': 10, 'y': 10}})
        return self

    def label_atoms(self, show_label: bool = False) -> object:
        """Label every atom of the current model with its 'atom' property."""
        # WARNING: Avoid applying on protein !!!
        if show_label:
            self.mview.addPropertyLabels(
                'atom',
                {'model': self.count},
                self.atom_label)
        return self

    def view_space(self,
                   zoom_model: int = -1,
                   slab_view: bool = False,
                   slab_model: int = -1,
                   background_colour: str = '0xFFFFFF'
                   ) -> None:
        """Finalise the scene (background, projection, zoom, optional slab) and show it."""
        self.mview.setBackgroundColor(background_colour)
        self.mview.setProjection('orthographic')
        self.mview.zoomTo({'model': zoom_model})
        if slab_view:
            self.mview.fitSlab({'model': slab_model})
        self.mview.show()

import py3Dmol

class LaboSpaceAnother:
    """Chainable py3Dmol scene builder with an extra `load_pocket` loader.

    `self.count` is the index of the most recently loaded model (-1 when
    nothing is loaded) and is the model targeted by the styling methods.
    `add_interaction` is a placeholder that draws nothing.
    """

    # Stick style for highlighted residues.
    residue_style = {'stick': {'colorscheme': 'orangeCarbon', 'radius': 0.15}}
    # Label options for residue labels.
    residue_label = {'alignment': 'bottomLeft',
                     'showBackground': False,
                     'inFront': True,
                     'fontSize': 14,
                     'fontColor': '0x000000',
                     'screenOffset': {'x': 25, 'y': 25}}
    # Label options for per-atom labels.
    atom_label = {'alignment': 'bottomLeft',
                  'showBackground': False,
                  'inFront': True,
                  'fontSize': 14,
                  'fontColor': '0x000000',
                  'screenOffset': {'x': 10, 'y': 10}}

    def __init__(self, vw: int = 500, vh: int = 500) -> None:
        """Create a `vw` x `vh` view with no models loaded."""
        self.residues = []
        self.count = -1
        self.mview = py3Dmol.view(width=vw, height=vh)

    def read_moldata(self, inpt_file: str) -> str:
        """Return the text content of `inpt_file`."""
        with open(inpt_file, 'r') as handle:
            return handle.read()

    def _add_model(self, inpt_file: str, *model_format: str) -> object:
        # Shared loader: read the file, add it to the view, advance the model index.
        self.mview.addModel(self.read_moldata(inpt_file), *model_format)
        self.count += 1
        return self

    def load_receptor(self, inpt_file: str) -> object:
        """Load `inpt_file` as a 'pdb' model and make it the current model."""
        return self._add_model(inpt_file, 'pdb')

    def load_ligand(self, inpt_file: str) -> object:
        """Load `inpt_file` without an explicit format and make it current."""
        return self._add_model(inpt_file)

    def load_pocket(self, pocket_file: str) -> object:
        """Load `pocket_file` without an explicit format and make it current."""
        return self._add_model(pocket_file)

    def set_style(self,
                  show_represent: bool = True,
                  represent_type: str = 'cartoon',
                  represent_style: dict = {}
                  ) -> object:
        """Set the current model's style; an empty style hides it."""
        target = {'model': self.count}
        if not show_represent:
            self.mview.setStyle(target, {})
            return self
        self.mview.setStyle(target, {represent_type: represent_style})
        return self

    def add_style(self,
                  show_represent: bool = True,
                  represent_style: dict = {}
                  ) -> object:
        """Layer `represent_style` on top of the current model's style."""
        if not show_represent:
            return self
        self.mview.addStyle({'model': self.count}, represent_style)
        return self

    def add_residues(self,
                     show_residues: bool = True,
                     residue_number: str = ''
                     ) -> object:
        """Highlight and label the residues named in `residue_number`.

        The string is split on commas and whitespace; the unique ids are
        appended to `self.residues`, and the whole accumulated list is then
        styled and labelled on the current model.
        """
        if not (show_residues and residue_number):
            return self
        requested = residue_number.replace(',', ' ').split()
        self.residues.extend(list(set(requested)))
        self.mview.addStyle(
            {'and': [{'model': self.count}, {'resi': self.residues}]},
            self.residue_style)
        self.mview.addResLabels(
            {'and': [{'model': self.count}, {'resi': self.residues}]},
            self.residue_label)
        return self

    def add_surface(self,
                    show_surface: bool = True,
                    surface_type: str = 'SES',
                    surface_style: dict = {}
                    ) -> object:
        """Add a surface of `surface_type` for the current model."""
        if not show_surface:
            return self
        self.mview.addSurface(surface_type, surface_style, {'model': self.count})
        return self

    def add_gridbox(self,
                    show_gridbox: bool,
                    center: list[float],
                    bxsize: list[float]
                    ) -> object:
        """Draw the grid box plus two screen labels with its centre and size."""
        if not show_gridbox:
            return self

        def screen_label(y_offset: int) -> dict:
            # Fresh option dict per label; only the vertical offset differs.
            return {'showBackground': False,
                    'fontSize': 14,
                    'fontColor': '0x000000',
                    'useScreen': True,
                    'screenOffset': {'x': 15, 'y': y_offset}}

        bxi, byi, bzi = center
        bxf, byf, bzf = bxsize
        box_spec = {
            'center': {'x': bxi, 'y': byi, 'z': bzi},
            'dimensions': {'w': bxf, 'h': byf, 'd': bzf},
            'color': 'skyBlue',
            'opacity': 0.6}
        self.mview.addBox(box_spec)
        self.mview.addLabel(f'center: {bxi:>8}, {byi:>8}, {bzi:>8}', screen_label(0))
        self.mview.addLabel(f'bxsize: {bxf:>8}, {byf:>8}, {bzf:>8}', screen_label(-20))
        return self

    def add_interaction(self,
                        interaction_file: str,
                        show_interaction: bool = True,
                        select_interaction: list = []
                        ) -> object:
        """Placeholder: accepts the arguments but draws nothing."""
        # Interaction visualization is not implemented for this class.
        return self

    def label_atoms(self, show_label: bool = False) -> object:
        """Label every atom of the current model with its 'atom' property."""
        if not show_label:
            return self
        self.mview.addPropertyLabels('atom', {'model': self.count}, self.atom_label)
        return self

    def view_space(self,
                   zoom_model: int = -1,
                   slab_view: bool = False,
                   slab_model: int = -1,
                   background_colour: str = '0xFFFFFF') -> None:
        """Apply background, projection, zoom and optional slab, then show the view."""
        view = self.mview
        view.setBackgroundColor(background_colour)
        view.setProjection('orthographic')
        view.zoomTo({'model': zoom_model})
        if slab_view:
            view.fitSlab({'model': slab_model})
        view.show()

# Signal that every class and helper in this cell has been defined.
print('+ Methods and functions successfully built')

+ Methods and functions successfully built


  and should_run_async(code)


In [None]:
import zipfile
import os

# Archive on the mounted Drive and the folder it is unpacked into.
zip_file_path = '/content/gdrive/MyDrive/Docking_benchmarks/posebusters_paper_data.zip'
extract_to_folder = '/content/gdrive/MyDrive/Docking_benchmarks/PB_new_run'

# Create the destination if needed, then unpack the whole archive into it.
os.makedirs(extract_to_folder, exist_ok=True)
with zipfile.ZipFile(zip_file_path, mode='r') as zip_ref:
    zip_ref.extractall(path=extract_to_folder)

print(f'Contents of {zip_file_path} have been extracted to {extract_to_folder}')

Contents of /content/gdrive/MyDrive/Docking_benchmarks/posebusters_paper_data.zip have been extracted to /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run


In [None]:
#@title **Install Reduce to assign explicit hydrogens**
# Clone the Reduce sources into ./reduce (relative to the current working directory).
!git clone https://github.com/rlabduke/reduce.git
# Build with make inside the clone; no install step is run here.
!cd reduce && make

Cloning into 'reduce'...
remote: Enumerating objects: 2961, done.[K
remote: Counting objects: 100% (423/423), done.[K
remote: Compressing objects: 100% (155/155), done.[K
remote: Total 2961 (delta 279), reused 390 (delta 267), pack-reused 2538[K
Receiving objects: 100% (2961/2961), 39.72 MiB | 10.21 MiB/s, done.
Resolving deltas: 100% (2124/2124), done.
Start Compling:
for i in toolclasses libpdb reduce_src ;\
do \
	(cd $i ; echo "making" all "in /$i..."; \
		make  'CXXDEBUGFLAGS=' all); \
done
making all in /toolclasses...
make[1]: Entering directory '/content/reduce/toolclasses'
g++ -O  -DCHARFUNCMACROS -DINCTEMPLATEDEFNS -DBOOLPREDEFINED -DBRACKETOPERPARMS -DLISTFRIENDFIX -c UseCount.cpp -o UseCount.o
g++ -O  -DCHARFUNCMACROS -DINCTEMPLATEDEFNS -DBOOLPREDEFINED -DBRACKETOPERPARMS -DLISTFRIENDFIX -c Point3d.cpp -o Point3d.o
g++ -O  -DCHARFUNCMACROS -DINCTEMPLATEDEFNS -DBOOLPREDEFINED -DBRACKETOPERPARMS -DLISTFRIENDFIX -c utility.cpp -o utility.o
[01m[Kutility.cpp:[m[K In func

In [None]:
# Install the compiler toolchain and CMake.
!apt-get update
!apt-get install -y build-essential cmake

# NOTE(review): the previous cell already clones this repository into ./reduce;
# if both cells run from the same directory this clone will find an existing
# destination — confirm this is intended.
!git clone https://github.com/rlabduke/reduce.git

# Configure an out-of-tree build of /content/reduce in /root/build/reduce.
!mkdir -p /root/build/reduce
!cmake -S /content/reduce -B /root/build/reduce

# Build and run the 'install' target.
!cmake --build /root/build/reduce --target install

# Copy the HET dictionary to /usr/local
!sudo cp /content/reduce/reduce_wwPDB_het_dict.txt /usr/local/reduce_wwPDB_het_dict.txt

# Sanity checks: the executable and the dictionary are where add_hydrogens()
# in the next cell expects them.
!which reduce
!ls /usr/local/bin/reduce
!ls /usr/local/reduce_wwPDB_het_dict.txt


0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
0% [Waiting for headers] [Connecting to security.ubuntu.com (185.125.190.83)] [1 InRelease 3,626 B/30% [Waiting for headers] [Connecting to security.ubuntu.com (185.125.190.83)] [Connecting to r2u.sta                                                                                                    Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Waiting for headers] [Connecting to security.ubuntu.com (185.125.190.83)] [Connected to r2u.stat                                                                                                    Get:3 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
0% [3 InRelease 12.7 kB/128 kB 10%] [Connecting to security.ubuntu.com (185.125.190.83)] [Connected                                                                                                     Hit:4 https://developer.download.nvidia.com/compute

In [None]:
#@title **Use Reduce to add explicit hydrogens to the protein**

import os
import subprocess

base_dir = '/content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set'

def add_hydrogens(input_file, output_file):
    """Run Reduce on one PDB file and write its output to ``output_file``.

    Reduce is invoked with ``-BUILD`` and ``-DB <het dictionary>``; whatever it
    prints on stdout is stored as the output structure.

    Args:
        input_file: Path of the PDB file handed to Reduce.
        output_file: Path that receives Reduce's stdout (overwritten if present).

    Returns:
        True when Reduce exits with status 0, False when it fails or the output
        file cannot be written. Failures are reported with ``print`` so a batch
        run can continue with the next file.

    Raises:
        FileNotFoundError: If the Reduce executable or the HET dictionary is
            missing from its expected location.
    """
    reduce_executable = '/usr/local/bin/reduce'
    het_dict_path = '/usr/local/reduce_wwPDB_het_dict.txt'

    if not os.path.isfile(reduce_executable):
        raise FileNotFoundError(f"Reduce executable not found at {reduce_executable}")

    if not os.path.isfile(het_dict_path):
        raise FileNotFoundError(f"HET dictionary not found at {het_dict_path}")

    # An argument list (no shell) passes paths verbatim, so spaces or shell
    # metacharacters in file names cannot break or alter the command.
    reduce_cmd = [reduce_executable, '-BUILD', '-DB', het_dict_path, str(input_file)]
    try:
        # stdout is the structure itself, so it goes straight to the output
        # file; only stderr is captured for the error report.
        with open(output_file, 'wb') as out_handle:
            subprocess.run(reduce_cmd, check=True, stdout=out_handle, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        print(f"Error processing {input_file}: {e}")
        print(f"Standard Error: {e.stderr.decode(errors='replace')}")
        return False
    except OSError as e:
        # e.g. the output location is not writable
        print(f"Error processing {input_file}: {e}")
        return False

    print(f"Hydrogens added to {input_file} successfully.")
    return True

# Walk the benchmark tree and protonate every '*_protein.pdb', writing the
# result next to the input as '*_protein_reduce.pdb'.
for root, dirs, files in os.walk(base_dir):
    protein_names = [name for name in files if name.endswith('_protein.pdb')]
    for file in protein_names:
        input_path = os.path.join(root, file)
        reduced_name = file.replace('_protein.pdb', '_protein_reduce.pdb')
        output_path = os.path.join(root, reduced_name)
        print(f"Processing {input_path}...")
        add_hydrogens(input_path, output_path)
        print(f"Saved to {output_path}")

print("Processing completed.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 Processing set: C 121 GLN     [2]: C 116 THR OG1 [11]: C 105 TYR OH  [13]
  : C 102 SER OG  [7]: B 333 TYR OH  [19]
 permutations: 38038
 Computing dot scores
 Num optimizations problems to be solved for this clique: 3
 Beginning Optimization
 Dynamic programming succeeded to fully optimize hypergraph
 Beginning Optimization
 Dynamic programming succeeded to fully optimize hypergraph
 Optimal score following low resolution optimization: 16.992
 Beginning Optimization
 Dynamic programming succeeded to fully optimize hypergraph
> : 17.821       
 Optimal score following high resolution, local optimization: 17.821

 Processing set: B 330 THR OG1 [6]: B 318 GLN     [2]: B 317 ASN     [2]
 permutations: 24
 Computing dot scores
 Num optimizations problems to be solved for this clique: 4
 Beginning Optimization
 Dynamic programming succeeded to fully optimize hypergraph
 Beginning Optimization
 Dynamic programming succeeded to

In [None]:
#@title **Remove bad atoms**

from pathlib import Path
import os

def _remove_bad_atoms(file_in: Path, file_out: Path):
    """Load a structure, drop molecules containing unwanted elements, and save it.

    Args:
        file_in: Structure file to load (as the object named "complex").
        file_out: Destination the remaining "complex" object is saved to.

    NOTE(review): `cmd` is not defined in this cell; presumably it is PyMOL's
    `cmd` module imported earlier in the notebook — confirm.
    """
    unwanted_elements = ("Mo", "B", "Li", "Xe", "As", "Cs", "V", "X")

    # Start from a clean session so objects from a previous file never leak in.
    cmd.reinitialize()
    cmd.load(filename=str(file_in), object="complex")
    for symbol in unwanted_elements:
        selection = f"bymolecule elem {symbol}"
        cmd.remove(selection)
    cmd.save(filename=str(file_out), selection="complex")

base_dir = Path('/content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set')

# Clean every Reduce output; results are written alongside the input with the
# '_protein_reduce_rm_bad.pdb' suffix.
for root, dirs, files in os.walk(base_dir):
    reduced_names = [name for name in files if name.endswith('_protein_reduce.pdb')]
    for file in reduced_names:
        input_path = Path(root) / file
        cleaned_stem = input_path.stem.replace('_protein_reduce', '_protein_reduce_rm_bad')
        output_path = input_path.with_name(cleaned_stem + '.pdb')
        print(f"Processing {input_path} to remove bad atoms...")
        _remove_bad_atoms(input_path, output_path)
        print(f"Saved to {output_path}")

print("All files processed and saved.")

Processing /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/7ZDY_6MJ/7ZDY_6MJ_protein_reduce.pdb to remove bad atoms...
Saved to /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/7ZDY_6MJ/7ZDY_6MJ_protein_reduce_rm_bad.pdb
Processing /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/7WCF_ACP/7WCF_ACP_protein_reduce.pdb to remove bad atoms...
Saved to /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/7WCF_ACP/7WCF_ACP_protein_reduce_rm_bad.pdb
Processing /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/8F8E_XJI/8F8E_XJI_protein_reduce.pdb to remove bad atoms...
Saved to /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/8F8E_XJI/8F8E_XJI_protein_reduce_rm_bad.pdb
Processing /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/7U3J_L6U/7U3J_L6U_protein_reduce.pdb to remove bad atoms.

In [None]:
# @title **Preparing proteins pdbqt files, after Reduce and removing bad atoms**
import os
import subprocess

base_dir = '/content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set'

# Inputs of this size (10 MiB) or larger are skipped instead of converted.
size_limit = 10 * 1024 * 1024

pdb_suffix = '_protein_reduce_rm_bad.pdb'
pdbqt_suffix = '_protein_reduce_rm_bad_input.pdbqt'

processed_proteins = 0  # files actually converted
skipped_proteins = 0    # files ignored because of the size limit
for subdir, _, files in os.walk(base_dir):
    for file in files:
        if not file.endswith(pdb_suffix):
            continue

        prot_pdb_file_path = os.path.join(subdir, file)

        if os.path.getsize(prot_pdb_file_path) >= size_limit:
            skipped_proteins += 1
            print(f'Skipped {prot_pdb_file_path}: size is at or above the {size_limit}-byte limit')
            continue

        # Swap only the trailing suffix of the file name; a replace() on the
        # whole path could also rewrite a matching directory component.
        prot_pdbqt_file_path = os.path.join(subdir, file[:-len(pdb_suffix)] + pdbqt_suffix)

        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed to obabel unchanged.
        obabel_cmd = ['obabel', prot_pdb_file_path, '-xr', '-O', prot_pdbqt_file_path, '-p', '7.4']

        # obabel's own output is discarded; check=True raises
        # CalledProcessError on a non-zero exit status.
        subprocess.run(obabel_cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        processed_proteins += 1

        print(f'Processed {prot_pdb_file_path} and saved as {prot_pdbqt_file_path}')

print("Processed proteins:", processed_proteins)
if skipped_proteins:
    print("Skipped proteins (size limit):", skipped_proteins)

Processed /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/7ZDY_6MJ/7ZDY_6MJ_protein_reduce_rm_bad.pdb and saved as /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/7ZDY_6MJ/7ZDY_6MJ_protein_reduce_rm_bad_input.pdbqt
Processed /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/7WCF_ACP/7WCF_ACP_protein_reduce_rm_bad.pdb and saved as /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/7WCF_ACP/7WCF_ACP_protein_reduce_rm_bad_input.pdbqt
Processed /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/8F8E_XJI/8F8E_XJI_protein_reduce_rm_bad.pdb and saved as /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/8F8E_XJI/8F8E_XJI_protein_reduce_rm_bad_input.pdbqt
Processed /content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set/7U3J_L6U/7U3J_L6U_protein_reduce_rm_bad.pdb and saved as /content/gdrive/

In [None]:
#@title **Now process ligands, add H, and convert to pdbqt by Meeko**

import os
from rdkit.Chem import MolFromMolFile, AddHs
from meeko import MoleculePreparation

# Base directory containing the subfolders
base_dir = '/content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set'

processed_ligands = 0

# For every starting-conformer SDF: load it, add explicit hydrogens (with
# coordinates), run Meeko's preparation, and write the PDBQT next to the input.
for subdir, _, files in os.walk(base_dir):
    for file in files:
        if not file.endswith('_ligand_start_conf.sdf'):
            continue

        processed_ligands += 1
        lig_sdf_file_path = os.path.join(subdir, file)

        lig = MolFromMolFile(str(lig_sdf_file_path), sanitize=True)
        assert lig is not None, f"Failed to load ligand from {lig_sdf_file_path}"

        lig = AddHs(lig, addCoords=True)
        assert lig.GetNumConformers() >= 1, "Ligand must have at least one conformer"

        meeko_prep = MoleculePreparation()
        meeko_prep.prepare(lig)

        lig_pdbqt_file_path = lig_sdf_file_path.replace(
            '_ligand_start_conf.sdf',
            '_ligand_start_conf_prepared.pdbqt',
        )
        with open(lig_pdbqt_file_path, 'w') as pdbqt_file:
            pdbqt_file.write(meeko_prep.write_pdbqt_string())

print("Processed ligands:", processed_ligands)

Processed ligands: 428


In [None]:
import os

base_dir = '/content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set'

def count_subfolders_with_protein_reduce(base_dir, suffix='_protein_reduce.pdb'):
    """Count directories under ``base_dir`` holding at least one file ending in ``suffix``.

    Every directory visited by ``os.walk`` is considered, including
    ``base_dir`` itself; each directory counts at most once.

    Args:
        base_dir: Root of the tree to scan.
        suffix: File-name ending to look for. Defaults to the Reduce output
            suffix this cell checks.

    Returns:
        The number of matching directories (0 if ``base_dir`` does not exist,
        because ``os.walk`` then yields nothing).
    """
    return sum(
        1
        for _root, _dirs, files in os.walk(base_dir)
        if any(name.endswith(suffix) for name in files)
    )

# The message is built from the same suffix that is counted, so the two cannot
# disagree (the old message named a different file type than the one counted).
protein_reduce_suffix = '_protein_reduce.pdb'
num_subfolders = count_subfolders_with_protein_reduce(base_dir, protein_reduce_suffix)

print(f"Number of subfolders containing at least one '{protein_reduce_suffix}' file: {num_subfolders}")

Number of subfolders containing at least one '_protein_reduce_rm_bad_input.pdbqt' file: 428


In [None]:
import os

base_dir = '/content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set'

def count_subfolders_with_protein_reduce(base_dir):
    """Return how many directories under ``base_dir`` contain a cleaned protein PDB.

    A directory (``base_dir`` included) is counted once if any of its files
    ends with '_protein_reduce_rm_bad.pdb'.
    """
    matching_dirs = [
        root
        for root, dirs, files in os.walk(base_dir)
        if any(file.endswith('_protein_reduce_rm_bad.pdb') for file in files)
    ]
    return len(matching_dirs)

num_subfolders = count_subfolders_with_protein_reduce(base_dir)

print(f"Number of subfolders containing at least one '_protein_reduce_rm_bad.pdb' file: {num_subfolders}")

Number of subfolders containing at least one '_protein_reduce_rm_bad.pdb' file: 428


In [None]:
import os

base_dir = '/content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set'

def count_subfolders_with_protein_reduce(base_dir):
    """Return how many directories under ``base_dir`` contain a receptor PDBQT.

    A directory (``base_dir`` included) is counted once if any of its files
    ends with '_protein_reduce_rm_bad_input.pdbqt'.
    """
    # Each directory contributes True (1) or False (0) to the total.
    return sum(
        any(file.endswith('_protein_reduce_rm_bad_input.pdbqt') for file in files)
        for root, dirs, files in os.walk(base_dir)
    )

num_subfolders = count_subfolders_with_protein_reduce(base_dir)

print(f"Number of subfolders containing at least one '_protein_reduce_rm_bad_input.pdbqt' file: {num_subfolders}")

Number of subfolders containing at least one '_protein_reduce_rm_bad_input.pdbqt' file: 428


In [None]:
import os

base_dir = '/content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set'

def count_subfolders_with_protein_reduce(base_dir):
    """Return how many directories under ``base_dir`` contain a prepared ligand PDBQT.

    Despite the function's name, this variant looks for files ending with
    '_ligand_start_conf_prepared.pdbqt'. A directory (``base_dir`` included)
    is counted at most once.
    """
    total = 0
    for _, _, filenames in os.walk(base_dir):
        for filename in filenames:
            if filename.endswith('_ligand_start_conf_prepared.pdbqt'):
                total += 1
                break  # one hit is enough for this directory
    return total

num_subfolders = count_subfolders_with_protein_reduce(base_dir)

print(f"Number of subfolders containing at least one '_ligand_start_conf_prepared.pdbqt' file: {num_subfolders}")

Number of subfolders containing at least one '_ligand_start_conf_prepared.pdbqt' file: 428


In [None]:
import shutil
import os

source_folder = '/content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set_SET1'
destination_base = '/content/gdrive/MyDrive/Docking_benchmarks/PB_new_run/posebusters_benchmark_set_SET'

# Make 10 copies
copies_created = 0
for i in range(2, 12):  # From SET2 to SET11 (10 copies)
    destination_folder = f"{destination_base}{i}"
    # copytree raises FileExistsError when the target already exists, so a
    # re-run of this cell skips sets that are already there. A folder left
    # behind by an interrupted copy is also skipped — delete it to redo it.
    if os.path.exists(destination_folder):
        print(f"Skipping {destination_folder}: already exists.")
        continue
    shutil.copytree(source_folder, destination_folder)
    copies_created += 1

print(f"{copies_created} copies created successfully.")


10 copies created successfully.


In [None]:
#@title **Set up the grid boxes, when the reference ligand is available.**

import os
from rdkit import Chem
from rdkit.Chem import rdMolTransforms

base_dir = '/content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set'

def compute_heavy_atom_centroid(mol):
    """
    Compute the geometric center of heavy atoms in the molecule.

    Heavy atoms are those with atomic number greater than 1; positions are
    read from the conformer returned by ``mol.GetConformer()``.

    Args:
        mol: Molecule object exposing ``GetConformer()`` and ``GetAtoms()``
            (an RDKit ``Mol`` in this notebook).

    Returns:
        ``[x, y, z]`` — the unweighted mean of the heavy-atom coordinates.

    Raises:
        ValueError: If the molecule has no heavy atoms.
    """
    conf = mol.GetConformer()

    heavy_atoms = [atom.GetIdx() for atom in mol.GetAtoms() if atom.GetAtomicNum() > 1]
    if not heavy_atoms:
        raise ValueError("No heavy atoms found in the molecule.")

    centroid = [0.0, 0.0, 0.0]
    for idx in heavy_atoms:
        pos = conf.GetAtomPosition(idx)
        centroid[0] += pos.x
        centroid[1] += pos.y
        centroid[2] += pos.z

    return [coord / len(heavy_atoms) for coord in centroid]

# Search-box edge lengths written to every config; identical for all
# complexes, so defined once outside the loop.
bxsize = [25, 25, 25]

# For each reference ligand, centre a box on its heavy-atom centroid and write
# the box definition to '<subfolder>_config_default' in the same folder.
for subdir, _, files in os.walk(base_dir):
    for file in files:
        # Match on the file-name ending, not a substring, so names that merely
        # contain '_ligand.sdf' (e.g. backups) are not picked up.
        if not file.endswith('_ligand.sdf'):
            continue

        sdf_file_path = os.path.join(subdir, file)

        suppl = Chem.SDMolSupplier(sdf_file_path)
        mol = suppl[0]  # only the first record of the SDF is used

        if mol is None:
            raise ValueError(f"Could not read molecule from {sdf_file_path}")

        centroid = compute_heavy_atom_centroid(mol)
        subfolder_name = os.path.basename(subdir)
        config_file_path = os.path.join(subdir, f'{subfolder_name}_config_default')

        with open(config_file_path, 'w') as cfg:
            cfg.write(f'center_x = {centroid[0]}\n')
            cfg.write(f'center_y = {centroid[1]}\n')
            cfg.write(f'center_z = {centroid[2]}\n')
            cfg.write('\n')
            cfg.write(f'size_x = {bxsize[0]}\n')
            cfg.write(f'size_y = {bxsize[1]}\n')
            cfg.write(f'size_z = {bxsize[2]}\n')

        print(f'Processed {sdf_file_path} and created config file {config_file_path}')

Processed /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7ZDY_6MJ/7ZDY_6MJ_ligand.sdf and created config file /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7ZDY_6MJ/7ZDY_6MJ_config_default
Processed /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7WCF_ACP/7WCF_ACP_ligand.sdf and created config file /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7WCF_ACP/7WCF_ACP_config_default
Processed /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/8F8E_XJI/8F8E_XJI_ligand.sdf and created config file /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/8F8E_XJI/8F8E_XJI_config_default
Processed /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7U3J_L6U/7U3J_L6U_ligand.sdf and created config file /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7U3J_L6U/7U3J_L6U_config_default
Processed /content/gdrive/MyDrive/Docking_benchmarks/posebusters

In [None]:
import os
from rdkit import Chem
from rdkit.Chem import AllChem

def read_box_params(config_path):
    """Parse the search-box settings from a docking config file.

    A line is used when it starts with one of the six recognised keys; the
    text after the first '=' is converted to float. Other lines are ignored.

    Args:
        config_path: Path of the config file to read.

    Returns:
        Dict mapping each key found ('center_x', 'center_y', 'center_z',
        'size_x', 'size_y', 'size_z') to its float value.
    """
    recognised_keys = ('center_x', 'center_y', 'center_z', 'size_x', 'size_y', 'size_z')

    box_params = {}
    with open(config_path, 'r') as file:
        for line in file:
            for key in recognised_keys:
                if line.startswith(key):
                    box_params[key] = float(line.split('=')[1].strip())
                    break
    return box_params

def read_ligand_coordinates(ligand_path):
    """Return the atom coordinates of the first molecule in an SDF file.

    Args:
        ligand_path: Path of the SDF file.

    Returns:
        List of ``(x, y, z)`` tuples, one per atom, or an empty list when the
        supplier hands back ``None`` for the first record.
    """
    suppl = Chem.SDMolSupplier(ligand_path)
    mol = next(suppl)
    if mol is None:
        return []

    coordinates = []
    for atom in mol.GetAtoms():
        point = mol.GetConformer().GetAtomPosition(atom.GetIdx())
        coordinates.append((point.x, point.y, point.z))
    return coordinates

def is_ligand_within_box(ligand_coords, box_params):
    """Check that every coordinate lies inside the axis-aligned search box.

    The box spans ``center ± size / 2`` on each axis; points on the boundary
    count as inside.

    Args:
        ligand_coords: Iterable of ``(x, y, z)`` tuples.
        box_params: Dict with 'center_x/y/z' and 'size_x/y/z' entries.

    Returns:
        True if all points are inside (also for an empty iterable), else False.
    """
    cx, cy, cz = box_params['center_x'], box_params['center_y'], box_params['center_z']
    sx, sy, sz = box_params['size_x'], box_params['size_y'], box_params['size_z']

    return all(
        cx - sx / 2 <= x <= cx + sx / 2
        and cy - sy / 2 <= y <= cy + sy / 2
        and cz - sz / 2 <= z <= cz + sz / 2
        for x, y, z in ligand_coords
    )

def process_all_folders(main_folder):
    """Check, for every complex folder, whether the ligand fits inside its box.

    Each immediate subdirectory of ``main_folder`` is searched for a file
    ending in '_config_default' and one ending in '_ligand.sdf'. Folders
    lacking either file are ignored.

    Args:
        main_folder: Directory whose subdirectories hold one complex each.

    Returns:
        Tuple ``(count, failures)``: the number of folders whose ligand lies
        completely within the box, and the list of subfolder paths where it
        does not (or where no ligand coordinates could be read).
    """
    ligand_within_box_count = 0
    ligand_not_within_box = []

    for subfolder in os.listdir(main_folder):
        subfolder_path = os.path.join(main_folder, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        config_file = None
        ligand_file = None
        for file in os.listdir(subfolder_path):
            if file.endswith('_config_default'):
                config_file = os.path.join(subfolder_path, file)
            elif file.endswith('_ligand.sdf'):
                ligand_file = os.path.join(subfolder_path, file)

        if not (config_file and ligand_file):
            continue

        box_params = read_box_params(config_file)
        ligand_coords = read_ligand_coordinates(ligand_file)

        # An unreadable ligand yields no coordinates, and the box test is
        # vacuously true for an empty list. Report such folders as failures
        # instead of silently counting them as "within the box".
        if ligand_coords and is_ligand_within_box(ligand_coords, box_params):
            ligand_within_box_count += 1
        else:
            ligand_not_within_box.append(subfolder_path)

    return ligand_within_box_count, ligand_not_within_box

# Run the containment check over every complex folder, then print the number of
# ligands that lie completely inside their box followed by the folders that
# were reported as not within it.
main_folder = '/content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set'
count, not_within_box_subfolders = process_all_folders(main_folder)
print(f"Number of times the ligand is completely within the search box: {count}")
print("Subfolders where the ligand is not within the box:")
for subfolder in not_within_box_subfolders:
    print(subfolder)


Number of times the ligand is completely within the search box: 426
Subfolders where the ligand is not within the box:
/content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7MSR_DCA
/content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7PT3_3KK


# Two config files need fixing (7MSR_DCA and 7PT3_3KK). Rewrite them as follows:

In [None]:
# Display the current search-box config of 7MSR_DCA.
file_path = '/content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7MSR_DCA/7MSR_DCA_config_default'

with open(file_path) as config_handle:
    content = config_handle.read()

print(content)

center_x = 15.718829787234041
center_y = 14.668978723404255
center_z = 19.0881914893617

size_x = 25
size_y = 25
size_z = 25



In [None]:
# Display the current search-box config of 7PT3_3KK.
file_path = '/content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7PT3_3KK/7PT3_3KK_config_default'

with open(file_path) as config_handle:
    content = config_handle.read()

print(content)

center_x = -24.699648148148142
center_y = -5.193777777777779
center_z = 30.072407407407397

size_x = 25
size_y = 25
size_z = 25



In [None]:
import os
from rdkit import Chem
from rdkit.Chem import AllChem

def read_box_params(config_path):
    """Read the box centre and size values from a docking config file.

    Only lines beginning with 'center_x', 'center_y', 'center_z', 'size_x',
    'size_y' or 'size_z' are parsed; the value is whatever follows the first
    '=' on the line, converted to float.

    Args:
        config_path: Path of the config file.

    Returns:
        Dict of the keys that were present, mapped to float values.
    """
    keys = ('center_x', 'center_y', 'center_z', 'size_x', 'size_y', 'size_z')

    box_params = {}
    with open(config_path, 'r') as file:
        for line in file:
            matched = next((key for key in keys if line.startswith(key)), None)
            if matched is not None:
                box_params[matched] = float(line.split('=')[1].strip())
    return box_params

def read_ligand_coordinates(ligand_path):
    """Collect ``(x, y, z)`` for each atom of the first molecule in an SDF file.

    Args:
        ligand_path: Path of the SDF file.

    Returns:
        List of coordinate tuples; empty when the first record comes back as
        ``None`` from the supplier.
    """
    suppl = Chem.SDMolSupplier(ligand_path)
    mol = next(suppl)
    if mol is None:
        return []

    positions = (
        mol.GetConformer().GetAtomPosition(atom.GetIdx())
        for atom in mol.GetAtoms()
    )
    return [(pos.x, pos.y, pos.z) for pos in positions]

def is_ligand_within_box(ligand_coords, box_params):
    """Tell whether all points fall inside the box described by ``box_params``.

    On each axis the allowed range is ``[center - size / 2, center + size / 2]``
    (inclusive).

    Args:
        ligand_coords: Iterable of ``(x, y, z)`` tuples.
        box_params: Dict providing 'center_x/y/z' and 'size_x/y/z'.

    Returns:
        False as soon as one point is outside on any axis; True otherwise
        (including when there are no points).
    """
    centers = [box_params[key] for key in ('center_x', 'center_y', 'center_z')]
    sizes = [box_params[key] for key in ('size_x', 'size_y', 'size_z')]

    lows = [center - size / 2 for center, size in zip(centers, sizes)]
    highs = [center + size / 2 for center, size in zip(centers, sizes)]

    for x, y, z in ligand_coords:
        for value, low, high in zip((x, y, z), lows, highs):
            if not (low <= value <= high):
                return False
    return True

def process_all_folders(main_folder):
    """Run the box-containment check for each complex folder in ``main_folder``.

    For every immediate subdirectory, the files ending in '_config_default'
    and '_ligand.sdf' are located; subdirectories missing either are skipped.

    Args:
        main_folder: Directory containing one subdirectory per complex.

    Returns:
        Tuple ``(count, failures)`` where ``count`` is the number of ligands
        completely inside their box and ``failures`` lists the subfolder paths
        where the ligand is outside or yielded no coordinates.
    """
    ligand_within_box_count = 0
    ligand_not_within_box = []

    for subfolder in os.listdir(main_folder):
        subfolder_path = os.path.join(main_folder, subfolder)
        if os.path.isdir(subfolder_path):
            config_file = None
            ligand_file = None

            for file in os.listdir(subfolder_path):
                if file.endswith('_config_default'):
                    config_file = os.path.join(subfolder_path, file)
                elif file.endswith('_ligand.sdf'):
                    ligand_file = os.path.join(subfolder_path, file)

            if config_file and ligand_file:
                box_params = read_box_params(config_file)
                ligand_coords = read_ligand_coordinates(ligand_file)
                # No coordinates means the ligand could not be read; the box
                # test would pass vacuously, so treat that case as a failure.
                inside = bool(ligand_coords) and is_ligand_within_box(ligand_coords, box_params)
                if inside:
                    ligand_within_box_count += 1
                else:
                    ligand_not_within_box.append(subfolder_path)

    return ligand_within_box_count, ligand_not_within_box

# Re-run the containment check over every complex folder and print the tally,
# followed by any folders still reported as not within the box.
main_folder = '/content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set'
count, not_within_box_subfolders = process_all_folders(main_folder)
print(f"Number of times the ligand is completely within the search box: {count}")
print("Subfolders where the ligand is not within the box:")
for subfolder in not_within_box_subfolders:
    print(subfolder)


Number of times the ligand is completely within the search box: 428
Subfolders where the ligand is not within the box:


In [None]:
#@title **Create docking folders**
import os

base_dir = '/content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set'

# Immediate subdirectories of the benchmark set, one per complex.
subfolders = [entry.path for entry in os.scandir(base_dir) if entry.is_dir()]

# Inside each complex folder create '<folder name>_default_docking'
# (no error if it already exists).
for subfolder in subfolders:
    subfolder_name = os.path.basename(subfolder)
    new_subsubfolder_path = os.path.join(subfolder, subfolder_name + '_default_docking')

    os.makedirs(new_subsubfolder_path, exist_ok=True)
    print(f'Created: {new_subsubfolder_path}')

Created: /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7ZDY_6MJ/7ZDY_6MJ_default_docking
Created: /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7WCF_ACP/7WCF_ACP_default_docking
Created: /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/8F8E_XJI/8F8E_XJI_default_docking
Created: /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7U3J_L6U/7U3J_L6U_default_docking
Created: /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/8AUH_L9I/8AUH_L9I_default_docking
Created: /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7XRL_FWK/7XRL_FWK_default_docking
Created: /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/7MS7_ZQ1/7MS7_ZQ1_default_docking
Created: /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/6Y7L_QMG/6Y7L_QMG_default_docking
Created: /content/gdrive/MyDrive/Docking_benchmarks/posebusters_benchmark_set/8CGC_LMR/8CGC_LMR_default_