In [13]:
!conda install -c conda-forge numpy swig boost-cpp libboost sphinx sphinx_rtd_theme -y
!pip install vina
!conda install -c conda-forge mdanalysis -y
!pip install rdkit
!pip install prolif
!conda install nglview -c conda-forge -y
!pip install pdb2pqr
# !pip install meeko
!pip install biopython
!pip install gemmi

Channels:
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): done
Solving environment: done


    current version: 25.3.0
    latest version: 25.5.1

Please update conda by running

    $ conda update -n base -c conda-forge conda



# All requested packages already installed.

Channels:
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): done
Solving environment: done


    current version: 25.3.0
    latest version: 25.5.1

Please update conda by running

    $ conda update -n base -c conda-forge conda



# All requested packages already installed.

Channels:
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): done
Solving environment: done


    current version: 25.3.0
    latest version: 25.5.1

Please update conda by running

    $ conda update -n base -c conda-forge conda



# All requested packages already installed.

Collecting gemmi
  Downloading gemmi-0.7.3-cp313-cp313-manylinux_2_27_x86_64.manyl

                                                                                
Preparing transaction: done
Verifying transaction: done
Executing transaction: done


In [1]:
import os
import numpy as np
from Bio.PDB import PDBList
import MDAnalysis as mda
import nglview as nv
from rdkit import Chem
from rdkit.Chem import AllChem, rdmolfiles



  import pkg_resources


In [2]:
# Working directories for receptor and ligand files. Both names keep their
# trailing slash because later cells build paths by plain string concatenation.
protein_folder = 'protein/'
ligand_folder = 'ligand/'

# Create both directories up front; exist_ok makes the cell safe to re-run.
for folder in (protein_folder, ligand_folder):
    os.makedirs(folder, exist_ok=True)

## Protein Preparation

In [3]:
# Download the receptor structure from the PDB and store it under a predictable
# name: <protein_folder><protein_id>.pdb
protein_id = "2BMF"

pdbl = PDBList()
# retrieve_pdb_file saves the PDB-format entry into `pdir` and returns the path
# of the file it wrote (the file name itself is chosen by Biopython).
protein_file_path = pdbl.retrieve_pdb_file(protein_id, file_format="pdb", pdir=protein_folder)

# Check the download BEFORE touching any previously saved copy: a failed download
# must not delete a good file from an earlier run, and should fail with a clear
# message instead of an opaque error from the rename.
if not protein_file_path or not os.path.exists(protein_file_path):
    raise FileNotFoundError(
        f"Download of PDB entry {protein_id} failed; no file found at {protein_file_path!r}"
    )

# os.replace overwrites an existing destination in a single step, so no separate
# "remove if it exists" is needed and the cell stays safe to re-run.
os.replace(protein_file_path, f"{protein_folder}{protein_id}.pdb")

Downloading PDB structure '2bmf'...


In [4]:
# Load the downloaded entry into MDAnalysis and render the full structure
# as a first visual sanity check.
downloaded_pdb_path = f"{protein_folder}{protein_id}.pdb"
u = mda.Universe(downloaded_pdb_path)
nv.show_mdanalysis(u)

NGLWidget()

In [5]:
# Split chain A into its protein atoms and its HOH (water) residues.
protein = u.select_atoms("protein and chainID A")
water = u.select_atoms("resname HOH and chainID A")

# Protein: replace the default representation with a surface coloured by
# hydrophobicity.
view = nv.show_mdanalysis(protein)
view.clear_representations()
view.add_representation('surface', colorScheme="hydrophobicity")

# Waters: add them to the SAME widget as a second component. Building a separate
# widget for them (as was done before) had no visible effect, because only `view`
# is displayed as the cell output.
water_view = None
if water.n_atoms > 0:  # nothing to add when chain A carries no HOH residues
    water_view = view.add_trajectory(nv.MDAnalysisTrajectory(water))
    water_view.add_representation('spacefill')

view

NGLWidget()

In [6]:
# Save the chain-A protein selection on its own; this file is the input for the
# protonation step that follows.
chain_a_pdb_path = f"{protein_folder}protein_{protein_id}.pdb"
protein.write(chain_a_pdb_path)



In [7]:
!pdb2pqr --pdb-output="{protein_folder}protein_h.pdb" --pH=7.4 "{protein_folder}protein_{protein_id}.pdb" "{protein_folder}protein_{protein_id}.pqr" --whitespace

INFO:PDB2PQR v3.7.1: biomolecular structure conversion software.
INFO:Please cite:  Jurrus E, et al.  Improvements to the APBS biomolecular solvation software suite.  Protein Sci 27 112-128 (2018).
INFO:Please cite:  Dolinsky TJ, et al.  PDB2PQR: expanding and upgrading automated preparation of biomolecular structures for molecular simulations. Nucleic Acids Res 35 W522-W525 (2007).
INFO:Checking and transforming input arguments.
INFO:Loading topology files.
INFO:Loading molecule: protein/protein_2BMF.pdb
ERROR:Error parsing line: invalid literal for int() with base 10: ''
ERROR:<REMARK     2>
ERROR:Truncating remaining errors for record type:REMARK

ERROR:['REMARK']
INFO:Setting up molecule.
INFO:Created biomolecule object with 442 residues and 3531 atoms.
INFO:Setting termini states for biomolecule chains.
INFO:Loading forcefield.
INFO:Loading hydrogen topology definitions.
INFO:Attempting to repair 24 missing atoms in biomolecule.
INFO:Added atom CG to residue LYS A 199 at coordinat

In [8]:
# Convert the protonated receptor from PQR to PDBQT by round-tripping it through
# MDAnalysis: read the PQR written by pdb2pqr, then write every atom back out
# with the format chosen from the ".pdbqt" extension.
# NOTE(review): confirm the atom types MDAnalysis writes are the AutoDock types
# the docking step expects — not verifiable from this cell alone.
receptor_pqr_path = f"{protein_folder}protein_{protein_id}.pqr"
receptor_pdbqt_path = f"{protein_folder}{protein_id}.pdbqt"

u = mda.Universe(receptor_pqr_path)
u.atoms.write(receptor_pdbqt_path)



In [9]:
# Post-process the just-written receptor PDBQT: turn TITLE and CRYST1 header
# records into REMARK records (presumably because the docking engine does not
# accept them — TODO confirm).
pdbqt_path = f"{protein_folder}{protein_id}.pdbqt"


def _demote_header_record(line):
    """Return `line` with a leading TITLE/CRYST1 record name replaced by REMARK.

    Only the record name at the very start of the line is rewritten; the rest of
    the line, including its line ending, is returned unchanged. Lines that start
    with anything else are returned as they are.
    """
    for record_name in ('TITLE', 'CRYST1'):
        if line.startswith(record_name):
            return 'REMARK' + line[len(record_name):]
    return line


with open(pdbqt_path, 'r') as file:
    file_content = file.read()

# Work line by line so that only record names are touched. A global
# str.replace would also rewrite the words TITLE / CRYST1 wherever else they
# happen to occur in the file.
file_content = ''.join(
    _demote_header_record(line) for line in file_content.splitlines(keepends=True)
)

# Write the modified content back to the same file.
with open(pdbqt_path, 'w') as file:
    file.write(file_content)

## Ligand Preparation

In [10]:
# Build a 3D ligand structure from SMILES and save it as PDB (for viewing) and
# MOL (input for ligand preparation).
smiles = "CCCSc1ncc(c(n1)C(=O)Nc2nc3ccc(cc3s2)OC)Cl"

mol = Chem.MolFromSmiles(smiles)
if mol is None:
    # MolFromSmiles signals a parse failure by returning None rather than raising.
    raise ValueError(f"RDKit could not parse SMILES: {smiles!r}")
mol = Chem.AddHs(mol)

# Conformer embedding is stochastic; a fixed seed makes the generated
# coordinates (and everything derived from them) reproducible between runs.
embed_status = AllChem.EmbedMolecule(mol, randomSeed=42)
if embed_status < 0:
    # EmbedMolecule returns -1 when no conformer could be generated; optimising
    # or writing a molecule without coordinates would only fail later.
    raise RuntimeError("RDKit failed to generate a 3D conformer for the ligand")

# UFFOptimizeMolecule returns 0 on convergence; anything else means the geometry
# may not be fully relaxed, which is worth knowing but not fatal.
uff_status = AllChem.UFFOptimizeMolecule(mol)
if uff_status != 0:
    print(f"Warning: UFF optimisation did not converge (status {uff_status})")

writer = rdmolfiles.PDBWriter(f"{ligand_folder}ligand.pdb")
writer.write(mol)
writer.close()

Chem.MolToMolFile(mol, f"{ligand_folder}ligand.mol")

In [11]:
# Reload the ligand PDB that was just written and render it to check the
# generated 3D geometry by eye.
ligand_pdb_path = f"{ligand_folder}ligand.pdb"
u = mda.Universe(ligand_pdb_path)
nv.show_mdanalysis(u)

NGLWidget()

In [12]:
# Prepare the ligand for docking with meeko's mk_prepare_ligand.py script:
# reads ligand.mol from the ligand folder and writes prepared_ligand.pdbqt next
# to it. Using meeko for this step also helps with visualising the ligand later.
# Requires the meeko package to be installed in the current environment.
!mk_prepare_ligand.py -i "{ligand_folder}ligand.mol" -o "{ligand_folder}prepared_ligand.pdbqt"

Input molecules processed: 1, skipped: 0
PDBQT files written: 1
PDBQT files not written due to error: 0
Input molecules with errors: 0


## Pre-Docking: Defining the Search Box

In [13]:
# Reload the chain-A protein file and pick the first of the two residues used
# to place the docking search box; render it to confirm the selection.
chain_a_pdb_path = f"{protein_folder}protein_{protein_id}.pdb"
u = mda.Universe(chain_a_pdb_path)

first_site_selection = "resid 388"
res1 = u.select_atoms(first_site_selection)
nv.show_mdanalysis(res1)

NGLWidget()

In [14]:
# Second residue used to place the docking search box, selected from the
# universe already held in `u`; render it to confirm the selection.
second_site_selection = "resid 599"
res2 = u.select_atoms(second_site_selection)
nv.show_mdanalysis(res2)

NGLWidget()

In [15]:
# Derive the docking search box from the two selected residues:
#   center     = midpoint of the two residue centres of geometry
#   ligand_box = per-axis distance between those two centres, plus padding
# NOTE(review): the box printed for this structure is only about 5.4 wide on one
# axis — check that it is large enough for the ligand before docking.
BOX_PADDING = 5  # padding of 5

cg1, cg2 = (residue.center_of_geometry() for residue in (res1, res2))
coors = np.vstack([cg1, cg2])

center = coors.mean(axis=0).tolist()
ligand_box = (np.ptp(coors, axis=0) + BOX_PADDING).tolist()

print("center:", center)
print("box:", ligand_box)

center: [-4.630055514756929, -3.6067424138086013, 55.17953026896775]
box: [8.110111062160946, 15.846151461805961, 5.402394651162503]


## Docking Ligands with AutoDock Vina

In [16]:
os.makedirs("docking_results", exist_ok=True)