In [3]:
! pip install deepchem

Collecting deepchem
  Using cached deepchem-2.7.1-py3-none-any.whl (693 kB)
Installing collected packages: deepchem
Successfully installed deepchem-2.7.1


In [15]:
! conda install -c conda-forge pdbfixer

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 23.7.4
  latest version: 23.11.0

Please update conda by running

    $ conda update -n base -c defaults conda

Or to minimize the number of packages updated during conda update use

     conda install conda=23.11.0



# All requested packages already installed.



In [16]:
! conda install -c conda-forge vina

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 23.7.4
  latest version: 23.11.0

Please update conda by running

    $ conda update -n base -c defaults conda

Or to minimize the number of packages updated during conda update use

     conda install conda=23.11.0



# All requested packages already installed.



In [15]:
! pip install -q mdtraj nglview
# !jupyter-nbextension enable nglview --py --sys-prefix  # for jupyter notebook
# !jupyter labextension install  nglview-js-widgets  # for jupyter lab

In [5]:
import os
import numpy as np
import pandas as pd

import tempfile

from rdkit import Chem
from rdkit.Chem import AllChem
import deepchem as dc

from deepchem.utils import download_url, load_from_disk

In [6]:
# Build the expected location of the compressed table inside the directory
# returned by DeepChem's get_data_dir().
data_dir = dc.utils.get_data_dir()
dataset_file = os.path.join(data_dir, "pdbbind_core_df.csv.gz")

# Download only when no file exists at the expected path.
# NOTE(review): download_url is called without an explicit destination;
# presumably it defaults to the same data directory -- confirm.
if not os.path.exists(dataset_file):
    print('File does not exist. Downloading file...')
    download_url(
        "https://s3-us-west-1.amazonaws.com/deepchem.io/datasets/pdbbind_core_df.csv.gz"
    )
    print('File downloaded...')

# Read the table and keep just the three columns used in the rest of the notebook.
raw_dataset = load_from_disk(dataset_file)[['pdb_id', 'smiles', 'label']]

In [7]:
# Preview the first two rows of the selected columns.
raw_dataset.head(2)

Unnamed: 0,pdb_id,smiles,label
0,2d3u,CC1CCCCC1S(O)(O)NC1CC(C2CCC(CN)CC2)SC1C(O)O,6.92
1,3cyx,CC(C)(C)NC(O)C1CC2CCCCC2C[NH+]1CC(O)C(CC1CCCCC...,8.0


In [8]:
from openmm.app import PDBFile
from pdbfixer import PDBFixer

from deepchem.utils.vina_utils import prepare_inputs

In [9]:
# Pick a single protein-ligand complex (the row at position 1) to visualize:
# its PDB identifier and the SMILES string of its ligand.
pdbid, ligand = raw_dataset[['pdb_id', 'smiles']].iloc[1]

In [10]:
%%time
fixer = PDBFixer(pdbid=pdbid)
PDBFile.writeFile(fixer.topology, fixer.positions, open('%s.pdb' % (pdbid), 'w'))

p, m = None, None
# fix protein, optimize ligand geometry, and sanitize molecules
try:
    p, m = prepare_inputs('%s.pdb' % (pdbid), ligand)
except:
    print('%s failed PDB fixing' % (pdbid)) 

if p and m:  # protein and molecule are readable by RDKit
    print(pdbid, p.GetNumAtoms())
    Chem.rdmolfiles.MolToPDBFile(p, '%s.pdb' % (pdbid))
    Chem.rdmolfiles.MolToPDBFile(m, 'ligand_%s.pdb' % (pdbid))



3cyx 1510
CPU times: user 1.09 s, sys: 74.4 ms, total: 1.16 s
Wall time: 3.96 s


In [11]:
import mdtraj as md
import nglview

from IPython.display import display, Image



In [12]:
# Load the protein and ligand PDB files written by the preparation cell.
# The file names are derived from `pdbid` (same pattern used when writing them)
# so this cell keeps working if a different complex is selected.
protein_mdtraj = md.load_pdb('%s.pdb' % (pdbid))
ligand_mdtraj = md.load_pdb('ligand_%s.pdb' % (pdbid))

In [13]:
# Build an nglview widget from the ligand trajectory; it is displayed in the next cell.
v = nglview.show_mdtraj(ligand_mdtraj)

In [14]:
# Render the ligand widget `v`.
display(v)  # interactive view outside Colab

NGLWidget()

In [16]:
# Build an nglview widget from the protein trajectory and render it.
view = nglview.show_mdtraj(protein_mdtraj)
display(view)  # interactive view outside Colab

NGLWidget()