# Docking

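This notebook demonstrates how to run an AutoDock Vina calculation as a crossflow workflow: the ligand and receptor are prepared with AutoDock Tools, a binding pocket is located with fpocket, and the ligand is then docked into that pocket with Vina.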

In [3]:
from crossflow import filehandling, tasks, clients
import sys
from urllib.request import urlretrieve
import numpy as np
import mdtraj as mdt

Create a crossflow client, connected to a local pool of workers:

In [4]:
# Start the crossflow client; every task in this notebook is run by passing it
# to client.submit().
client = clients.Client()

Make the SubprocessTasks for **fpocket**, **Vina** and the AutoDock Tools preparation scripts, and FunctionTasks for the remaining steps:

In [5]:
# The fpocket task:
# The command is a template. 'x.pdb' is declared as the task's single input
# (the receptor structure supplied at submit time), and 'x_out/x_out.pdb' is
# declared as the one output file handed back to the caller.
fpocket = tasks.SubprocessTask('fpocket -f x.pdb')
fpocket.set_inputs(['x.pdb'])
fpocket.set_outputs(['x_out/x_out.pdb'])

In [15]:
# The vina task:
# Inputs, in the order they must be given to client.submit(): the receptor and
# ligand .pdbqt files, then the search-box centre (xc, yc, zc) and the box size
# (sx, sy, sz), which fill the {xc} ... {sz} fields of the command template.
# Outputs: the docked poses ('out.pdbqt') and 'dock.log', which holds whatever
# the command writes to stdout (captured by the '>' redirection).
# NOTE(review): the executable is called 'mac_vina' here - presumably a
# platform-specific build; adjust the name to match your own installation.
vina = tasks.SubprocessTask('mac_vina --receptor r.pdbqt --ligand l.pdbqt --out out.pdbqt'
                                 ' --center_x {xc} --center_y {yc} --center_z {zc}'
                                 ' --size_x {sx} --size_y {sy} --size_z {sz} > dock.log')
vina.set_inputs(['r.pdbqt', 'l.pdbqt', 'xc', 'yc', 'zc', 'sx', 'sy', 'sz'])
vina.set_outputs(['out.pdbqt', 'dock.log'])

In [7]:
# AutoDock Tool based tasks to prepare receptor and ligand for docking.
# Both run a prepare_*4.py script through the 'adt' command: each takes a
# single .pdb file as input ('x.pdb') and hands back the .pdbqt file the
# script writes ('x.pdbqt').

# Receptor preparation (-r: receptor file, -o: output file):
prep_receptor = tasks.SubprocessTask('adt prepare_receptor4.py -r x.pdb -o x.pdbqt')
prep_receptor.set_inputs(['x.pdb'])
prep_receptor.set_outputs(['x.pdbqt'])

# Ligand preparation (-l: ligand file, -o: output file):
prep_ligand = tasks.SubprocessTask('adt prepare_ligand4.py -l x.pdb -o x.pdbqt')
prep_ligand.set_inputs(['x.pdb'])
prep_ligand.set_outputs(['x.pdbqt'])

In [8]:
def download_and_select(pdb_code, chain_name):
    '''
    Download a PDB entry from the RCSB and extract the protein atoms of one chain.

    The chain letters are collected from the 'CHAIN:' lines of the file, in
    the order they appear; the position of chain_name in that list is then
    used as the MDTraj chain index.
    NOTE(review): this assumes the header lists the chains in the same order
    as MDTraj numbers them - confirm for entries with several molecules.

    The downloaded file is left in the current directory as '<pdb_code>.pdb'.

    Args:
        pdb_code (str): 4-letter PDB code
        chain_name (str): 1-letter chain ID

    Returns:
        mdt.Trajectory: the receptor (protein atoms only)

    Raises:
        ValueError: if chain_name is not among the chains listed in the file
    '''
    pdb_file = pdb_code + '.pdb'
    urlretrieve('http://files.rcsb.org/download/' + pdb_file, pdb_file)

    chain_ids = []
    with open(pdb_file) as f:
        for line in f:
            if 'CHAIN:' in line:
                # e.g. "COMPND   3 CHAIN: A, B;" -> ['A', 'B']
                chain_ids += [c[0] for c in line.split()[3:]]

    # Fail early, with a message that names the chains that are available,
    # instead of the bare "... is not in list" raised by list.index().
    if chain_name not in chain_ids:
        raise ValueError('chain {!r} not found in {} (available chains: {})'.format(
            chain_name, pdb_file, ', '.join(chain_ids) or 'none'))
    cid = chain_ids.index(chain_name)

    # The file is loaded twice: once in full to build the atom selection, and
    # once more restricted to just the selected atoms.
    full_system = mdt.load(pdb_file)
    receptor_atoms = full_system.topology.select('protein and chainid {}'.format(cid))
    receptor = mdt.load(pdb_file, atom_indices=receptor_atoms)
    return receptor

In [9]:
def _pdbqt2pdb(infile):
    '''
    A Function to convert pdbqt files back to pdb ones

    Only the ATOM, HETATM, MODEL and ENDMDL records are copied across; every
    other line (REMARK, ROOT/BRANCH, TORSDOF, blank lines, ...) is dropped.
    The copied lines themselves are written unchanged.

    Args:
        infile (str): name of the input file, .pdbqt format

    Returns:
        str: name of the .pdb file (always 'tmp.pdb', written in the current
            working directory)
    '''
    wanted_records = ('ATOM', 'HETATM', 'MODEL', 'ENDMDL')
    outfile = 'tmp.pdb'
    # Both files are managed by the 'with' statement so they are closed even
    # if reading or writing fails part-way through.
    with open(infile, 'r') as fin, open(outfile, 'w') as fout:
        for line in fin:
            # The record name occupies the first six columns of a PDB line.
            # Compare it exactly: a substring test would also accept empty
            # and very short lines, because '' is "in" every string.
            if line[:6].strip() in wanted_records:
                fout.write(line)
    return outfile

# Wrap the converter as a crossflow FunctionTask so that it can be submitted
# through the client like the other tasks: one input ('infile', the function's
# argument) and one output ('outfile', the value the function returns).
pdbqt2pdb = tasks.FunctionTask(_pdbqt2pdb)
pdbqt2pdb.set_inputs(['infile'])
pdbqt2pdb.set_outputs(['outfile'])

In [10]:
def _get_dimensions(pockets):
    '''
    Work out a box that encloses the first pocket found by fpocket.

    Args:
        pockets (str): Name of the pdb format file produced by fpocket

    Returns:
        tuple: (xc, yc, zc, sx, sy, sz) - the centre of the pocket and its
            size along x/y/z, in Angstroms. Each size is the extent of the
            pocket plus a 2.0 Angstrom buffer.
    '''
    padding = 2.0
    nm_to_angstrom = 10  # the loaded coordinates are in nanometres
    pocket_number = 1

    structure = mdt.load(pockets)
    pocket_atoms = structure.topology.select(
        'resname STP and residue {}'.format(pocket_number))

    # Bounding box of the pocket atoms in the first (only) frame.
    coords = structure.xyz[0][pocket_atoms]
    lower = coords.min(axis=0)
    upper = coords.max(axis=0)

    xc, yc, zc = tuple(nm_to_angstrom * (lower + upper) / 2)
    sx, sy, sz = tuple(nm_to_angstrom * (upper - lower) + padding)
    return xc, yc, zc, sx, sy, sz

# Wrap the function as a crossflow FunctionTask so that it can be submitted
# through the client. The output names are listed in the same order as the
# values in the tuple returned by _get_dimensions.
get_dimensions = tasks.FunctionTask(_get_dimensions)
get_dimensions.set_inputs(['pockets'])
get_dimensions.set_outputs(['xc', 'yc', 'zc', 'sx', 'sy', 'sz'])

In [11]:
# File handler used to load local files as inputs for the workflow.
fh = filehandling.FileHandler()
# Disabled: optional provisioning step that would map a 'provision' task over
# the workers. NOTE(review): 'provision' is not defined anywhere in the visible
# part of this notebook - define it before re-enabling these lines.
#provisionscript = fh.load('provision.dat')
#results = client.map(provision, [provisionscript] * 8) # >= max number of workers
#print(results[0].result())

Now we construct the workflow. For convenience it's split up here into sections.

In [12]:
# The ligand structure, loaded from a local PDB file.
ligand = fh.load('prz.pdb')
# PDB code(s) and matching chain ID(s) of the receptor(s) of interest;
# the two lists are parallel (one chain ID per PDB code).
pdbcodes = ['1qy1']
chain_ids = ['A']
# Disabled alternative: read the receptors from 'complexes.list', where the
# first four characters of each line are taken as the PDB code and the fifth
# as the chain ID.
#with open('complexes.list') as f:
#    receptors = f.readlines()
#pdbcodes = [r[:4].lower() for r in receptors]
#chain_ids = [r[4] for r in receptors]
print(pdbcodes)

['1qy1']


In [13]:
def best_affinity(logfiles):
    '''
    Find which docking log reports the lowest affinity for its top-ranked pose.

    Args:
        logfiles (iterable): objects whose .result() gives something with a
            .read_text() method returning the text of a Vina log

    Returns:
        (int or None, float): index within logfiles of the log with the
            lowest top-ranked affinity, and that affinity. If no log reports
            a negative affinity the result is (None, 0.0).
    '''
    winner, lowest = None, 0.0
    for index, future in enumerate(logfiles):
        text = future.result().read_text()
        # Rows of the results table for the pose ranked 1; the affinity is
        # the second column.
        top_rows = [row for row in text.split('\n') if '   1   ' in row]
        for row in top_rows:
            score = float(row.split()[1])
            if score < lowest:
                winner, lowest = index, score
    return winner, lowest

In [26]:
# Clear any results left over from an earlier execution of this cell, so that
# a failed run can never report stale values (or hit a NameError) at the end.
logfile = None
poses = None

# 'while True: ... break' is used as a run-once block: each step checks the
# status of its task and breaks out as soon as one fails, skipping the rest.
while True:
    # prepare ligand for docking
    ligand_qt, status = client.submit(prep_ligand, ligand)
    if status.result().returncode != 0:
        print('prepare ligand: FAILED')
        break
    print('prepare ligand: PASSED')

    # prepare receptor for docking, using the first receptor from the
    # pdbcodes / chain_ids lists set up in the configuration cell
    receptor = download_and_select(pdbcodes[0], chain_ids[0])
    receptor_qt, status = client.submit(prep_receptor, receptor)
    if status.result().returncode != 0:
        print('prepare receptor: FAILED')
        break
    print('prepare receptor: PASSED')

    # run fpocket
    pockets, status = client.submit(fpocket, receptor)
    if status.result().returncode != 0:
        print('run fpocket: FAILED')
        break
    print('run fpocket: PASSED')

    # Get pocket dimensions
    xc, yc, zc, sx, sy, sz, status = client.submit(get_dimensions, pockets)
    if status.result().returncode != 0:
        print('get pocket dimensions: FAILED')
        break
    print('get pocket dimenions: PASSED')

    # Run the dock
    dock, logfile, status = client.submit(vina, receptor_qt, ligand_qt, xc, yc, zc, sx, sy, sz)
    if status.result().returncode != 0:
        print('vina docking: FAILED')
        break
    print('vina docking: PASSED')

    # Convert the poses into an MDTraj trajectory:
    posepdb, status = client.submit(pdbqt2pdb, dock)
    if status.result().returncode != 0:
        print('pose extraction: FAILED')
        break
    print('pose extraction: PASSED')
    poses = mdt.load(posepdb.result())
    break

# poses is only set once every step has passed, so it doubles as the
# success flag for the whole workflow.
if poses is None:
    print('docking workflow did not complete: no results to display')
else:
    print(logfile.result().read_text())
    print(poses)

prepare ligand: PASSED
prepare receptor: PASSED
run fpocket: PASSED
get pocket dimenions: PASSED
vina docking: PASSED
pose extraction: PASSED
AutoDock Vina v1.2.3-9-g75f87a4-mod
#################################################################
# If you used AutoDock Vina in your work, please cite:          #
#                                                               #
# J. Eberhardt, D. Santos-Martins, A. F. Tillack, and S. Forli  #
# AutoDock Vina 1.2.0: New Docking Methods, Expanded Force      #
# Field, and Python Bindings, J. Chem. Inf. Model. (2021)       #
# DOI 10.1021/acs.jcim.1c00203                                  #
#                                                               #
# O. Trott, A. J. Olson,                                        #
# AutoDock Vina: improving the speed and accuracy of docking    #
# with a new scoring function, efficient optimization and       #
# multithreading, J. Comp. Chem. (2010)                         #
# DOI 10.1002/jcc.21334       