# Reverse Docking

This notebook demonstrates how a variety of different tools can be glued together into an efficient and flexible workflow using **Crossflow**.

The workflow docks a ligand (PRZ) to a set of protein structures (taken from the cryptosite database).

The notebook requires **fpocket** and **autodock vina** to be installed on the worker node(s) of your dask cluster.

In [13]:
from crossflow import filehandling, tasks, clients
from distributed import LocalCluster
import mdtraj as mdt
import numpy as np
from pathlib import Path

Create a crossflow client, connected to a local pool of workers:

In [2]:
cluster = LocalCluster(n_workers=1, threads_per_worker=1)
client = clients.Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 1
Total threads: 1,Total memory: 16.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:63588,Workers: 1
Dashboard: http://127.0.0.1:8787/status,Total threads: 1
Started: Just now,Total memory: 16.00 GiB

0,1
Comm: tcp://127.0.0.1:63593,Total threads: 1
Dashboard: http://127.0.0.1:63594/status,Memory: 16.00 GiB
Nanny: tcp://127.0.0.1:63591,
Local directory: /var/folders/v0/vzwwlsr12vvbmyxvhlrhgpf80000gp/T/dask-scratch-space/worker-g6po4dje,Local directory: /var/folders/v0/vzwwlsr12vvbmyxvhlrhgpf80000gp/T/dask-scratch-space/worker-g6po4dje


Make the SubprocessTasks for **fpocket** and **Vina**, and FunctionTasks for other tasks:

In [None]:
# The provisioning task - run on all workers before they do anything else
provision = tasks.SubprocessTask('chmod +x provision.sh && ./provision.sh')
provision.set_inputs(['provision.sh'])
provision.set_outputs(['STDOUT'])

In [3]:
# The fpocket task:
fpocket = tasks.SubprocessTask('rex fpocket -f x.pdb')
fpocket.set_inputs(['x.pdb'])
fpocket.set_outputs(['x_out/x_out.pdb'])

In [4]:
# The vina task:
vina = tasks.SubprocessTask('rex vina --receptor r.pdbqt --ligand l.pdbqt --out out.pdbqt > dock.log'
                                 ' --config conf.dat --num_modes 1')
vina.set_inputs(['r.pdbqt', 'l.pdbqt', 'conf.dat'])
vina.set_outputs(['out.pdbqt', 'dock.log'])

In [5]:
def _get_dimensions(pockets):
    '''
    A Function to find the centre and extents of each of the pockets found by fpocket
    
    Args:
        pockets (str): Name of the pdb format file produced by fpocket
        
    Returns:
        configs: the pocket centres and extents in x/y/z - in Angstroms, in vina .conf format
    '''
    buffer = 2.0 # buffer added to pocket extents (Angstroms)
    t = mdt.load(pockets)
    n_pockets = len([r for r in t.topology.residues if r.name == 'STP'])
    confs = []
    template = 'center_x = {:6.3f}\ncenter_y = {:6.3f}\ncenter_z = {:6.3f}\nsize_x = {:6.3f}\nsize_y = {:6.3f}\nsize_z = {:6.3f}\n'
    for ip in range(n_pockets):
        site = t.topology.select('resname STP and residue {}'.format(ip + 1))
        # In the next two lines, the factor of 10 is a conversion from nanometres to Angstroms:
        xc, yc, zc = tuple(10 * (t.xyz[0][site].min(axis=0) + t.xyz[0][site].max(axis=0)) / 2)
        sx, sy, sz = tuple(10 * (t.xyz[0][site].max(axis=0) - t.xyz[0][site].min(axis=0)) + buffer)
        conftext = template.format(xc, yc, zc, sx, sy, sz)
        conf_handle = fh.create(f'conf_{ip}.dat')
        conf_handle.write_text(conftext)
        confs.append(conf_handle)
        
    return confs

# Now make a FunctionTask for this:
get_dimensions = tasks.FunctionTask(_get_dimensions)
get_dimensions.set_inputs(['pockets'])
get_dimensions.set_outputs(['confs'])

In [6]:
fh = filehandling.FileHandler()
#provisionscript = fh.load('provision.dat')
#results = client.map(provision, [provisionscript] * 8) # >= max number of workers
#print(results[0].result())

Now we construct the workflow. For convenience it's split up here into sections.

In [7]:
receptor_dir = Path('./receptors')
receptor_files = receptor_dir.glob('*.pdbqt')
pdbcodes = []
receptors = []
for r in receptor_files:
    receptors.append(fh.load(r))
    pdbcodes.append(r.stem[:4])

In [14]:
print(pdbcodes)

['3hl8', '1za1', '2brl', '1ftl', '1ecc', '3ip0', '2piq', '2wkw', '1afq', '3hl7', '2ofp', '1u1d', '2h4k', '1pzy', '1eyj', '3hqp', '2yqs', '2bys', '3f82', '3lth', '3hok', '3bl7', '1gzf', '2w5k', '1q0b', '1t49', '1g67', '1oke', '2iyq', '3fgo', '1gky', '2oo8', '2ixu', '1yv3', '2q8h', '1lic', '1tr5', '3cfn', '2ohv', '3gqz', '1cib', '1j6z', '2gir', '1xvc', '2hka', '2ieg', '1ow3', '1ryo', '1s9d', '3fqk', '1d6y', '2iuz', '2gz7', '3bqm', '1imb', '1l5s', '2bu2', '1br6', '1ey3', '2egh', '2hvd', '3eks', '3dhh', '1ghy', '1nx3', '1ha3', '2jds', '3dc1', '2wi7', '1ank', '2v57', '2eum', '3h9j', '1fqc', '2npq', '3ixj', '3hzt', '2al4', '1ctr']


In [9]:
def best_affinity(logfiles):
    '''
    Return the affinity of the top-ranked pose
    '''
    best_a = 0.0
    best_p = None
    for i, logfile in enumerate(logfiles):
        for line in logfile.result().read_text().split('\n'):
            if '   1   ' in line:
                if float(line.split()[1]) < best_a:
                    best_a = float(line.split()[1])
                    best_p = i                            
    return best_p, best_a

# prepare ligand for docking
#ligand_qt = client.submit(prep_ligand, ligand)

In [28]:
for pdbcode, receptor in zip(pdbcodes[:5], receptors[:5]):
    pockets = client.submit(fpocket, receptor)
    configs = client.submit(get_dimensions, pockets)
    docks, logs = client.map(vina, receptor, ligand, configs.result())
    print(pdbcode, best_affinity(logs))

3hl8 (7, -5.39)
1za1 (0, -4.962)
2brl (14, -4.86)
1ftl (0, -4.747)
1ecc (0, -5.744)
