# Speed up g(r) code

I want to compare the timings for calculating the distances between many atoms.  This involves computing the double loop over all coordinates.

In [1]:
import time
import os.path as op

In [2]:
from sasmol import sasmol

In [3]:
import numpy as np

In [4]:
from scipy.spatial.distance import pdist

In [5]:
import concurrent.futures
import multiprocessing

n_cpu = multiprocessing.cpu_count()
print('n_cpu = {}'.format(n_cpu))

n_cpu = 24


In [6]:
import sys
sys.path.append('./')
from distance import distance

In [7]:
import dask.array as da # NOT APPROPRIATE FOR PROBLEM

In [8]:
# setup inputs
pdb_fname = 'test.pdb'
dcd_fname = 'test.dcd'
assert op.exists(pdb_fname), 'no such file: {}'.format(pdb_fname)
assert op.exists(dcd_fname), 'no such file: {}'.format(dcd_fname)
mol = sasmol.SasMol(0)
mol.read_pdb(pdb_fname)

reading filename:  test.pdb
num_atoms =  2048
>>> found  1  model(s) or frame(s)
finished reading frame =  1


In [9]:
# confirm indexing
n = 3
d = np.zeros([n, n])
for j in xrange(3):
    for k in xrange(j+1, 3):
        d[j, k] = 1
print(d)

[[ 0.  1.  1.]
 [ 0.  0.  1.]
 [ 0.  0.  0.]]


In [10]:
# confirm products
c1 = np.arange(3)
c2 = np.arange(3)/10.0
dsum = np.sqrt((c1[0] - c2[0]) ** 2  + (c1[1] - c2[1]) ** 2  + (c1[2] - c2[2]) ** 2)
nsum = np.sqrt(np.sum((c1 - c2) ** 2))
print(dsum, nsum)

(2.0124611797498111, 2.0124611797498111)


In [11]:
dcd_file = mol.open_dcd_read(dcd_fname)

In [12]:
%%timeit
# ignorant python
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

dist = np.zeros([n_frames, n_atoms, n_atoms])
for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    for j in xrange(n_atoms):
        for k in xrange(j+1, n_atoms):
            dist[i, j, k] = np.sqrt(np.sum((coor[j] - coor[k]) ** 2))
            
mol.close_dcd_read(dcd_file[0])

number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
1 loop, best of 3: 1min 2s per loop


In [13]:
# ignorant python
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

dist = np.zeros([n_frames, n_atoms, n_atoms])
for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    for j in xrange(n_atoms):
        for k in xrange(j+1, n_atoms):
            dist[i, j, k] = np.sqrt(np.sum((coor[j] - coor[k]) ** 2))
            
mol.close_dcd_read(dcd_file[0])

dp = dist[-1, :4, :4]

number of frames: 5
number of atoms: 2048
.....result =  0


In [14]:
dp

array([[  0.        ,   9.55838011,  12.49256537,  11.12902151],
       [  0.        ,   0.        ,   7.85849646,   4.99603282],
       [  0.        ,   0.        ,   0.        ,  12.75118492],
       [  0.        ,   0.        ,   0.        ,   0.        ]])

In [16]:
%%timeit
# numpy version
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

dist = np.zeros([n_frames, n_atoms, n_atoms])
for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    dist[i] = np.sqrt(((coor[:, None, :] - coor) ** 2).sum(-1))
    
mol.close_dcd_read(dcd_file[0])

number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
1 loop, best of 3: 828 ms per loop


In [17]:
# numpy version
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

dist = np.zeros([n_frames, n_atoms, n_atoms])
for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    dist[i] = np.sqrt(((coor[:, None, :] - coor) ** 2).sum(-1))
    
mol.close_dcd_read(dcd_file[0])

dn = dist[-1, :4, :4]

number of frames: 5
number of atoms: 2048
.....result =  0


In [18]:
dp, dn

(array([[  0.        ,   9.55838011,  12.49256537,  11.12902151],
        [  0.        ,   0.        ,   7.85849646,   4.99603282],
        [  0.        ,   0.        ,   0.        ,  12.75118492],
        [  0.        ,   0.        ,   0.        ,   0.        ]]),
 array([[  0.        ,   9.55838011,  12.49256537,  11.12902151],
        [  9.55838011,   0.        ,   7.85849646,   4.99603282],
        [ 12.49256537,   7.85849646,   0.        ,  12.75118492],
        [ 11.12902151,   4.99603282,  12.75118492,   0.        ]]))

In [19]:
print(dp-dn)

[[  0.           0.           0.           0.        ]
 [ -9.55838011   0.           0.           0.        ]
 [-12.49256537  -7.85849646   0.           0.        ]
 [-11.12902151  -4.99603282 -12.75118492   0.        ]]


Even though the NumPy version calculates the entire matrix, it is more than 2x faster.

In [20]:
%%timeit
# scipy version 1
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

dist = np.zeros([n_frames, n_atoms, n_atoms])

for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    row, col = np.triu_indices(n_atoms, 1)
    dist[i, row, col] = pdist(coor)
    
mol.close_dcd_read(dcd_file[0])

number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
1 loop, best of 3: 280 ms per loop


In [21]:
# scipy version 1
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

dist = np.zeros([n_frames, n_atoms, n_atoms])

for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    row, col = np.triu_indices(n_atoms, 1)
    dist[i, row, col] = pdist(coor)
    
mol.close_dcd_read(dcd_file[0])

ds1 = dist[-1, :4, :4]

number of frames: 5
number of atoms: 2048
.....result =  0


In [22]:
%%timeit
# scipy version 2 (with boolean-indexing)
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

dist = np.zeros([n_frames, n_atoms, n_atoms])
r = np.arange(n_atoms)

for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    dist[i, r[:, None] < r] = pdist(coor)
    
mol.close_dcd_read(dcd_file[0])

number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
1 loop, best of 3: 217 ms per loop


In [23]:
# scipy version 2 (with boolean-indexing)
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

dist = np.zeros([n_frames, n_atoms, n_atoms])
r = np.arange(n_atoms)

for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    dist[i, r[:, None] < r] = pdist(coor)
    
mol.close_dcd_read(dcd_file[0])

ds2 = dist[-1, :4, :4]

number of frames: 5
number of atoms: 2048
.....result =  0


In [24]:
%%timeit
# scipy version 2 without producing matrix
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

n_dists = int(0.5 * n_atoms * (n_atoms - 1))
dist = np.zeros([n_frames, n_dists])
r = np.arange(n_atoms)

for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    dist[i] = pdist(coor)
    
mol.close_dcd_read(dcd_file[0])

ds2 = dist[-1, :16]

number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result = 

In [26]:
dp, dn, ds1, ds2

(array([[  0.        ,   9.55838011,  12.49256537,  11.12902151],
        [  0.        ,   0.        ,   7.85849646,   4.99603282],
        [  0.        ,   0.        ,   0.        ,  12.75118492],
        [  0.        ,   0.        ,   0.        ,   0.        ]]),
 array([[  0.        ,   9.55838011,  12.49256537,  11.12902151],
        [  9.55838011,   0.        ,   7.85849646,   4.99603282],
        [ 12.49256537,   7.85849646,   0.        ,  12.75118492],
        [ 11.12902151,   4.99603282,  12.75118492,   0.        ]]),
 array([[  0.        ,   9.55838011,  12.49256537,  11.12902151],
        [  0.        ,   0.        ,   7.85849646,   4.99603282],
        [  0.        ,   0.        ,   0.        ,  12.75118492],
        [  0.        ,   0.        ,   0.        ,   0.        ]]),
 array([[  0.        ,   9.55838011,  12.49256537,  11.12902151],
        [  0.        ,   0.        ,   7.85849646,   4.99603282],
        [  0.        ,   0.        ,   0.        ,  12.75118492],
    

In [27]:
print(dn-ds1)
print(dn-ds2)

[[  0.           0.           0.           0.        ]
 [  9.55838011   0.           0.           0.        ]
 [ 12.49256537   7.85849646   0.           0.        ]
 [ 11.12902151   4.99603282  12.75118492   0.        ]]
[[  0.           0.           0.           0.        ]
 [  9.55838011   0.           0.           0.        ]
 [ 12.49256537   7.85849646   0.           0.        ]
 [ 11.12902151   4.99603282  12.75118492   0.        ]]


In [28]:
%%timeit
dx = pdist(coor[:, :1])
dy = pdist(coor[:, 1:2])
dz = pdist(coor[:, 2:])
    
dx -= box_length * ((dx / box_length).round())
dy -= box_length * ((dy / box_length).round())
dz -= box_length * ((dz / box_length).round())

r = np.sqrt((dx * dx) + (dy * dy) + (dz * dz))

r_i = (r[r < box_length / 2.0] / delta_r).astype(int)  # round down
r_i_unique = np.unique(r_i, return_counts=True)
gr[r_i_unique[0]] += 2 * r_i_unique[1]

NameError: global name 'box_length' is not defined

In [None]:
%%timeit
dx = pdist(coor[:, :1])
dy = pdist(coor[:, 1:2])
dz = pdist(coor[:, 2:])
    
dx -= box_length * ((dx / box_length).round())
dy -= box_length * ((dy / box_length).round())
dz -= box_length * ((dz / box_length).round())

r = np.sqrt((dx * dx) + (dy * dy) + (dz * dz))

for r_i in (r[r < box_length / 2.0] / delta_r).astype(int):
    gr[r_i] += 2

In [None]:
%%timeit
dx = pdist(coor[:, :1])
dy = pdist(coor[:, 1:2])
dz = pdist(coor[:, 2:])
    
dx -= box_length * ((dx / box_length).round())
dy -= box_length * ((dy / box_length).round())
dz -= box_length * ((dz / box_length).round())

r = np.linalg.norm(np.c_[dx, dy, dz], axis=1)

r_i = (r[r < box_length / 2.0] / delta_r).astype(int)  # round down
r_i_unique = np.unique(r_i, return_counts=True)
gr[r_i_unique[0]] += 2 * r_i_unique[1]

In [None]:
%%timeit
dx = pdist(coor[:, :1])
dy = pdist(coor[:, 1:2])
dz = pdist(coor[:, 2:])

dcoor = np.c_[dx, dy, dz]    
dcoor -= box_length * ((dcoor / box_length).round())

r = np.linalg.norm(dcoor, axis=1)

r_i = (r[r < box_length / 2.0] / delta_r).astype(int)  # round down
r_i_unique = np.unique(r_i, return_counts=True)
gr[r_i_unique[0]] += 2 * r_i_unique[1]

In [None]:
gr0 = np.zeros(int(box_length/delta_r))
dx = pdist(coor[:, :1])
dy = pdist(coor[:, 1:2])
dz = pdist(coor[:, 2:])
    
dx -= box_length * ((dx / box_length).round())
dy -= box_length * ((dy / box_length).round())
dz -= box_length * ((dz / box_length).round())

r = np.sqrt((dx * dx) + (dy * dy) + (dz * dz))

r_i = (r[r < box_length / 2.0] / delta_r).astype(int)  # round down
r_i_unique = np.unique(r_i, return_counts=True)
gr0[r_i_unique[0]] += 2 * r_i_unique[1]

In [None]:
gr0[10:20]

In [None]:
gr1 = np.zeros(int(box_length/delta_r))
dx = pdist(coor[:, :1])
dy = pdist(coor[:, 1:2])
dz = pdist(coor[:, 2:])
    
dx -= box_length * ((dx / box_length).round())
dy -= box_length * ((dy / box_length).round())
dz -= box_length * ((dz / box_length).round())

r1 = np.linalg.norm(np.c_[dx, dy, dz], axis=1)

r_i = (r1[r1 < box_length / 2.0] / delta_r).astype(int)  # round down
r_i_unique = np.unique(r_i, return_counts=True)
gr1[r_i_unique[0]] += 2 * r_i_unique[1]

In [None]:
gr1[10:20]

In [None]:
gr2 = np.zeros(int(box_length/delta_r))
dx = pdist(coor[:, :1])
dy = pdist(coor[:, 1:2])
dz = pdist(coor[:, 2:])

dcoor = np.c_[dx, dy, dz]    
dcoor -= box_length * ((dcoor / box_length).round())
r2 = np.linalg.norm(dcoor, axis=1)

r_i = (r2[r2 < box_length / 2.0] / delta_r).astype(int)  # round down
r_i_unique = np.unique(r_i, return_counts=True)
gr2[r_i_unique[0]] += 2 * r_i_unique[1]

In [None]:
gr2[10:20]

In [None]:
%%timeit 
dist = np.zeros([2048*2047/2, 3])
dist[:, 0] = pdist(coor[:, :1])
dist[:, 1] = pdist(coor[:, 0:2])
dist[:, 2] = pdist(coor[:, 1:])
dist -= box_length * (dist / box_length).round()
dist = np.linalg.norm(dist, axis=1)
# dist = np.sqrt((dist ** 2).sum(axis=1))

In [None]:
dist = np.zeros([2048*2047/2, 3])
dist[:, 0] = pdist(coor[:, :1])
dist[:, 1] = pdist(coor[:, 0:2])
dist[:, 2] = pdist(coor[:, 1:])
dist -= box_length * (dist / box_length).round()
dist = np.linalg.norm(dist, axis=1)
#dist = np.sqrt((dist ** 2).sum(axis=1))

In [29]:
dist[:10]

array([[[  0.        ,   9.51893197,  12.39790455, ...,  14.24272878,
          12.67773685,   6.42020945],
        [  0.        ,   0.        ,   7.84797516, ...,   5.48464127,
           8.22573826,   6.71877054],
        [  0.        ,   0.        ,   0.        , ...,   9.40489263,
          11.90410877,  10.54183759],
        ..., 
        [  0.        ,   0.        ,   0.        , ...,   0.        ,
           6.44510459,   9.35923539],
        [  0.        ,   0.        ,   0.        , ...,   0.        ,
           0.        ,   6.30163111],
        [  0.        ,   0.        ,   0.        , ...,   0.        ,
           0.        ,   0.        ]],

       [[  0.        ,   9.38826942,  12.39777216, ...,  14.30050004,
          12.60315022,   6.3477412 ],
        [  0.        ,   0.        ,   7.79215351, ...,   5.71410856,
           8.33435722,   6.71334543],
        [  0.        ,   0.        ,   0.        , ...,   9.33329583,
          11.91714298,  10.54510141],
        ...,

In [30]:
print(dn-ds1)
print(dn-ds2)

[[  0.           0.           0.           0.        ]
 [  9.55838011   0.           0.           0.        ]
 [ 12.49256537   7.85849646   0.           0.        ]
 [ 11.12902151   4.99603282  12.75118492   0.        ]]
[[  0.           0.           0.           0.        ]
 [  9.55838011   0.           0.           0.        ]
 [ 12.49256537   7.85849646   0.           0.        ]
 [ 11.12902151   4.99603282  12.75118492   0.        ]]


In [31]:
%%timeit
# old fortran version (old_distance.so compiled using setup_<something>.py)
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

dist = np.zeros([n_frames, n_atoms, n_atoms])

for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    dist[0] = distance(coor, dist[0])
    
mol.close_dcd_read(dcd_file[0])

number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
1 loop, best of 3: 350 ms per loop


In [41]:
%%timeit
# new fortran version (new_distance.so compiled using `f2py -c distance.f --f77flags='-mavx' distance.so`)
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

dist = np.zeros([n_frames, n_atoms, n_atoms])

for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    dist[0] = distance(coor, dist[0])
    
mol.close_dcd_read(dcd_file[0])

number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
1 loop, best of 3: 353 ms per loop


In [None]:
805 s

In [None]:
# fortran version
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

dist = np.zeros([n_frames, n_atoms, n_atoms])

for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    dist[i] = distance(coor, dist[0])
    dist[i] = distance(coor, dist[0])

mol.close_dcd_read(dcd_file[0])

df = dist[-1, :4, :4]

In [None]:
dn, df

In [None]:
dn - df

# Now with [Concurrent.futures](https://docs.python.org/3.2/library/concurrent.futures.html)
This implementation presents a problem when loading many (thousands) of DCD frames.  I expect the problem is in reading in the coordinates before submitting the job to the executor.  Consider implementing `maxsize` variable, described [here](https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Queue).

In [42]:
def get_dist(coor, dist, r):
    dist[r[:, None] < r] = pdist(coor)

In [43]:
%%timeit
# ThreadPoolExecutor.submit
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

r = np.arange(n_atoms)
dist = np.zeros([n_frames, n_atoms, n_atoms])
with concurrent.futures.ThreadPoolExecutor(max_workers=n_cpu) as executor:
    for i in xrange(n_frames):
        mol.read_dcd_step(dcd_file, i)
        coor = mol.coor()[0]
        executor.submit(get_dist, coor, dist[i], r)
    
mol.close_dcd_read(dcd_file[0])

number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result = 

In [44]:
%%timeit
# ThreadPoolExecutor.map
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

r = np.arange(n_atoms)
dist = np.zeros([n_frames, n_atoms, n_atoms])
with concurrent.futures.ThreadPoolExecutor(max_workers=n_cpu) as executor:
    for i in xrange(n_frames):
        mol.read_dcd_step(dcd_file, i)
        coor = mol.coor()[0]
        executor.map(get_dist, coor, dist[i], r)
    
mol.close_dcd_read(dcd_file[0])

number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
1 loop, best of 3: 1.43 s per loop


In [45]:
%%timeit
# ProcessPoolExecutor.submit
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

r = np.arange(n_atoms)
dist = np.zeros([n_frames, n_atoms, n_atoms])
with concurrent.futures.ProcessPoolExecutor(max_workers=n_cpu) as executor:
    for i in xrange(n_frames):
        mol.read_dcd_step(dcd_file, i)
        coor = mol.coor()[0]
        executor.submit(get_dist, coor, dist[i], r)
    
mol.close_dcd_read(dcd_file[0])

number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.....result =  0
1 loop, best of 3: 533 ms per loop


In [None]:
%%timeit
# ProcessPoolExecutor.map
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

r = np.arange(n_atoms)
dist = np.zeros([n_frames, n_atoms, n_atoms])
with concurrent.futures.ProcessPoolExecutor(max_workers=n_cpu) as executor:
    for i in xrange(n_frames):
        mol.read_dcd_step(dcd_file, i)
        coor = mol.coor()[0]
        executor.map(get_dist, coor, dist[i], r)
    
mol.close_dcd_read(dcd_file[0])

number of frames: 5
number of atoms: 2048
.....result =  0
number of frames: 5
number of atoms: 2048
.

Traceback (most recent call last):
  File "/home/schowell/data/myPrograms/anaconda/lib/python2.7/multiprocessing/queues.py", line 268, in _feed
    send(obj)
IOError: [Errno 32] Broken pipe


....

`ThreadPoolExecutor` with `submit()` is much faster, probably because I did not setup iterators for `map()` (not sure about `ProcessPoolExecutor`)

In [None]:
# ThreadPoolExecutor with submit
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

r = np.arange(n_atoms)
dist = np.zeros([n_frames, n_atoms, n_atoms])
with concurrent.futures.ThreadPoolExecutor(max_workers=n_cpu) as executor:
    for i in xrange(n_frames):
        mol.read_dcd_step(dcd_file, i)
        coor = mol.coor()[0]
        executor.submit(get_dist, coor, dist[i], r)
    
mol.close_dcd_read(dcd_file[0])
dc = dist[-1, :4, :4]

In [None]:
dp, dc

In [None]:
dp - dc

## Now with the real data set

In [None]:
# setup inputs

run_path = '../../simulations/lj_sphere_monomer/runs/p_0p14/output'
pdb_fname = 'run2.pdb'
dcd_fname = 'run2.dcd'

pdb_fname = op.join(run_path, pdb_fname)
dcd_fname = op.join(run_path, dcd_fname)

mol = sasmol.SasMol(0)
mol.read_pdb(pdb_fname)

In [None]:
def get_dist(coor, dist, r):
    dist[r[:, None] < r] = pdist(coor)

In [None]:
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

r = np.arange(n_atoms)
dist = np.zeros([n_frames, n_atoms, n_atoms])
tic = time.time()
with concurrent.futures.ThreadPoolExecutor(max_workers=n_cpu) as executor:
    for i in xrange(n_frames):
        mol.read_dcd_step(dcd_file, i)
        coor = mol.coor()[0]
        executor.submit(get_dist, coor, dist[i], r)

toc = time.time() - tic    

mol.close_dcd_read(dcd_file[0])

print toc

# Now with [Dask](http://dask.pydata.org/en/latest/)

In [None]:
def get_dist(coor, dist, r):
    dist[r[:, None] < r] = pdist(coor)
    return dist

In [None]:
from dask import delayed, compute

In [None]:
slices = [coor[i] for i in range(coor.shape[0])] # create an iterator
slices2 = [delayed(pdist)(slice) for slice in slices]
results = compute(*slices2)

In [None]:
%%timeit
# Dask with scipy version 2 (with boolean-indexing)
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

r = np.arange(n_atoms)
dist = np.zeros([n_frames, n_atoms, n_atoms])
for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    delayed(get_dist)(coor, dist[i], r)

mol.close_dcd_read(dcd_file[0])

In [None]:
# Dask with scipy version 2 (with boolean-indexing)
dcd_file = mol.open_dcd_read(dcd_fname)

n_atoms = mol.natoms()
n_frames = dcd_file[2]

print('number of frames: {}'.format(n_frames))
print('number of atoms: {}'.format(n_atoms))

r = np.arange(n_atoms)
dist = np.zeros([n_frames, n_atoms, n_atoms])
res = []
for i in xrange(n_frames):
    mol.read_dcd_step(dcd_file, i)
    coor = mol.coor()[0]
    res.append(delayed(get_dist)(coor, dist[i], r))

mol.close_dcd_read(dcd_file[0])
test = [calc.compute for calc in res]
dd = dist[-1, :4, :4]

In [None]:
dd

In [None]:
dist