#### normal

In [2]:
from dask.distributed import Client, progress
client = Client(processes=False, threads_per_worker=4,
                n_workers=1, memory_limit='2GB')
client

0,1
Client  Scheduler: inproc://172.27.9.105/23736/8  Dashboard: http://172.27.9.105/23736/8:33439/status,Cluster  Workers: 1  Cores: 4  Memory: 2.00 GB


In [2]:
import numpy as np
import dask.array as da

In [11]:
%%time
x = np.random.normal(10, 0.1, size=(20000, 20000)) 
xz = x.size * x.itemsize
y = x.mean(axis=0)[::100] 
y
print("size(byte): ", xz)

size(byte):  3200000000
CPU times: user 11.1 s, sys: 391 ms, total: 11.5 s
Wall time: 12.5 s


In [12]:
%%time
x = da.random.normal(10, 0.1, size=(20000, 20000), chunks=(1000, 1000))
xzd = x.size * x.itemsize
y = x.mean(axis=0)[::100] 
x
y.compute() 
print("size(byte): ", xzd)

size(byte):  3200000000
CPU times: user 11.7 s, sys: 38.3 ms, total: 11.8 s
Wall time: 3.34 s


#### Lennard-Jones potential

In [13]:
import numpy as np

# make a random collection of particles
def make_cluster(natoms, radius=40, seed=1981):
    np.random.seed(seed)
    cluster = np.random.normal(0, radius, (natoms,3))-0.5
    return cluster

def lj(r2):
    sr6 = (1./r2)**3
    pot = 4.*(sr6*sr6 - sr6)
    return pot

# build the matrix of distances
def distances(cluster):
    diff = cluster[:, np.newaxis, :] - cluster[np.newaxis, :, :]
    mat = (diff*diff).sum(-1)
    return mat

# the lj function is evaluated over the upper traingle
# after removing distances near zero
def potential(cluster):
    d2 = distances(cluster)
    dtri = np.triu(d2)
    energy = lj(dtri[dtri > 1e-6]).sum()
    return energy

In [14]:
cluster = make_cluster(int(7e3), radius=500)

In [15]:
%time potential(cluster)

CPU times: user 2.06 s, sys: 200 ms, total: 2.26 s
Wall time: 2.84 s


-0.21282893668845293

In [16]:
%prun -s tottime potential(cluster)

 

In [17]:
import dask.array as da

# compute the potential on the entire
# matrix of distances and ignore division by zero
def potential_dask(cluster):
    d2 = distances(cluster)
    energy = da.nansum(lj(d2))/2.
    return energy

In [18]:
from os import cpu_count

dcluster = da.from_array(cluster, chunks=cluster.shape[0]//cpu_count())

In [19]:
e = potential_dask(dcluster)
%time e.compute()

  return func(*args2)
  return func(*args2)


CPU times: user 3.57 s, sys: 350 ms, total: 3.93 s
Wall time: 1.13 s


-0.21282893668845307