#### **test environment: Intel i5-6300HQ, Ubuntu 18.04 Subsystem on Windows 10**

In [1]:
import bayesfast as bf
import numpy as np
import multiprocess
from distributed import Client, LocalCluster

In [2]:
# Problem size and shape parameters of the target density (see logp).
D = 16    # number of dimensions of x
a = 1.    # standard deviation of x_0
b = 0.5   # x_1, ... have standard deviation exp(b * x_0)

# Flat-prior box: x_0 lives in [-4, 4], every other coordinate in [-30, 30].
lower = np.concatenate(([-4.], np.full(D - 1, -30.)))
upper = np.concatenate(([4.], np.full(D - 1, 30.)))
bound = np.column_stack((lower, upper))  # shape (D, 2): one (low, high) row per dim
diff = upper - lower                     # width of the box along each dim
const = np.log(diff).sum()               # log-volume of the box: flat-prior normalization

def logp(x):
    """Log-density of the funnel-shaped target, including the flat-prior constant.

    x_0 is Gaussian with standard deviation ``a``; given x_0, each of the
    remaining coordinates is Gaussian with standard deviation ``exp(b * x_0)``.
    The module-level ``const`` (log-volume of the prior box) is subtracted.

    Parameters
    ----------
    x : numpy.ndarray
        Points to evaluate. The last axis indexes the coordinates; any
        leading axes are treated as batch axes.

    Returns
    -------
    numpy.ndarray or scalar
        One log-density value per point, i.e. shape ``x.shape[:-1]``.
    """
    ndim = x.shape[-1]
    head = x[..., 0]
    tail_sq_sum = np.square(x[..., 1:]).sum(axis=-1)

    # Quadratic (exponent) parts of the two Gaussian factors.
    head_term = -0.5 * head**2 / a**2
    tail_term = -0.5 * tail_sq_sum * np.exp(-2 * b * head)

    # Log-normalizations; the last piece is the x_0-dependent log-width of
    # the (ndim - 1) conditional Gaussians.
    log_norm = (-0.5 * np.log(2 * np.pi * a**2)
                - 0.5 * (ndim - 1) * np.log(2 * np.pi)
                - (ndim - 1) * b * head)

    return head_term + tail_term + log_norm - const

def grad(x):
    """Gradient of ``logp`` with respect to ``x``.

    Works for a single point and for batches: like ``logp``, the last axis
    indexes the coordinates and any leading axes are batch axes. (The
    previous version indexed ``foo[0]``, which is the first *row* of a batch
    rather than the x_0 component, and so only worked for 1-D input.)

    Parameters
    ----------
    x : array_like
        Points to evaluate, shape ``(..., n)``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` holding d logp / d x. The
        flat-prior constant does not depend on ``x`` and contributes nothing.
    """
    x = np.asarray(x, dtype=float)
    n = x.shape[-1]
    # Slice (rather than index) the last axis so the trailing axis is kept
    # and everything broadcasts against x for any number of batch axes.
    x0 = x[..., :1]
    rest = x[..., 1:]

    out = np.empty_like(x)
    # d/dx_i for i >= 1: -x_i / exp(2 b x_0)
    out[..., 1:] = -rest / np.exp(2 * b * x0)
    # d/dx_0: Gaussian term in x_0, plus the x_0-dependence of the
    # conditional variances (exponent and log-normalization).
    out[..., :1] = (-x0 / a**2
                    + b * np.sum(rest**2, axis=-1, keepdims=True)
                    * np.exp(-2 * b * x0)
                    - (n - 1) * b)
    return out

In [3]:
# Wrap the log-density and its gradient in a bayesfast DensityLite, passing
# the prior box as the input scales with hard bounds enabled.
den = bf.DensityLite(
    logp=logp,
    grad=grad,
    input_size=D,
    input_scales=bound,
    hard_bounds=True,
)

# Seed the legacy global RNG so the four test points are reproducible.
np.random.seed(0)
x = np.random.standard_normal(size=(4, D))

In [4]:
# Start a local Dask cluster with 4 workers, one thread per worker, and
# connect a client to it.
cluster = LocalCluster(n_workers=4, threads_per_worker=1)
client = Client(cluster)
# Bare last expression so the notebook displays the client summary.
client



0,1
Client  Scheduler: tcp://127.0.0.1:59916  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 4  Cores: 4  Memory: 17.03 GB




#### **the dask dashboard says most of the time is spent deserializing DensityLite**

In [5]:
# Time one round trip through Dask: client.map submits one `den` call per
# row of x, and client.gather collects the results.
%time foo = client.gather(client.map(den, x))
# Show the four returned values as a single array.
np.asarray(foo)



CPU times: user 172 ms, sys: 93.8 ms, total: 266 ms
Wall time: 4.13 s


array([-94.2455445 , -92.8864777 , -95.90057691, -86.70286387])

#### **this does not make sense, since serializing and deserializing DensityLite takes only a few milliseconds**

In [6]:
import cloudpickle

# Baseline: time a local cloudpickle round trip of the same object, with no
# Dask involved.
%time pickled = cloudpickle.dumps(den)
%time unpickled = cloudpickle.loads(pickled)
# Call the restored copy on the whole batch x to check it still evaluates.
unpickled(x)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 2.28 ms
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 159 µs


array([-94.2455445 , -92.8864777 , -95.90057691, -86.70286387])

#### **for comparison, the speed of multiprocess (which uses dill to serialize / deserialize) seems more reasonable**

In [7]:
# Same evaluation through a multiprocess pool of 4 workers, for comparison
# with the Dask timing; only the pool.map call itself is timed.
with multiprocess.Pool(4) as pool:
    %time foo = pool.map(den, x)
# Show the four returned values as a single array.
np.asarray(foo)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 27.1 ms


array([-94.2455445 , -92.8864777 , -95.90057691, -86.70286387])