# Performance with dask arrays

We compare the performance of several histogram implementations on automatically generated dask arrays.
The setup is similar to the one used in the performance comparison with [numpy arrays](./perf_numpy.ipynb).
Here we do not compare against numpy's histogram functions but against `dask-histogram`.

In [1]:
import boost_histogram as bh
import dask.array as da
import dask_histogram as dh
import numpy as np
import xarray as xr
import xarray_histogram as xh
from numpy.testing import assert_allclose
from xhistogram.xarray import histogram as xhistogram

In [2]:
# Binning shared by all benchmarks: 100 regular bins between -3 and 3
# along each of the two axes.
nbins = (100, 100)
ranges = ((-3, 3), (-3, 3))

# One boost-histogram regular axis per dimension, and the matching bin edges
# (the edges are what gets passed to xhistogram as `bins`).
axes = [bh.axis.Regular(n, lo, hi) for n, (lo, hi) in zip(nbins, ranges)]
edges = [axis.edges for axis in axes]

In [3]:
# Seed dask's own random state: the arrays below are drawn with `da.random`,
# which keeps a random state separate from NumPy's global one, so
# `np.random.seed` would not make them reproducible.
da.random.seed(42)

# Number of samples per dask chunk; also reused as `block_size` for xhistogram.
chunk_size = 1_000_000

# 1D sample: 10 million normal draws, cast to float32.
vals1d = da.random.normal(size=[10_000_000], chunks=chunk_size).astype(np.float32)
da_1d = xr.DataArray(vals1d, name='test_1d')

# 2D sample: two rows of 10 million draws, each row chunked on its own,
# wrapped as one DataArray per row.
vals = da.random.normal(size=[2, 10_000_000], chunks=(1, chunk_size)).astype(np.float32)
da_2d = [xr.DataArray(row, name=f'test_2d_{i}') for i, row in enumerate(vals)]

## One-dimensional histogram (flattened)

In [4]:
# Reference bin counts for the flattened 1D case, computed with dask-histogram
# on the first axis only; the timed cells compare their results against it.
answer = dh.factory(
    vals1d,
    axes=axes[:1],
).values()

### dask-histogram

In [5]:
%%timeit
# Build the 1D histogram with dask-histogram and check it against the reference.
hist = dh.factory(vals1d, axes=axes[:1])
assert_allclose(hist.values(), answer)

209 ms ± 12.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### xhistogram

In [6]:
%%timeit
# Guard: the input should not have been loaded in memory beforehand.
assert not da_1d._in_memory
# Histogram with xhistogram using explicit bin edges, one block per chunk size.
hist = xhistogram(
    da_1d,
    bins=edges[0],
    block_size=chunk_size,
)
assert_allclose(hist.values, answer)

411 ms ± 15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### xarray-histogram

In [7]:
%%timeit
# Guard: the input should not have been loaded in memory beforehand.
assert not da_1d._in_memory
# Histogram with xarray-histogram, binned with the boost-histogram axis.
hist = xh.histogram(da_1d, bins=axes[0])
assert_allclose(hist.values, answer)

235 ms ± 26.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Two-dimensional histogram (flattened)

In [8]:
# Reference 2D bin counts from dask-histogram: each row of `vals` is passed
# as one coordinate, binned with both axes.
answer = dh.factory(
    *vals,
    axes=axes,
).values()

### dask-histogram

In [9]:
%%timeit
# Build the 2D histogram with dask-histogram and check it against the reference.
hist = dh.factory(
    *vals,
    axes=axes,
)
assert_allclose(hist.values(), answer)

375 ms ± 15.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### xhistogram

In [10]:
%%timeit
# Guard: the first input should not have been loaded in memory beforehand.
assert not da_2d[0]._in_memory
# 2D histogram with xhistogram: one DataArray per coordinate, explicit edges.
hist = xhistogram(
    *da_2d,
    bins=edges,
    block_size=chunk_size,
)
assert_allclose(hist.values, answer)

754 ms ± 14.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### xarray-histogram

In [11]:
%%timeit
# Guard: the first input should not have been loaded in memory beforehand.
assert not da_2d[0]._in_memory
# 2D histogram with xarray-histogram: the list of DataArrays is passed as a
# single argument, binned with the boost-histogram axes.
hist = xh.histogram(da_2d, bins=axes)
assert_allclose(hist.values, answer)

364 ms ± 11.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## One-dimensional histogram (along second dimension)

In [12]:
# Three rows of 10 million normal draws, each row chunked on its own,
# wrapped with named dimensions so histograms can be taken along 'x' only.
vals_1d_t = da.random.normal(
    size=[3, 10_000_000],
    chunks=(1, chunk_size),
)
da_1d_t = xr.DataArray(
    vals_1d_t,
    dims=['t', 'x'],
    name='test_1d',
)

In [13]:
# Reference result for the along-dimension case: xhistogram reduces over 'x'
# only, and the result is loaded once so the timed cells can compare with it.
answer = xhistogram(
    da_1d_t,
    bins=edges[0],
    dim=['x'],
).load()

### xhistogram

In [14]:
%%timeit
# Histogram along 'x' with xhistogram, then check against the reference.
hist = xhistogram(
    da_1d_t,
    bins=edges[0],
    dim=['x'],
    block_size=chunk_size,
)
assert_allclose(hist.values, answer.values)

1.06 s ± 11.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### xarray-histogram

In [15]:
%%timeit
# Histogram along 'x' with xarray-histogram.
hist = xh.histogram(
    da_1d_t,
    bins=axes[0],
    dims=['x'],
)
# The values are transposed before the comparison with the xhistogram
# reference (presumably the two libraries order the output dimensions
# differently; verify if the layout matters).
assert_allclose(hist.values.T, answer.values)

463 ms ± 12.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
