# 5.11. Performing out-of-core computations on large arrays with Dask

In [None]:
import numpy as np
import dask.array as da
import memory_profiler

In [None]:
# Register memory_profiler's IPython magics (the %%memit cells below rely on it).
%load_ext memory_profiler

In [None]:
# 10000 x 10000 array of normally distributed values, described as a grid of
# 1000 x 1000 chunks. Dask only records how to build it here.
Y = da.random.normal(size=(10000, 10000),
                     chunks=(1000, 1000))

In [None]:
# Show the array's summary representation.
Y

In [None]:
# Overall shape, total number of elements, and per-axis chunk sizes.
Y.shape, Y.size, Y.chunks

In [None]:
# Mean along axis 0, i.e. one value per column; the result is again a
# dask array.
mu = Y.mean(axis=0)
mu

In [None]:
# .compute() triggers the evaluation needed for the first column's mean.
mu[0].compute()

In [None]:
def f_numpy(shape=(10000, 10000)):
    """Benchmark: column means of an in-memory NumPy random array.

    The full array of normally distributed values is materialised in
    memory (about 800 MB of float64 for the default 10000 x 10000
    shape), the mean of every column is computed, and the first 100
    means are selected. The result is discarded: this function exists
    to be measured with ``%%memit`` and ``%%time``.

    Parameters
    ----------
    shape : tuple of int, optional
        Two-dimensional shape of the random array. Defaults to the
        original benchmark size; pass a smaller shape for a quick run.

    Returns
    -------
    None
    """
    X = np.random.normal(size=shape)
    # Only the cost of the computation matters, so the value is dropped.
    _ = X.mean(axis=0)[0:100]

In [None]:
%%memit
# Memory measurement (memory_profiler cell magic) of the NumPy version.
f_numpy()

In [None]:
%%time
# Timing of the NumPy version.
f_numpy()

In [None]:
def f_dask(shape=(10000, 10000), chunks=(1000, 1000)):
    """Benchmark: column means of a chunked Dask random array.

    Builds a Dask array of normally distributed values, takes the mean
    of every column, keeps the first 100 means and evaluates them with
    ``.compute()``. The result is discarded: this function exists to be
    measured with ``%%memit`` and ``%%time``.

    Parameters
    ----------
    shape : tuple of int, optional
        Two-dimensional shape of the random array. Defaults to the
        original benchmark size.
    chunks : tuple of int, optional
        Chunk sizes passed to ``da.random.normal``. Defaults to square
        1000 x 1000 chunks.

    Returns
    -------
    None
    """
    Y = da.random.normal(size=shape, chunks=chunks)
    # Only the cost of the computation matters, so the value is dropped.
    _ = Y.mean(axis=0)[0:100].compute()

In [None]:
%%memit
# Memory measurement (memory_profiler cell magic) of the Dask version.
f_dask()

In [None]:
%%time
# Timing of the Dask version.
f_dask()

In [None]:
def f_dask2(shape=(10000, 10000), chunks=(10000, 100)):
    """Benchmark: same computation as ``f_dask`` with column-shaped chunks.

    Builds a Dask array of normally distributed values, takes the mean
    of every column, keeps the first 100 means and evaluates them with
    ``.compute()``. With the default 10000 x 100 chunks, each chunk
    spans every row of 100 consecutive columns, so the 100 selected
    columns all lie in the first chunk. The result is discarded: this
    function exists to be measured with ``%%memit`` and ``%%time``.

    Parameters
    ----------
    shape : tuple of int, optional
        Two-dimensional shape of the random array. Defaults to the
        original benchmark size.
    chunks : tuple of int, optional
        Chunk sizes passed to ``da.random.normal``. Defaults to
        10000 x 100.

    Returns
    -------
    None
    """
    Y = da.random.normal(size=shape, chunks=chunks)
    # Only the cost of the computation matters, so the value is dropped.
    _ = Y.mean(axis=0)[0:100].compute()

In [None]:
%%memit
# Memory measurement (memory_profiler cell magic) of the column-chunked
# Dask version.
f_dask2()

In [None]:
%%time
# Timing of the column-chunked Dask version.
f_dask2()

In [None]:
from dask.distributed import Client

In [None]:
# Create a dask.distributed client with default settings.
# NOTE(review): with no arguments this presumably starts a local cluster
# and becomes the default scheduler for later .compute() calls - confirm
# against the installed distributed version.
client = Client()

In [None]:
# Display the client's summary.
client

In [None]:
# Blocking evaluation of the sum of all elements of Y.
Y.sum().compute()

In [None]:
# Submit the same computation through the client; a future is returned
# instead of the value.
future = client.compute(Y.sum())

In [None]:
# Display the future (shows its current status).
future

In [None]:
# Retrieve the computed value from the future.
future.result()

In [None]:
# 1,500,000 x 100,000 array of uniform random values in 10000 x 10000
# chunks. Only the description of the array is created here.
huge = da.random.uniform(
    size=(1500000, 100000), chunks=(10000, 10000))
# nbytes is divided by 1024 ** 3, so the figure is in binary gigabytes
# (GiB) even though the label reads "GB".
"Size in memory: %.1f GB" % (huge.nbytes / 1024 ** 3)

In [None]:
from dask.distributed import progress
# WARNING: this will take a very long time computing
# useless values. This is for pedagogical purposes
# only.
# A distributed client (`client`, created earlier in this notebook) is
# active, so the computation runs on its scheduler.
# dask.diagnostics.ProgressBar only reports on the local schedulers and
# would show nothing here; dask.distributed.progress tracks the future
# submitted to the client instead.
future_mean = client.compute(huge.mean())
progress(future_mean)
# Block until the mean has been computed.
m = future_mean.result()

## Cleanup

In [None]:
import shutil

# Shut the client down first so that nothing it started is still using
# the scratch directory when it is removed.
client.close()
# Portable equivalent of `rm -rf dask-worker-space`; a missing directory
# is silently ignored.
shutil.rmtree("dask-worker-space", ignore_errors=True)