In [1]:
import numpy as np
import dask.array as da

In [2]:
size_tuple = (2000,2000)
np_arr = np.random.randint(10, size=size_tuple)
np_arr2 = np.random.randint(10, size=size_tuple)

## Let's perform a complex random operation over these arrays

In [3]:
%time (((np_arr * 2).T)**2 + np_arr2 + 100).sum(axis=1).mean()

CPU times: user 73.8 ms, sys: 0 ns, total: 73.8 ms
Wall time: 121 ms


437082.321

## Let's perform the same operation with Dask Arrays

In [10]:
chunks_tuple = (500, 500)
da_arr = da.from_array(np_arr, chunks=chunks_tuple)
da_arr2 = da.from_array(np_arr2, chunks=chunks_tuple)

In [11]:
%time (((da_arr * 2).T)**2 + da_arr2 + 100).sum(axis=1).mean().compute()

CPU times: user 84.2 ms, sys: 19.1 ms, total: 103 ms
Wall time: 37.9 ms


437082.321

##  Numpy won't be able to even load this huge array

```
size_tuple = (50000, 50000)
np_arr = np.random.randint(10, size=size_tuple)
np_arr2 = np.random.randint(10, size=size_tuple)
```

## Dask is able to load and compute this data

In [6]:
chunks_tuple = (5000, 5000)
da_arr = da.random.randint(10, size=size_tuple,
                           chunks=chunks_tuple)
da_arr2 = da.random.randint(10, size=size_tuple,
                            chunks=chunks_tuple)

In [7]:
%time (((da_arr * 2).T)**2 + da_arr2 + 100).sum(axis=1).mean().compute()

CPU times: user 146 ms, sys: 6.29 ms, total: 152 ms
Wall time: 149 ms


436928.2335

In [8]:
da_arr.nbytes/1e+9

0.032