In [1]:
import xarray as xr
import numpy as np
import dask.array as da

In [2]:
# Coordinate values shared by every DataArray built below: each of the three
# dimensions gets its own integer axis 0..99.
coords = {axis: np.arange(100) for axis in ("time", "lat", "lon")}

In [3]:
%%timeit
# Time construction of a DataArray backed by a dask array that is created
# without an explicit `chunks` argument.
# The result is deliberately not bound to a name: %%timeit executes the cell
# body in its own scope, so an assignment here would never be visible to
# later cells and would only suggest otherwise.
xr.DataArray(
    dims=["time", "lat", "lon"],
    coords=coords,
    data=da.empty((100, 100, 100)),
)

603 µs ± 24.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [4]:
%%timeit
# Time construction of a DataArray backed by an in-memory NumPy array of the
# same shape, as the baseline for the dask-backed variant.
# The result is deliberately not bound to a name: %%timeit executes the cell
# body in its own scope, so an assignment here would never be visible to
# later cells and would only suggest otherwise.
xr.DataArray(
    dims=["time", "lat", "lon"],
    coords=coords,
    data=np.empty((100, 100, 100)),
)

202 µs ± 1.25 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


---

# Now we chunk the data


In [5]:
%%timeit
# Wrap a dask array created without explicit chunks, then ask xarray to chunk
# it in blocks of 10 along every dimension. Both steps are inside the timing.
xr.DataArray(
    da.empty((100, 100, 100)),
    coords=coords,
    dims=("time", "lat", "lon"),
    attrs={"units": "m"},
    name="dask",
).chunk({"time": 10, "lat": 10, "lon": 10})

4.27 ms ± 19.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [6]:
%%timeit
# Wrap an in-memory NumPy array, then ask xarray to chunk it in blocks of 10
# along every dimension. Both steps are inside the timing.
xr.DataArray(
    np.empty((100, 100, 100)),
    coords=coords,
    dims=("time", "lat", "lon"),
    attrs={"units": "m"},
    name="numpy",
).chunk({"time": 10, "lat": 10, "lon": 10})

5.62 ms ± 12.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
%%timeit
# Wrap a dask array that is given its (10, 10, 10) chunks at creation time,
# so no separate .chunk() call is needed afterwards.
xr.DataArray(
    da.empty((100, 100, 100), chunks=(10, 10, 10)),
    coords=coords,
    dims=("time", "lat", "lon"),
    attrs={"units": "m"},
    name="dask_already_chunked",
)

590 µs ± 8.98 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## Results

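Among the chunked variants, the dask array that is already chunked at creation is the fastest to construct (~590 µs), roughly 7–10× faster than chunking afterwards with `.chunk()` (4.27 ms starting from dask, 5.62 ms starting from NumPy). The unchunked NumPy-backed `DataArray` (~202 µs) is the fastest overall, but it is not chunked. Note that these timings measure object construction only; no dask computation is triggered.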
