In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import dask.array as da  # need to have dask.array installed, although not directly using it here.
import xarray as xr
import xesmf as xe

In [2]:
# Open the xarray tutorial dataset "air_temperature" with 500 time steps per
# chunk, so its variables are backed by chunked Dask arrays.
ds = xr.tutorial.open_dataset(
    "air_temperature",
    chunks={"time": 500},
)
# Display the dataset summary.
ds


Unnamed: 0,Array,Chunk
Bytes,29.52 MiB,5.05 MiB
Shape,"(2920, 25, 53)","(500, 25, 53)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 29.52 MiB 5.05 MiB Shape (2920, 25, 53) (500, 25, 53) Dask graph 6 chunks in 2 graph layers Data type float64 numpy.ndarray",53  25  2920,

Unnamed: 0,Array,Chunk
Bytes,29.52 MiB,5.05 MiB
Shape,"(2920, 25, 53)","(500, 25, 53)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [None]:
# Check how the dataset is chunked: the result maps each dimension name to a
# tuple of chunk sizes along that dimension.
ds.chunks

Frozen({'time': (500, 500, 500, 500, 500, 420), 'lat': (25,), 'lon': (53,)})

Note: .data returns the underlying array (which may be a NumPy array or a lazy Dask array), while .values always returns a NumPy array and forces computation if needed.

In [4]:
# Show the array underlying the "air" variable; the repr below shows it is a
# chunked Dask array (nothing is computed by accessing .data).
ds["air"].data

Unnamed: 0,Array,Chunk
Bytes,29.52 MiB,5.05 MiB
Shape,"(2920, 25, 53)","(500, 25, 53)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 29.52 MiB 5.05 MiB Shape (2920, 25, 53) (500, 25, 53) Dask graph 6 chunks in 2 graph layers Data type float64 numpy.ndarray",53  25  2920,

Unnamed: 0,Array,Chunk
Bytes,29.52 MiB,5.05 MiB
Shape,"(2920, 25, 53)","(500, 25, 53)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [5]:
# Target grid: lat from 16 up to (but excluding) 75 in steps of 1.0 and
# lon from 200 up to (but excluding) 330 in steps of 1.5.
ds_out = xr.Dataset(
    dict(
        lat=(["lat"], np.arange(16, 75, 1.0)),
        lon=(["lon"], np.arange(200, 330, 1.5)),
    )
)

# Build a bilinear regridder from the grid of `ds` to the target grid, then
# display its summary.
regridder = xe.Regridder(ds, ds_out, method="bilinear")
regridder

xESMF Regridder 
Regridding algorithm:       bilinear 
Weight filename:            bilinear_25x53_59x87.nc 
Reuse pre-computed weights? False 
Input grid shape:           (25, 53) 
Output grid shape:          (59, 87) 
Periodic in longitude?      False

In [8]:
# 1. Define the regridding task (does not compute yet, just builds a Dask task graph)
# NOTE: this rebinds ds_out, which until now held the target grid passed to xe.Regridder.
%time ds_out = regridder(ds)

# Print the regridded Dataset info (variables are still dask.arrays, not computed yet)
print(ds_out)

# 2. Inspect the underlying data for the "air" variable
# This will show a dask.array<...>, meaning it's still lazily evaluated
print(ds_out["air"].data)

# 3. Trigger the actual computation: execute the Dask task graph
# This step really applies the regridding and loads the data into memory
%time result = ds_out['air'].compute()

# 4. Check the result: now it's a NumPy array in memory
# Output is (numpy.ndarray, shape) with dimensions ordered (time, lat, lon),
# i.e. (numpy.ndarray, (2920, 59, 87))
type(result.data), result.data.shape


CPU times: user 11.8 ms, sys: 0 ns, total: 11.8 ms
Wall time: 10.7 ms
<xarray.Dataset> Size: 120MB
Dimensions:  (time: 2920, lat: 59, lon: 87)
Coordinates:
  * time     (time) datetime64[ns] 23kB 2013-01-01 ... 2014-12-31T18:00:00
  * lat      (lat) float64 472B 16.0 17.0 18.0 19.0 20.0 ... 71.0 72.0 73.0 74.0
  * lon      (lon) float64 696B 200.0 201.5 203.0 204.5 ... 326.0 327.5 329.0
Data variables:
    air      (time, lat, lon) float64 120MB dask.array<chunksize=(500, 59, 87), meta=np.ndarray>
Attributes:
    regrid_method:  bilinear
dask.array<sum-aggregate, shape=(2920, 59, 87), dtype=float64, chunksize=(500, 59, 87), chunktype=numpy.ndarray>
CPU times: user 707 ms, sys: 108 ms, total: 814 ms
Wall time: 291 ms


(numpy.ndarray, (2920, 59, 87))

# Chunking Behaviour

## Re-chunking the dataset
- `chunk({...})` changes how the data is split into chunks for Dask.  
- `{"lat": 25, "lon": 25, "time": -1}` means:  
  - Split latitude (`lat`) into chunks of 25 points.  
  - Split longitude (`lon`) into chunks of 25 points.  
  - Keep all time steps (`time = -1`) in a single chunk.  
- It’s not “cutting inside existing chunks,” but rather redefining the chunk sizes for the entire array, and Dask rebuilds a new computation graph to match.

In [10]:
# Re-chunk spatially: 25 points per chunk along lat and lon, and the whole
# time axis (-1) in a single chunk.
ds_3lon = ds.chunk(
    {
        "lat": 25,
        "lon": 25,
        "time": -1,
    }
)
# Show the Dask array behind "air" to confirm the new chunk layout.
ds_3lon["air"].data


Unnamed: 0,Array,Chunk
Bytes,29.52 MiB,13.92 MiB
Shape,"(2920, 25, 53)","(2920, 25, 25)"
Dask graph,3 chunks in 3 graph layers,3 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 29.52 MiB 13.92 MiB Shape (2920, 25, 53) (2920, 25, 25) Dask graph 3 chunks in 3 graph layers Data type float64 numpy.ndarray",53  25  2920,

Unnamed: 0,Array,Chunk
Bytes,29.52 MiB,13.92 MiB
Shape,"(2920, 25, 53)","(2920, 25, 25)"
Dask graph,3 chunks in 3 graph layers,3 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [12]:
# Compare the chunk layouts of the original and re-chunked datasets, one per
# line; note that in ds_3lon the time dimension is no longer split.
print(ds.chunks, ds_3lon.chunks, sep="\n")

Frozen({'time': (500, 500, 500, 500, 500, 420), 'lat': (25,), 'lon': (53,)})
Frozen({'time': (2920,), 'lat': (25,), 'lon': (25, 25, 3)})


## Apply the Regridder
- A Regridder can be reused after re-chunking because it depends only on the grid coordinates, not on how the data is chunked.

In [14]:
# Regrid the spatially re-chunked dataset (ds_3lon) with the existing regridder.
ds_3lon_out = regridder(ds_3lon)
# Show the Dask array behind the regridded "air" variable to see its chunking.
ds_3lon_out["air"].data

Unnamed: 0,Array,Chunk
Bytes,114.35 MiB,32.86 MiB
Shape,"(2920, 59, 87)","(2920, 59, 25)"
Dask graph,4 chunks in 9 graph layers,4 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 114.35 MiB 32.86 MiB Shape (2920, 59, 87) (2920, 59, 25) Dask graph 4 chunks in 9 graph layers Data type float64 numpy.ndarray",87  59  2920,

Unnamed: 0,Array,Chunk
Bytes,114.35 MiB,32.86 MiB
Shape,"(2920, 59, 87)","(2920, 59, 25)"
Dask graph,4 chunks in 9 graph layers,4 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


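## Define the Output Chunks When Regridding

- Pass `output_chunks={...}` when calling the regridder to set the chunk sizes of the regridded output along `lat` and `lon`.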

In [15]:
# Regrid again, this time requesting output chunks of 10 points along both
# lat and lon instead of the default output chunking.
ds_spatial_out = regridder(
    ds_3lon,
    output_chunks={"lat": 10, "lon": 10},
)
# Show the Dask array behind "air" to confirm the requested chunk sizes.
ds_spatial_out["air"].data

Unnamed: 0,Array,Chunk
Bytes,114.35 MiB,2.23 MiB
Shape,"(2920, 59, 87)","(2920, 10, 10)"
Dask graph,54 chunks in 9 graph layers,54 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 114.35 MiB 2.23 MiB Shape (2920, 59, 87) (2920, 10, 10) Dask graph 54 chunks in 9 graph layers Data type float64 numpy.ndarray",87  59  2920,

Unnamed: 0,Array,Chunk
Bytes,114.35 MiB,2.23 MiB
Shape,"(2920, 59, 87)","(2920, 10, 10)"
Dask graph,54 chunks in 9 graph layers,54 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
