### Handle ETH Canopy Height dataset with Zampy
Demo notebook for users and developers.

Import packages and configure paths.

In [1]:
from pathlib import Path

import numpy as np
from dask.distributed import Client

from zampy.datasets import converter
from zampy.datasets.catalog import EthCanopyHeight
from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds

# Replace the placeholder below with a writable directory on your machine.
work_dir = Path("/path_to_work_directory")
download_dir = work_dir.joinpath("download")  # passed to download() and ingest()
ingest_dir = work_dir.joinpath("ingest")  # passed to ingest() and load()

# Demo selection: calendar year 2020.
times = TimeBounds(
    np.datetime64("2020-01-01"),
    np.datetime64("2020-12-31"),
)
# presumably ordered (north, east, south, west) — confirm against SpatialBounds
bbox_demo = SpatialBounds(54, 6, 51, 3)

Download dataset.

In [2]:
# Create the dataset handler and request the canopy-height files for the demo
# time range, bounding box and variable. Per the logged output, files that are
# already present in download_dir are skipped rather than downloaded again.
canopy_height_dataset = EthCanopyHeight()
canopy_height_dataset.download(
    download_dir=download_dir,
    time_bounds=times,
    spatial_bounds=bbox_demo,
    variable_names=["height_of_vegetation"],
)

File 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.tif' already exists, skipping...


True

Data ingestion to the unified format in `zampy`.

In [3]:
# Convert the downloaded files into zampy's unified format (the log below shows
# .nc files being produced from the downloaded .tif tiles).
# NOTE(review): no time/space bounds are passed here, so presumably every file
# found in download_dir is ingested — the log also lists N48E003 tiles that
# lie outside bbox_demo.
canopy_height_dataset.ingest(download_dir, ingest_dir)

File 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc' already exists, skipping...
File 'ETH_GlobalCanopyHeight_10m_2020_N48E003_Map.nc' already exists, skipping...
File 'ETH_GlobalCanopyHeight_10m_2020_N48E003_Map_SD.nc' already exists, skipping...
File 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map_SD.nc' already exists, skipping...


True

In [4]:
# Load the ingested data for the demo time range, bounding box and variable.
# resolution=0.05 is presumably the target grid spacing in degrees: the
# 3-degree-wide box yields 61 points per spatial axis in the repr below.
ds = canopy_height_dataset.load(
    ingest_dir=ingest_dir,
    time_bounds=times,
    spatial_bounds=bbox_demo,
    variable_names=["height_of_vegetation"],
    resolution=0.05,
)

In [5]:
# Inspect the loaded dataset; the repr shows dask-backed (chunked) arrays.
ds

Unnamed: 0,Array,Chunk
Bytes,14.54 kiB,14.54 kiB
Shape,"(1, 61, 61)","(1, 61, 61)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 14.54 kiB 14.54 kiB Shape (1, 61, 61) (1, 61, 61) Dask graph 1 chunks in 10 graph layers Data type float32 numpy.ndarray",61  61  1,

Unnamed: 0,Array,Chunk
Bytes,14.54 kiB,14.54 kiB
Shape,"(1, 61, 61)","(1, 61, 61)"
Dask graph,1 chunks in 10 graph layers,1 chunks in 10 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [6]:
# Convert the dataset to the ALMA convention. For this dataset only the
# variable name changes (height_of_vegetation -> Hveg), so zampy warns that no
# conversion operation was performed; that warning is expected here.
ds_convert = converter.convert(ds, canopy_height_dataset, "ALMA")

height_of_vegetation renamed to Hveg.


No conversion operation was performed on 'eth-canopy-height'.
  ds_convert = converter.convert(ds, canopy_height_dataset, "ALMA")


For testing purposes only. <br>
Since the canopy height dataset does not require any unit conversion, we add a fake variable with non-standard units to a subset of the data in order to trigger the conversion step.

In [7]:
# To stay within memory limits, run the test on a small spatial subset only.
ds_test = ds.sel(
    latitude=slice(51, 52),
    longitude=slice(3.0, 4.0),
)

In [8]:
# Inspect the subset to confirm it is smaller than the full dataset.
ds_test

Unnamed: 0,Array,Chunk
Bytes,1.72 kiB,1.72 kiB
Shape,"(1, 21, 21)","(1, 21, 21)"
Dask graph,1 chunks in 11 graph layers,1 chunks in 11 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.72 kiB 1.72 kiB Shape (1, 21, 21) (1, 21, 21) Dask graph 1 chunks in 11 graph layers Data type float32 numpy.ndarray",21  21  1,

Unnamed: 0,Array,Chunk
Bytes,1.72 kiB,1.72 kiB
Shape,"(1, 21, 21)","(1, 21, 21)"
Dask graph,1 chunks in 11 graph layers,1 chunks in 11 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [9]:
# Fake a flux variable (half the vegetation height) and tag it with
# deliberately non-standard units, so the converter has a unit conversion to do.
ds_test["Latent_heat_flux"] = (ds_test["height_of_vegetation"] * 0.5).assign_attrs(
    units="watt/decimeter**2"
)
ds_test

Unnamed: 0,Array,Chunk
Bytes,1.72 kiB,1.72 kiB
Shape,"(1, 21, 21)","(1, 21, 21)"
Dask graph,1 chunks in 11 graph layers,1 chunks in 11 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.72 kiB 1.72 kiB Shape (1, 21, 21) (1, 21, 21) Dask graph 1 chunks in 11 graph layers Data type float32 numpy.ndarray",21  21  1,

Unnamed: 0,Array,Chunk
Bytes,1.72 kiB,1.72 kiB
Shape,"(1, 21, 21)","(1, 21, 21)"
Dask graph,1 chunks in 11 graph layers,1 chunks in 11 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.72 kiB,1.72 kiB
Shape,"(1, 21, 21)","(1, 21, 21)"
Dask graph,1 chunks in 12 graph layers,1 chunks in 12 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.72 kiB 1.72 kiB Shape (1, 21, 21) (1, 21, 21) Dask graph 1 chunks in 12 graph layers Data type float32 numpy.ndarray",21  21  1,

Unnamed: 0,Array,Chunk
Bytes,1.72 kiB,1.72 kiB
Shape,"(1, 21, 21)","(1, 21, 21)"
Dask graph,1 chunks in 12 graph layers,1 chunks in 12 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [10]:
# Start a local Dask cluster with 4 workers of 2 threads each for the
# computations below; the displayed summary includes the dashboard address.
client = Client(n_workers=4, threads_per_worker=2)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 8,Total memory: 15.33 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:39905,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: Just now,Total memory: 15.33 GiB

0,1
Comm: tcp://127.0.0.1:32795,Total threads: 2
Dashboard: http://127.0.0.1:36437/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:35425,
Local directory: /tmp/dask-scratch-space/worker-16z3qkvm,Local directory: /tmp/dask-scratch-space/worker-16z3qkvm

0,1
Comm: tcp://127.0.0.1:41021,Total threads: 2
Dashboard: http://127.0.0.1:39457/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:37585,
Local directory: /tmp/dask-scratch-space/worker-5fgxu36j,Local directory: /tmp/dask-scratch-space/worker-5fgxu36j

0,1
Comm: tcp://127.0.0.1:37195,Total threads: 2
Dashboard: http://127.0.0.1:39719/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:45501,
Local directory: /tmp/dask-scratch-space/worker-hzpagfi2,Local directory: /tmp/dask-scratch-space/worker-hzpagfi2

0,1
Comm: tcp://127.0.0.1:36275,Total threads: 2
Dashboard: http://127.0.0.1:39317/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:44503,
Local directory: /tmp/dask-scratch-space/worker-pnen7itr,Local directory: /tmp/dask-scratch-space/worker-pnen7itr


In [11]:
# Convert the test subset to the ALMA convention. Unlike the earlier call on
# `ds`, this dataset contains a variable with non-ALMA units, so the log should
# report a completed conversion in addition to the renaming.
ds_convert = converter.convert(ds_test, canopy_height_dataset, "ALMA")

height_of_vegetation renamed to Hveg.
Latent_heat_flux renamed to Qle.
Conversion of dataset 'eth-canopy-height' following ALMA convention is complete!


In [12]:
# Trigger the deferred dask computation so the conversion actually runs.
# NOTE(review): the computed result is only displayed, not stored; assuming
# ds_convert is an xarray Dataset, it stays lazy afterwards — assign the result
# if the in-memory data is needed in later cells.
ds_convert.compute()

In [13]:
# Check the conversion: the factor of 100 corresponds to converting
# watt/decimeter**2 to (presumably) watt/meter**2, since 1 m**2 == 100 dm**2.
# np.testing.assert_allclose reports the mismatching elements on failure, which
# a bare `assert np.allclose(...)` does not. The tolerances are set to
# np.allclose's defaults, and the full arrays are compared rather than only a
# 20x20 corner of the 21x21 subset.
np.testing.assert_allclose(
    ds_convert["Qle"].values / 100,
    ds_test["Latent_heat_flux"].values,
    rtol=1e-5,
    atol=1e-8,
    equal_nan=True,
)