### Handle ETH Canopy Height dataset with Zampy
Demo notebook for developers.

Import packages and configure paths.

In [1]:
import os
from pathlib import Path

import numpy as np

from zampy.datasets import EthCanopyHeight
from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds

# Root of the scratch area used by this demo. Set the ZAMPY_WORK_DIR environment
# variable to point it somewhere else; the fallback is the original author's
# local path and is unlikely to exist on another machine.
work_dir = Path(os.environ.get("ZAMPY_WORK_DIR", "/home/yangliu/EcoExtreML/temp"))
download_dir = work_dir / "download"  # passed to download() and ingest()
ingest_dir = work_dir / "ingest"  # passed to ingest() and load()

# Request window shared by the download and load steps: the calendar year 2020.
times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-12-31"))
# NOTE(review): the argument order is presumably (north, east, south, west) in
# degrees -- confirm against the SpatialBounds definition.
bbox_demo = SpatialBounds(54, 6, 51, 3)

Download dataset.

In [2]:
# Create the dataset handler that every later cell reuses, then request the
# "canopy-height" variable for the configured time window and bounding box.
# Files that are already present in download_dir are reported and skipped
# (see the cell output).
canopy_height_dataset = EthCanopyHeight()
canopy_height_dataset.download(
    variable_names=["canopy-height"],
    spatial_bounds=bbox_demo,
    time_bounds=times,
    download_dir=download_dir,
)

File 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.tif' already exists, skipping...


True

Ingest the downloaded data into the unified format used by `zampy`.

In [3]:
# Ingest the files found in download_dir and write the results to ingest_dir.
# The call returns True here, which the notebook shows as the cell output.
canopy_height_dataset.ingest(download_dir, ingest_dir)

File 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc' already exists, skipping...


True

In [4]:
# Open the ingested data for the same variable, time window and bounding box
# that were used for the download step.
ds = canopy_height_dataset.load(
    variable_names=["canopy-height"],
    spatial_bounds=bbox_demo,
    time_bounds=times,
    ingest_dir=ingest_dir,
)

In [5]:
from zampy.datasets import converter

# Ask the converter to bring `ds` in line with the "ALMA" convention. As the
# cell output reports, 'canopy-height' is not part of that convention, so no
# conversion operation is performed by this call.
ds_convert = converter.convert(ds, canopy_height_dataset, "ALMA")

Variable 'canopy-height' is not included in 'ALMA' convention.
All variables already follow the ALMA convention or not included in the ALMA convention.
No conversion operation was performed on 'eth-canopy-height'.


For testing purposes only. <br>
Since the canopy height dataset has no variable that is included in the ALMA convention, we add a fake variable to the dataset to trigger the conversion step.

In [6]:
# Keep memory use manageable: work on a one-degree window of the data only.
lat_window = slice(51, 52)
lon_window = slice(3.0, 4.0)
ds_test = ds_convert.sel(latitude=lat_window, longitude=lon_window)

In [7]:
# Show the subset's summary (size, shape, chunking and dtype) as the cell output.
ds_test

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,15.26 MiB
Shape,"(1, 12000, 12000)","(1, 2000, 2000)"
Dask graph,36 chunks in 3 graph layers,36 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 549.32 MiB 15.26 MiB Shape (1, 12000, 12000) (1, 2000, 2000) Dask graph 36 chunks in 3 graph layers Data type float32 numpy.ndarray",12000  12000  1,

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,15.26 MiB
Shape,"(1, 12000, 12000)","(1, 2000, 2000)"
Dask graph,36 chunks in 3 graph layers,36 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [8]:
# Test-only data: derive a stand-in "Latent_heat_flux" from the canopy height
# (scaled by 0.5) and tag it with a non-standard unit so that the converter
# has something to convert.
fake_flux = ds_test["canopy-height"] * 0.5
fake_flux.attrs["units"] = "watt/decimeter**2"
ds_test["Latent_heat_flux"] = fake_flux
ds_test

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,15.26 MiB
Shape,"(1, 12000, 12000)","(1, 2000, 2000)"
Dask graph,36 chunks in 3 graph layers,36 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 549.32 MiB 15.26 MiB Shape (1, 12000, 12000) (1, 2000, 2000) Dask graph 36 chunks in 3 graph layers Data type float32 numpy.ndarray",12000  12000  1,

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,15.26 MiB
Shape,"(1, 12000, 12000)","(1, 2000, 2000)"
Dask graph,36 chunks in 3 graph layers,36 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,15.26 MiB
Shape,"(1, 12000, 12000)","(1, 2000, 2000)"
Dask graph,36 chunks in 4 graph layers,36 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 549.32 MiB 15.26 MiB Shape (1, 12000, 12000) (1, 2000, 2000) Dask graph 36 chunks in 4 graph layers Data type float32 numpy.ndarray",12000  12000  1,

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,15.26 MiB
Shape,"(1, 12000, 12000)","(1, 2000, 2000)"
Dask graph,36 chunks in 4 graph layers,36 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [9]:
from dask.distributed import Client

# Start a dask client with 4 workers of 2 threads each (8 threads in total);
# the cell output lists the resulting local cluster and its dashboard address.
client = Client(threads_per_worker=2, n_workers=4)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 8,Total memory: 7.65 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:43925,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: Just now,Total memory: 7.65 GiB

0,1
Comm: tcp://127.0.0.1:45219,Total threads: 2
Dashboard: http://127.0.0.1:34539/status,Memory: 1.91 GiB
Nanny: tcp://127.0.0.1:38903,
Local directory: /tmp/dask-scratch-space/worker-s1vsdw7q,Local directory: /tmp/dask-scratch-space/worker-s1vsdw7q

0,1
Comm: tcp://127.0.0.1:44605,Total threads: 2
Dashboard: http://127.0.0.1:39505/status,Memory: 1.91 GiB
Nanny: tcp://127.0.0.1:41701,
Local directory: /tmp/dask-scratch-space/worker-pr0vuots,Local directory: /tmp/dask-scratch-space/worker-pr0vuots

0,1
Comm: tcp://127.0.0.1:38147,Total threads: 2
Dashboard: http://127.0.0.1:42193/status,Memory: 1.91 GiB
Nanny: tcp://127.0.0.1:36263,
Local directory: /tmp/dask-scratch-space/worker-ejvqu9cr,Local directory: /tmp/dask-scratch-space/worker-ejvqu9cr

0,1
Comm: tcp://127.0.0.1:45829,Total threads: 2
Dashboard: http://127.0.0.1:45041/status,Memory: 1.91 GiB
Nanny: tcp://127.0.0.1:35057,
Local directory: /tmp/dask-scratch-space/worker-x_tdvxl7,Local directory: /tmp/dask-scratch-space/worker-x_tdvxl7


In [11]:
# Convert the test subset, which now carries the fake "Latent_heat_flux"
# variable, to the "ALMA" convention.
# NOTE(review): this rebinds `ds_convert`, the name the subset cell reads to
# build `ds_test`; re-running that cell after this one would subset the
# converted data instead of the loaded data.
ds_convert = converter.convert(ds_test, canopy_height_dataset, "ALMA")

Variable 'canopy-height' is not included in 'ALMA' convention.
Conversion of dataset 'eth-canopy-height' following ALMA convention is complete!


In [12]:
# Evaluate the converted dataset. The result is only the cell's displayed
# value; it is not assigned, so `ds_convert` itself is not rebound here.
ds_convert.compute()

In [15]:
# Check the conversion on a 20 x 20 corner of the first time step, so only a
# small part of the array has to be evaluated.
# The fake variable was tagged "watt/decimeter**2"; the converted values are
# expected to be 100 times larger (consistent with a conversion to
# watt/meter**2 -- confirm against the ALMA convention definition).
window = (0, slice(None, 20), slice(None, 20))
converted = ds_convert["Latent_heat_flux"][window].values
original = ds_test["Latent_heat_flux"][window].values

# assert_allclose reports which elements differ and by how much when the check
# fails, and treats NaN (no-data) pixels at matching positions as equal.
# rtol/atol are set to the defaults of np.allclose, which the check used before.
np.testing.assert_allclose(converted / 100, original, rtol=1e-5, atol=1e-8)