### Handle ETH Canopy Height dataset with Zampy
Demo notebook for developers.

Import packages and configure paths.

In [1]:
from pathlib import Path

import numpy as np
from dask.distributed import Client

from zampy.datasets import EthCanopyHeight
from zampy.datasets import converter
from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds

# Working directory for this demo. It is resolved relative to the current
# user's home directory so the notebook does not depend on one developer's
# machine layout.
work_dir = Path.home() / "EcoExtreML" / "temp"
download_dir = work_dir / "download"  # raw downloaded files
ingest_dir = work_dir / "ingest"  # files converted to zampy's unified format

# Temporal and spatial extent of the demo request.
times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-12-31"))
# NOTE(review): argument order is presumably (north, east, south, west), i.e.
# 51-54 degrees latitude and 3-6 degrees longitude -- confirm against SpatialBounds.
bbox_demo = SpatialBounds(54, 6, 51, 3)

Download dataset.

In [2]:
# Create the dataset handler and request the canopy-height data for the demo
# bounding box and time range. The call is the last expression of the cell,
# so its return value is shown as the cell output.
canopy_height_dataset = EthCanopyHeight()
canopy_height_dataset.download(
    variable_names=["canopy-height"],
    spatial_bounds=bbox_demo,
    time_bounds=times,
    download_dir=download_dir,
)

File 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.tif' already exists, skipping...


True

Data ingestion to the unified format in `zampy`.

In [3]:
# Ingest the files found in `download_dir` into zampy's unified format and
# write the results to `ingest_dir`.
canopy_height_dataset.ingest(download_dir, ingest_dir)

File 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc' already exists, skipping...


True

In [4]:
# Open the ingested data for the same variable, bounding box and time range
# that were used for the download.
ds = canopy_height_dataset.load(
    variable_names=["canopy-height"],
    spatial_bounds=bbox_demo,
    time_bounds=times,
    ingest_dir=ingest_dir,
)

In [5]:
# Display the loaded dataset.
ds

Unnamed: 0,Array,Chunk
Bytes,4.83 GiB,137.33 MiB
Shape,"(1, 36000, 36000)","(1, 6000, 6000)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.83 GiB 137.33 MiB Shape (1, 36000, 36000) (1, 6000, 6000) Dask graph 36 chunks in 2 graph layers Data type float32 numpy.ndarray",36000  36000  1,

Unnamed: 0,Array,Chunk
Bytes,4.83 GiB,137.33 MiB
Shape,"(1, 36000, 36000)","(1, 6000, 6000)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [6]:
# File name handed to the converter; it appears in the converter's log messages.
fname = "ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc"

# Try to convert the loaded dataset to the ALMA convention. `converter` is
# imported in the import cell at the top of the notebook.
ds_convert = converter.convert(ds, fname, "ALMA")

Variable 'canopy-height' is not included in 'ALMA' convention.
All variables already follow the ALMA convention or not included in the ALMA convention.
No conversion operation was performed on 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc'.


For testing purposes only. <br>
Since the canopy height dataset doesn't have any variable that is included in the ALMA convention, we add a fake variable to the dataset to trigger the conversion step.

In [7]:
# To stay within the memory limit, test on a 1 x 1 degree subset only.
subset_indexers = {
    "latitude": slice(51, 52),
    "longitude": slice(3.0, 4.0),
}
ds_test = ds_convert.sel(subset_indexers)

In [8]:
# Display the subset taken from the dataset.
ds_test

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,137.33 MiB
Shape,"(1, 12000, 12000)","(1, 6000, 6000)"
Dask graph,4 chunks in 3 graph layers,4 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 549.32 MiB 137.33 MiB Shape (1, 12000, 12000) (1, 6000, 6000) Dask graph 4 chunks in 3 graph layers Data type float32 numpy.ndarray",12000  12000  1,

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,137.33 MiB
Shape,"(1, 12000, 12000)","(1, 6000, 6000)"
Dask graph,4 chunks in 3 graph layers,4 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [9]:
# Re-chunk the subset into tiles of 200 x 200 grid cells.
tile_chunks = dict(latitude=200, longitude=200)
ds_test = ds_test.chunk(tile_chunks)

In [10]:
# Display the re-chunked subset.
ds_test

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,156.25 kiB
Shape,"(1, 12000, 12000)","(1, 200, 200)"
Dask graph,3600 chunks in 4 graph layers,3600 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 549.32 MiB 156.25 kiB Shape (1, 12000, 12000) (1, 200, 200) Dask graph 3600 chunks in 4 graph layers Data type float32 numpy.ndarray",12000  12000  1,

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,156.25 kiB
Shape,"(1, 12000, 12000)","(1, 200, 200)"
Dask graph,3600 chunks in 4 graph layers,3600 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [11]:
# Build a fake "Latent_heat_flux" variable from the canopy height and tag it
# with a unit, so that the converter has something to convert.
fake_flux = ds_test["canopy-height"] * 0.5
fake_flux.attrs["units"] = "watt/decimeter**2"
ds_test["Latent_heat_flux"] = fake_flux
ds_test

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,156.25 kiB
Shape,"(1, 12000, 12000)","(1, 200, 200)"
Dask graph,3600 chunks in 4 graph layers,3600 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 549.32 MiB 156.25 kiB Shape (1, 12000, 12000) (1, 200, 200) Dask graph 3600 chunks in 4 graph layers Data type float32 numpy.ndarray",12000  12000  1,

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,156.25 kiB
Shape,"(1, 12000, 12000)","(1, 200, 200)"
Dask graph,3600 chunks in 4 graph layers,3600 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,156.25 kiB
Shape,"(1, 12000, 12000)","(1, 200, 200)"
Dask graph,3600 chunks in 5 graph layers,3600 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 549.32 MiB 156.25 kiB Shape (1, 12000, 12000) (1, 200, 200) Dask graph 3600 chunks in 5 graph layers Data type float32 numpy.ndarray",12000  12000  1,

Unnamed: 0,Array,Chunk
Bytes,549.32 MiB,156.25 kiB
Shape,"(1, 12000, 12000)","(1, 200, 200)"
Dask graph,3600 chunks in 5 graph layers,3600 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [12]:
# Start a local dask cluster with 4 worker processes of 2 threads each.
# `Client` is imported in the import cell at the top of the notebook.
client = Client(n_workers=4, threads_per_worker=2)
client

2023-06-09 11:16:46,312 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-ulwxsn6w', purging
2023-06-09 11:16:46,313 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-77cc_tps', purging
2023-06-09 11:16:46,313 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-6gw5lorw', purging
2023-06-09 11:16:46,314 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-mtqlswuy', purging


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 8,Total memory: 7.65 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:35275,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: Just now,Total memory: 7.65 GiB

0,1
Comm: tcp://127.0.0.1:35727,Total threads: 2
Dashboard: http://127.0.0.1:34907/status,Memory: 1.91 GiB
Nanny: tcp://127.0.0.1:35733,
Local directory: /tmp/dask-worker-space/worker-to0mbqbk,Local directory: /tmp/dask-worker-space/worker-to0mbqbk

0,1
Comm: tcp://127.0.0.1:42985,Total threads: 2
Dashboard: http://127.0.0.1:38611/status,Memory: 1.91 GiB
Nanny: tcp://127.0.0.1:43401,
Local directory: /tmp/dask-worker-space/worker-re18w7a6,Local directory: /tmp/dask-worker-space/worker-re18w7a6

0,1
Comm: tcp://127.0.0.1:45665,Total threads: 2
Dashboard: http://127.0.0.1:46063/status,Memory: 1.91 GiB
Nanny: tcp://127.0.0.1:41627,
Local directory: /tmp/dask-worker-space/worker-9tbvo4j8,Local directory: /tmp/dask-worker-space/worker-9tbvo4j8

0,1
Comm: tcp://127.0.0.1:37521,Total threads: 2
Dashboard: http://127.0.0.1:38335/status,Memory: 1.91 GiB
Nanny: tcp://127.0.0.1:38449,
Local directory: /tmp/dask-worker-space/worker-k_yft6oh,Local directory: /tmp/dask-worker-space/worker-k_yft6oh


In [13]:
# Convert the test dataset, which now carries the fake "Latent_heat_flux"
# variable, to the ALMA convention. This rebinds `ds_convert`.
ds_convert = converter.convert(ds_test, fname, "ALMA")

Variable 'canopy-height' is not included in 'ALMA' convention.
Conversion of dataset 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc' following ALMA convention is complete!


In [14]:
# Evaluate the converted dataset.
# NOTE(review): compute() returns a new dataset and leaves `ds_convert` itself
# unchanged; the result is not assigned here, so later cells evaluate the data
# again -- confirm this is intended.
ds_convert.compute()

In [15]:
# Check the conversion on a small 20 x 20 window of the first time step:
# the converted values divided by 100 must match the original values
# (the fake variable was tagged with "watt/decimeter**2").
window = (0, slice(None, 20), slice(None, 20))
converted_values = ds_convert["Latent_heat_flux"][window].values
original_values = ds_test["Latent_heat_flux"][window].values
assert np.allclose(converted_values / 100, original_values)