### Handle ETH Canopy Height dataset with Zampy
Demo notebook for developers.

Import packages and configure paths.

In [1]:
from pathlib import Path

import numpy as np
import xarray as xr

from zampy.datasets import EthCanopyHeight
from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds

# Working directories. "Path2work_dir" looks like a placeholder; point it at a
# real location before running the notebook.
work_dir = Path("Path2work_dir")
download_dir = work_dir / "download"
ingest_dir = work_dir / "ingest"

# Demo selection: the calendar year 2020 and a small bounding box.
times = TimeBounds(
    np.datetime64("2020-01-01"),
    np.datetime64("2020-12-31"),
)
# NOTE(review): argument order is presumably (north, east, south, west) — confirm
# against the SpatialBounds definition.
bbox_demo = SpatialBounds(54, 6, 51, 3)

Download dataset.

In [2]:
# Dataset handler; the same object is reused by every later cell.
canopy_height_dataset = EthCanopyHeight()

# Download the "canopy-height" variable for the demo period and bounding box.
canopy_height_dataset.download(
    variable_names=["canopy-height"],
    spatial_bounds=bbox_demo,
    time_bounds=times,
    download_dir=download_dir,
)

File 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.tif' already exists, skipping...


True

Ingest the downloaded data into the unified `zampy` format.

In [3]:
# Ingest the files found in `download_dir` and write the results to `ingest_dir`.
canopy_height_dataset.ingest(download_dir, ingest_dir)

File 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc' already exists, skipping...


True

In [4]:
# Load the ingested data, restricted to the demo period, bounding box and
# the "canopy-height" variable.
ds = canopy_height_dataset.load(
    variable_names=["canopy-height"],
    spatial_bounds=bbox_demo,
    time_bounds=times,
    ingest_dir=ingest_dir,
)

In [5]:
# Display the loaded dataset (rich repr of the object returned by `load`).
ds

Unnamed: 0,Array,Chunk
Bytes,4.83 GiB,137.33 MiB
Shape,"(1, 36000, 36000)","(1, 6000, 6000)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.83 GiB 137.33 MiB Shape (1, 36000, 36000) (1, 6000, 6000) Dask graph 36 chunks in 2 graph layers Data type float32 numpy.ndarray",36000  36000  1,

Unnamed: 0,Array,Chunk
Bytes,4.83 GiB,137.33 MiB
Shape,"(1, 36000, 36000)","(1, 6000, 6000)"
Dask graph,36 chunks in 2 graph layers,36 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [6]:
# Converted files get their own sub-directory of the working directory.
convert_dir = work_dir / "convert"

# Convert the ingested files to the 'ALMA' convention.
canopy_height_dataset.convert(
    convention="ALMA",
    convert_dir=convert_dir,
    ingest_dir=ingest_dir,
)

Start converting data to follow the 'ALMA' convention.
Start processing file `ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc`.
Variable 'canopy-height' is not included in 'ALMA' convention.
All variables already follow the ALMA convention or not included in the ALMA convention.
No conversion operation was performed on 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc'.


For testing purposes only. <br>
Since the canopy height dataset doesn't contain any variable that is included in the ALMA convention, we create a fake dataset to trigger the conversion step.

In [7]:
# Build a small synthetic dataset with a "Qle" variable so that the
# conversion step has something to convert.
# A fixed seed keeps the displayed values identical across Restart & Run All.
rng = np.random.default_rng(seed=0)

ds_fake = xr.Dataset(
    {"Qle": (("lat", "lon"), rng.random((2, 2)) * 5)},
    coords={"lat": [10, 20], "lon": [150, 160]},
)

# Tag the variable with a unit for the conversion step to act on.
ds_fake.Qle.attrs["units"] = "watt/centimeter**2"
ds_fake

In [8]:
# Save the fake dataset to the ingest directory, under a file name that
# differs from the real ingested file.
fake_ingest_file = (
    ingest_dir
    / "eth-canopy-height"
    / "ETH_GlobalCanopyHeight_10m_2020_N51E003_fake_Map.nc"
)
ds_fake.to_netcdf(fake_ingest_file)

In [9]:
# Run the conversion a second time, now that the ingest directory also
# contains the fake file.
canopy_height_dataset.convert(
    convention="ALMA",
    convert_dir=convert_dir,
    ingest_dir=ingest_dir,
)

Start converting data to follow the 'ALMA' convention.
Start processing file `ETH_GlobalCanopyHeight_10m_2020_N51E003_fake_Map.nc`.
Conversion of dataset 'ETH_GlobalCanopyHeight_10m_2020_N51E003_fake_Map.nc' following ALMA convention is complete!
Start processing file `ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc`.
Variable 'canopy-height' is not included in 'ALMA' convention.
All variables already follow the ALMA convention or not included in the ALMA convention.
No conversion operation was performed on 'ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc'.


In [10]:
# Load the converted counterpart of the fake file from the convert directory.
converted_fake_file = (
    convert_dir
    / "eth-canopy-height"
    / "ETH_GlobalCanopyHeight_10m_2020_N51E003_fake_Map.nc"
)
ds_convert = xr.load_dataset(converted_fake_file)
ds_convert

In [11]:
# Check the conversion: the fake "Qle" values were tagged as watt/centimeter**2,
# and the converted values are expected to be 10000 times larger
# (1 m**2 == 10_000 cm**2; the target unit is presumably W/m**2 — confirm).
CM2_PER_M2 = 10_000

# np.testing.assert_allclose reports the mismatching elements on failure,
# whereas a bare `assert` raises an AssertionError without any detail.
# Tolerances are set to the np.allclose defaults.
np.testing.assert_allclose(
    ds_convert["Qle"].values / CM2_PER_M2,
    ds_fake["Qle"].values,
    rtol=1e-05,
    atol=1e-08,
)