In [1]:
import os
import xarray as xr
import xdggs
import zarr
# Tell dggrid4py where the DGGRID executable lives. A DGGRID_PATH that is already
# exported in the environment takes precedence, so the notebook can run on other
# machines without editing this cell; the path below is only the author's local
# fallback. Kept ahead of the xdggs_dggrid4py import, as in the original ordering.
os.environ.setdefault('DGGRID_PATH', '/home/dick/micromamba/envs/test/bin/dggrid')
from xdggs_dggrid4py.IGEO7 import IGEO7Index

## A simple step-by-step how-to
This notebook showcases converting a 2D array dataset to a 1D dataset with DGGS (IGEO7) as an index.
- prepare data
- conversion from 2D coordinates to DGGS cell id
- some use cases on DGGS

### Prepare Data

In [2]:
# data source : https://stacindex.org/catalogs/cubes-and-clouds-snow-cover#/BXM1FNA8yFQD4Mwf6jW28EX3DqXfYKUah?t=2
# Open the remote GeoTIFF with the rasterio engine: every band is exposed as its
# own variable and the arrays are dask-backed with automatically chosen chunks.
data = xr.open_dataset(
    "https://esa.pages.eox.at/cubes-and-clouds-catalog/contributions/valentin.printemps@gmail.com/openEO_uint8.tif",
    engine='rasterio',
    chunks='auto',
    band_as_variable=True,
)
data

Unnamed: 0,Array,Chunk
Bytes,120.70 kiB,120.70 kiB
Shape,"(206, 150)","(206, 150)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 120.70 kiB 120.70 kiB Shape (206, 150) (206, 150) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",150  206,

Unnamed: 0,Array,Chunk
Bytes,120.70 kiB,120.70 kiB
Shape,"(206, 150)","(206, 150)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


We need to specify the DGGS attributes for the conversion; their content varies between DGGS implementations.
The full set of attributes for IGEO7 is:
```python
    {    "grid_name": "igeo7",  
             "level": -1,  
          "src_epsg": "EPSG:32632", 
           "method" : "nearestpoint", # centroid or nearestpoint 
       "coordinate" : ["x","y"], # the names of the coordinates in the xarray dataset
               "mp" : 1, # for multiprocessing; currently Linux only, and pymp-pypi must be installed manually
             "chunk" : (100,100),
    }
```
After that, we assign the attributes to one of the current coordinates, for this example, either x or y.

In [3]:
# DGGS settings for the conversion. level=-1 appears to request automatic
# resolution selection (the conversion log reports "Auto resolution") — confirm.
attrs = dict(
    grid_name="igeo7",
    level=-1,
    src_epsg="EPSG:32632",
    method="nearestpoint",
    coordinate=["x", "y"],
)
# Attach the settings to the y coordinate so they are available when stacking.
data['y'].attrs = attrs

### Conversion from 2D coordinates to DGGS cell id
To perform the conversion, we use the stack function from xarray to create a new Index with the class `IGEO7Index` provided.

In [4]:
%%time
dggs_data = data.stack(cell_ids=("x", "y"), index_cls=IGEO7Index)

x shape: ((150,)), y shape: ((206,))
Calculate Auto resolution
4937085.0,315065.0,4939135.0,316555.0
Total Bounds (EPSG:32632): [ 315065. 4937085.  316555. 4939135.]
Total Bounds (wgs84): [ 6.67045856 44.56335938  6.68994264 44.58218062]
Total Bounds Area (km^2): 3.2299606105177596
Area per center point (km^2): 0.000104529469596044
Auto resolution : 14
--- Multiprocessing 1 ---


  0%|          | 0/1 [00:00<?, ?it/s]

---Generate Cell ID with resolution 14 by nearestpoint method, x batch:1, y batch:1---
cell generation time: (0.9296696186065674)
Cell ID calcultion completed, unique cell id :28463
CPU times: user 241 ms, sys: 8.31 ms, total: 249 ms
Wall time: 1.03 s


  return ogr_read(


After conversion, the dataset is transformed into one dimension, as shown below.
However, there are some drawbacks to using `stack`:
- After conversion, the index cannot be used immediately, as it becomes a multi-index `(cell_ids, x, y)`
- The attributes can't be carried to the newly created index

So we have to : 
1. assign the attributes back to cell_ids manually
2. save the dataset to disk and load it back to decompose the multi-index

In [5]:
# Display the stacked dataset produced by data.stack(...) in the previous cell.
dggs_data

Unnamed: 0,Array,Chunk
Bytes,120.70 kiB,120.70 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 120.70 kiB 120.70 kiB Shape (30900,) (30900,) Dask graph 1 chunks in 4 graph layers Data type float32 numpy.ndarray",30900  1,

Unnamed: 0,Array,Chunk
Bytes,120.70 kiB,120.70 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [7]:
# Very important! stack() does not carry attributes over to the new index, so
# copy the grid parameters held by the IGEO7 index onto the cell_ids coordinate.
# NOTE(review): this reads the private `_grid` attribute of the index.
dggs_data.cell_ids.attrs = dggs_data.xindexes.get('cell_ids')._grid.to_dict()
# Save it to zarr, compressing both arrays with Blosc/zstd at level 3.
# shuffle=2 is presumably Blosc's bit-shuffle filter — confirm against numcodecs.
compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2)
# mode="w" replaces a store left over from a previous run; without it, running
# the notebook a second time fails because the target already exists.
dggs_data.to_zarr(
    'dggs_data.zar',
    mode="w",
    encoding={
        "band_1": {"compressor": compressor},
        "cell_ids": {"compressor": compressor},
    },
)

<xarray.backends.zarr.ZarrStore at 0x7f62629b1510>

After we load the dataset from the disk again, the index becomes a single index, but it is loaded as PandasIndex.
We can use the `xdggs.decode` function to re-initialize it to the IGEO7 index.

In [8]:
# Read back the store written by the to_zarr cell; this round trip is what
# flattens the stacked multi-index into plain coordinates.
dggs_zarr = xr.open_zarr(store='./dggs_data.zar')
dggs_zarr

Unnamed: 0,Array,Chunk
Bytes,241.41 kiB,241.41 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 241.41 kiB 241.41 kiB Shape (30900,) (30900,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",30900  1,

Unnamed: 0,Array,Chunk
Bytes,241.41 kiB,241.41 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,241.41 kiB,241.41 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 241.41 kiB 241.41 kiB Shape (30900,) (30900,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",30900  1,

Unnamed: 0,Array,Chunk
Bytes,241.41 kiB,241.41 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,120.70 kiB,120.70 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 120.70 kiB 120.70 kiB Shape (30900,) (30900,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",30900  1,

Unnamed: 0,Array,Chunk
Bytes,120.70 kiB,120.70 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [9]:
# After open_zarr the cell_ids index is a plain PandasIndex; xdggs.decode
# re-initialises it as the IGEO7 index (presumably from the grid attributes
# stored on cell_ids — confirm).
dggs_zarr = xdggs.decode(dggs_zarr) 
dggs_zarr

Unnamed: 0,Array,Chunk
Bytes,241.41 kiB,241.41 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 241.41 kiB 241.41 kiB Shape (30900,) (30900,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",30900  1,

Unnamed: 0,Array,Chunk
Bytes,241.41 kiB,241.41 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,241.41 kiB,241.41 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 241.41 kiB 241.41 kiB Shape (30900,) (30900,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",30900  1,

Unnamed: 0,Array,Chunk
Bytes,241.41 kiB,241.41 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,120.70 kiB,120.70 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 120.70 kiB 120.70 kiB Shape (30900,) (30900,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",30900  1,

Unnamed: 0,Array,Chunk
Bytes,120.70 kiB,120.70 kiB
Shape,"(30900,)","(30900,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [10]:
# Drop the x and y variables from the decoded dataset. errors="ignore" keeps
# this cell re-runnable: dggs_zarr is reassigned in place, so a second run would
# otherwise raise because x and y are already gone.
dggs_zarr = dggs_zarr.drop_vars(['x', 'y'], errors="ignore")
# Select by geographic coordinates through the xdggs accessor.
dggs_zarr.dggs.sel_latlon(latitude=[44.56375059, 44.56369803], longitude=[6.68935115])

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 8 B 8 B Shape (2,) (2,) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [11]:
# Ask the xdggs accessor for the cell centres of the dataset's cells
# (the result is not visible in the saved output; only a warning was captured).
dggs_zarr.dggs.cell_centers()

  return ogr_read(


In [12]:
# Ask the xdggs accessor for the cell boundaries of the dataset's cells
# (the result is not visible in the saved output; only a warning was captured).
dggs_zarr.dggs.cell_boundaries()

  return ogr_read(
