In [1]:
import netCDF4
import xarray as xr

# IGBP data: from NetCDF to Zarr

The IGBP/landcover dataset is provided as chunked NetCDF4/HDF5 data, it was reprojected with ERA5Land in crib. The original data is ESACCI-LC-L4-LCCS-Map-300m-P1Y-*-v2.0.7cds.nc. It is global.

## Input variables

In [2]:
year = 2015

In [3]:
ROOT_DIR = '/gpfs/work2/0/ttse0619'
IGBP_PATH = (
    f'{ROOT_DIR}/qianqian/global_data_Qianqian/'
    f'1input_data/{year}global/igbp/landcover10km_global.nc'
)
OUT_PATH = (
    f'{ROOT_DIR}/qianqian/global_data_Qianqian/'
    f'1input_data/{year}global/igbp/landcover.zarr'
)

In [4]:
ERA5_PATH = (
    f'{ROOT_DIR}/francesco/Projects/EcoExtreML/Data/'
    f'1input_data/{year}global/era5land/era5land.zarr'
)

## Converting the dataset

Find out chunking strategy of the dataset:

In [5]:
def get_chunking(nc_path):
    with netCDF4.Dataset(nc_path) as nc:
        chunks = {
            name: dict(zip(var.dimensions, var.chunking()))
            for name, var in nc.variables.items()
        }
    return chunks

In [6]:
chunks = get_chunking(IGBP_PATH)
chunks

{'x': {'x': 'c'},
 'y': {'y': 'c'},
 'time': {'time': 'c'},
 'spatial_ref': {},
 'lccs_class': {'time': 1, 'y': 901, 'x': 1800}}

We now open the landcover NetCDF files using the same chunking scheme as in the NetCDF file:

In [7]:
landcover = xr.open_dataset(IGBP_PATH, chunks=chunks['lccs_class'])
landcover = landcover.squeeze('time')  # drop time dimension

In [8]:
landcover

Unnamed: 0,Array,Chunk
Bytes,24.73 MiB,6.19 MiB
Shape,"(1801, 3600)","(901, 1800)"
Dask graph,4 chunks in 3 graph layers,4 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 24.73 MiB 6.19 MiB Shape (1801, 3600) (901, 1800) Dask graph 4 chunks in 3 graph layers Data type float32 numpy.ndarray",3600  1801,

Unnamed: 0,Array,Chunk
Bytes,24.73 MiB,6.19 MiB
Shape,"(1801, 3600)","(901, 1800)"
Dask graph,4 chunks in 3 graph layers,4 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [9]:
# fix naming of coordinates
landcover = landcover.rename(
    x='longitude',
    y='latitude',
)

# (-180;180) -> (0;360)
landcover = landcover.assign_coords(
    longitude=(landcover.longitude + 360) % 360,
    latitude=landcover.latitude
)

# roll array to reorder coordinates 
nroll = (landcover.longitude.values > 180 ).sum()
landcover = landcover.roll(longitude=-nroll, roll_coords=True)

In [17]:
landcover.lccs_class.mean().compute()

In [18]:
xr.open_zarr(OUT_PATH).lccs_class.mean().compute()

In [13]:
landcover = landcover.chunk(
    longitude=250, 
    latitude=250
)
landcover.to_zarr(OUT_PATH, mode='w')

<xarray.backends.zarr.ZarrStore at 0x150e41ff3ba0>