In [1]:
import netCDF4
import xarray as xr

# Vcmax data: from NetCDF to Zarr

The Vcmax dataset is provided as chunked NetCDF4/HDF5 data. It was reprojected in CRIB. It is global.

## Input variables

In [2]:
year = 2015

In [3]:
ROOT_DIR = '/gpfs/work2/0/ttse0619'
vcmax_PATH = (
    f'{ROOT_DIR}/qianqian/global_data_Qianqian/'
    f'1input_data/{year}global/vcmax/TROPOMI_Vmax_Tg_mean10km_global.nc'
)
OUT_PATH = (
    f'{ROOT_DIR}/qianqian/global_data_Qianqian/'
    f'1input_data/{year}global/vcmax/vcmax.zarr'
)

In [4]:
ERA5_PATH = (
    f'{ROOT_DIR}/francesco/Projects/EcoExtreML/Data/'
    f'1input_data/{year}global/era5land/era5land.zarr'
)

## Converting the dataset

Find out chunking strategy of the dataset:

In [5]:
def get_chunking(nc_path):
    with netCDF4.Dataset(nc_path) as nc:
        chunks = {
            name: dict(zip(var.dimensions, var.chunking()))
            for name, var in nc.variables.items()
        }
    return chunks

In [7]:
chunks = get_chunking(vcmax_PATH)
chunks # no chunks

{'x': {'x': 'c'},
 'y': {'y': 'c'},
 'band': {'band': 'c'},
 'spatial_ref': {},
 '__xarray_dataarray_variable__': {'band': 'c', 'y': 'o', 'x': 'n'}}

We now open the landcover NetCDF files using the same chunking scheme as in the NetCDF file:

In [8]:
vcmax = xr.open_dataset(vcmax_PATH)
vcmax = vcmax.squeeze('band')  # drop band dimension

In [11]:
vcmax.__xarray_dataarray_variable__.mean().compute()

In [12]:
# fix naming of coordinates
vcmax = vcmax.rename(
    x='longitude',
    y='latitude',
)

# (-180;180) -> (0;360)
vcmax = vcmax.assign_coords(
    longitude=(vcmax.longitude + 360) % 360,
    latitude=vcmax.latitude
)

# roll array to reorder coordinates 
nroll = (vcmax.longitude.values > 180 ).sum()
vcmax = vcmax.roll(longitude=-nroll, roll_coords=True)

In [13]:
vcmax = vcmax.rename({"__xarray_dataarray_variable__":'vcmax'})

In [14]:
vcmax = vcmax.chunk(
    longitude=250, 
    latitude=250
)
vcmax.to_zarr(OUT_PATH, mode='w')

<xarray.backends.zarr.ZarrStore at 0x150e345fe0b0>

In [16]:
xr.open_zarr(OUT_PATH).vcmax.mean().compute()