In [1]:
import os
from pathlib import Path

import numpy as np
import rioxarray as rxr
import xarray as xr

In [2]:
from src.hls_tools import preprocess_fmask, preprocess_bands

In [3]:
# Root folder holding the HLS clips. Set the HLS_PARENT_DIR environment
# variable to run the notebook on another machine; when it is unset (or empty)
# the original location is used, so existing runs behave exactly as before.
PARENT_DIR = Path(
    os.environ.get("HLS_PARENT_DIR") or r"/home/iborlafm/Downloads/Mozambique/hls"
)

# Name of the clip to process; it selects the `clip_<name>` sub-folder below.
clip = "intact"

# Folder the band / fmask GeoTIFFs are read from.
IN_DIR = PARENT_DIR / f"clip_{clip}"

## List the files

In [4]:
# Components of the file names to collect from IN_DIR.
product, tile, res, version = "HLS", "T36KXE", "30", "v2.0"

# Sorted so the file list has a stable, name-based order.
band_paths = sorted(IN_DIR.glob(f"{product}_{tile}_*{res}_{version}_bands.tif"))

# Stop here with a readable message rather than failing later on
# `band_paths[0]` or inside `open_mfdataset` when nothing matched.
if not band_paths:
    raise FileNotFoundError(
        f"No files matching '{product}_{tile}_*{res}_{version}_bands.tif' "
        f"found in {IN_DIR}"
    )

In [5]:
# Each band file has a sibling Fmask file in the same folder whose name
# differs only by the "_bands" -> "_fmask" substitution.
fmask_paths = [
    band_path.with_name(band_path.name.replace("_bands", "_fmask"))
    for band_path in band_paths
]

## Preprocess the bands

In [6]:
# Options for opening the band GeoTIFFs: every file goes through
# `preprocess_bands` and the results are concatenated along "time" in the
# order of `band_paths`. `mask_and_scale` is deliberately left at its default.
band_open_kwargs = dict(
    engine="rasterio",
    band_as_variable=True,
    preprocess=preprocess_bands,
    combine="nested",
    concat_dim="time",
    chunks="auto",
    parallel=True,
)

bands = xr.open_mfdataset(band_paths, **band_open_kwargs)

## Preprocess the Fmask

In [7]:
# Options for opening the Fmask GeoTIFFs: every file goes through
# `preprocess_fmask` and the results are concatenated along "time" in the
# order of `fmask_paths`. Masking/scaling is switched off so the stored
# values are read as-is (presumably because they are QA flags - confirm
# against `preprocess_fmask`).
fmask_open_kwargs = dict(
    engine="rasterio",
    preprocess=preprocess_fmask,
    mask_and_scale=False,
    combine="nested",
    concat_dim="time",
    chunks="auto",
    parallel=True,
)

fmask = xr.open_mfdataset(fmask_paths, **fmask_open_kwargs)

## Make a raster definition

In [8]:
# Use the first band file as the template for the raster definition.
# Only metadata is needed, so the file is opened in a `with` block and its
# handle is released as soon as the values below have been read.
with rxr.open_rasterio(band_paths[0]) as example_bands:
    # Attributes and encoding as reported by rioxarray for the raw file;
    # they are used later to build the output encoding.
    example_attrs = example_bands.attrs
    example_encoding = example_bands.encoding

    # Georeferencing shared by every layer written to the output file.
    crs = example_bands.rio.crs
    transform = example_bands.rio.transform()

## Make and apply the encodings

In [9]:
# Extra global attributes for the band dataset (currently none).
band_atts = dict()

# Encoding applied to every band variable on write: the on-disk dtype,
# offset/scale and fill value are taken from the example band file, with
# zlib compression and the CRS variable reference added.
band_encoding = dict(
    dtype=example_encoding["rasterio_dtype"],
    add_offset=example_attrs["add_offset"],
    scale_factor=example_attrs["scale_factor"],
    _FillValue=example_attrs["_FillValue"],
    zlib=True,
    grid_mapping="spatial_ref",
)

# Encoding for the mask / aerosol layers: compression and CRS reference only
# (no explicit dtype is set; "uint8" was considered).
fmask_encoding = dict(zlib=True, grid_mapping="spatial_ref")

In [10]:
# Attach the (currently empty) extra attributes and the band encoding to the
# band dataset.
# NOTE(review): this sets the Dataset-level encoding; the per-variable
# encodings are passed explicitly to `to_netcdf` later - confirm this
# assignment is still needed.
bands.attrs.update(**band_atts)
bands.encoding = band_encoding

# Split the Fmask product into its two outputs: one variable per entry of the
# "flag" dimension, plus the aerosol layer.
masks = fmask["masks"].to_dataset(dim="flag")
aerosol = fmask["aerosol"]

# Stamp the same CRS and affine transform on all three objects, in place.
# No encoding is assigned to `masks` / `aerosol` here.
for layer in (bands, masks, aerosol):
    layer.rio.write_crs(crs, inplace=True)
    layer.rio.write_transform(transform, inplace=True)

In [11]:
# Per-variable encoding passed to `to_netcdf`: band variables get the band
# encoding, every mask variable and the aerosol layer get the fmask encoding.
# Later entries win if a name appears twice, as with sequential assignment.
encoding = {
    **{var_name: band_encoding for var_name in bands.data_vars},
    **{var_name: fmask_encoding for var_name in masks.data_vars},
    "aerosol": fmask_encoding,
}

## Write

In [12]:
# Combine the band dataset, the per-flag mask dataset and the aerosol layer
# into one Dataset for writing.
layers_to_merge = (bands, masks, aerosol)
merged = xr.merge(layers_to_merge)

In [14]:
# Write the merged dataset next to the clip folders, using the per-variable
# encoding assembled earlier in the notebook.
out_path = PARENT_DIR / f"{product}_{clip}_b{res}_{version}.nc"
merged.to_netcdf(out_path, encoding=encoding)