In [1]:
import os
from pathlib import Path

import numpy as np
import rioxarray as rxr
import xarray as xr

In [2]:
from src.hls_tools import preprocess_fmask, preprocess_bands

In [3]:
# Root folder of the HLS data. Set the HLS_PARENT_DIR environment variable to
# run the notebook on another machine; when it is unset the original location
# is used, so existing runs behave exactly as before.
PARENT_DIR = Path(
    os.environ.get("HLS_PARENT_DIR", r"/home/iborlafm/Downloads/Mozambique/hls")
)

# Folder that is searched for the per-scene band and Fmask GeoTIFFs.
IN_DIR = PARENT_DIR / "automated"

## List the files

In [4]:
# Identifiers that make up the input file names and, later, the output names.
product, tile, res, version = "HLS", "T36KXE", "30", "v2.0"

# One "*_bands.tif" file per scene found in IN_DIR, sorted by path.
band_pattern = f"{product}_{tile}_*{res}_{version}_bands.tif"
band_paths = sorted(IN_DIR.glob(band_pattern))

# Fail early with a readable message: the following cells open every file in
# this list and index band_paths[0], so an empty list would only surface as a
# much less obvious error further down.
if not band_paths:
    raise FileNotFoundError(f"No files matching '{band_pattern}' in {IN_DIR}")

In [5]:
# The Fmask raster of a scene lives in the same folder as its band stack; only
# the "_bands" part of the file name is swapped for "_fmask".
fmask_paths = [
    band_path.with_name(band_path.name.replace("_bands", "_fmask"))
    for band_path in band_paths
]

## Preprocess the Fmask

In [6]:
# Options for stacking the per-scene Fmask rasters along a new "time" axis.
fmask_open_options = dict(
    engine="rasterio",
    combine="nested",
    concat_dim="time",
    chunks="auto",
    parallel=True,
    # Keep the raw stored values: no nodata masking, no scale/offset decoding.
    mask_and_scale=False,
    # Applied to every file's dataset before the files are concatenated.
    preprocess=preprocess_fmask,
)

fmask = xr.open_mfdataset(fmask_paths, **fmask_open_options)

## Preprocess the bands

In [7]:
# Options for stacking the per-scene band rasters along a new "time" axis.
# mask_and_scale is deliberately not passed, so the backend default applies.
# NOTE(review): the mosaic cell divides by scale_factor again, which presumes
# the values arrive here already decoded - confirm against preprocess_bands.
bands_open_options = dict(
    engine="rasterio",
    combine="nested",
    concat_dim="time",
    chunks="auto",
    parallel=True,
    # Expose every raster band as its own data variable.
    band_as_variable=True,
    # Applied to every file's dataset before the files are concatenated.
    preprocess=preprocess_bands,
)

bands = xr.open_mfdataset(band_paths, **bands_open_options)

## Make a raster definition

In [8]:
# Open the first scene on its own; it serves as the template for the
# georeferencing and the storage metadata of the output files.
example_bands = rxr.open_rasterio(band_paths[0])

# Georeferencing that is written onto the yearly mosaics.
crs = example_bands.rio.crs
transform = example_bands.rio.transform()

# Metadata the output attributes and encoding are derived from.
example_encoding = example_bands.encoding
example_attrs = example_bands.attrs

## Mask and make the yearly mosaics

In [10]:
# Calendar years of the earliest and latest scene; the mosaic loop covers
# every year in between, both ends included.
time_axis = bands["time"]
first_year, last_year = (
    time_axis.min().dt.year.item(),
    time_axis.max().dt.year.item(),
)

In [11]:
# Attributes attached to every output variable. The files store un-scaled
# values, so a reader needs scale_factor/add_offset to decode them again.
out_attrs = {
    "add_offset": example_attrs["add_offset"],
    "scale_factor": example_attrs["scale_factor"],
}

# On-disk encoding: same dtype and fill value as the input rasters, compressed.
out_encoding = {
    "dtype": example_encoding["rasterio_dtype"],
    "_FillValue": example_attrs["_FillValue"],
    "zlib": True,
}

# Labels along the "flag" dimension of fmask["masks"] that disqualify a pixel.
cloud_flags = ["cloud shadow", "adjacent to cloud", "cloud", "cirrus cloud"]

# Quantile levels 0, 0.1, ..., 1.0 computed per pixel and band.
quantile_levels = np.arange(0, 1.01, .1)

# One output file per calendar year.
for target_year in range(first_year, last_year + 1):

    out_path = PARENT_DIR / f"{product}_{tile}_{target_year}_b{res}_{version}.nc"
    # Years that were already written are skipped, so the cell can be resumed.
    if out_path.exists():
        continue

    # A pixel/date is rejected as soon as any of the cloud-related flags is set.
    is_cloudy = (
        fmask["masks"]
        .sel(time=(fmask.time.dt.year == target_year), flag=cloud_flags)
        .any(dim="flag")
    )

    # Mask the bands, then take the per-pixel quantiles over the year.
    quantiles = (
        bands.sel(time=(bands.time.dt.year == target_year))
        .sortby("time")
        .where(~is_cloudy)
        # open_mfdataset leaves at least one chunk per file along "time".
        # quantile reduces over that dimension, and xarray versions that run
        # it through apply_ufunc(dask="parallelized") need it in one chunk.
        .chunk({"time": -1})
        .quantile(quantile_levels, dim="time", skipna=True)
        .sortby("y", ascending=False)
    )

    # Invert the CF decoding (decoded = stored * scale_factor + add_offset).
    # The offset must be removed as well, because both attributes are written
    # to the file and are applied again by whoever reads it.
    scaled = (
        ((quantiles - out_attrs["add_offset"]) / out_attrs["scale_factor"])
        .rio.write_crs(crs)
        .rio.write_transform(transform)
    )

    # Set the attributes and the encoding of every band variable.
    for band_name in quantiles.data_vars:
        scaled[band_name].attrs.update(long_name=band_name, **out_attrs)
        scaled[band_name].encoding.update(**out_encoding)

    # Write to a temporary name and rename only once the file is complete.
    # Otherwise an interrupted run would leave a truncated file behind that
    # the exists() check above skips on every later run.
    tmp_path = out_path.with_name(out_path.name + ".part")
    scaled.to_netcdf(tmp_path)
    tmp_path.replace(out_path)

    print(target_year)