In [1]:
from datetime import datetime as dt
import numpy as np
from pathlib import Path
import re
import rioxarray as rxr
import xarray as xr

In [2]:
def _parse_sensing_timestamp(stamp):
    """Parse a single ISO-8601 timestamp string into a ``datetime``.

    Surrounding whitespace and a trailing ``Z`` designator are removed when
    present. Fractional seconds are normalised to exactly six digits
    (truncated or zero-padded) so the string also parses on Python versions
    whose ``datetime.fromisoformat`` only accepts 3 or 6 fractional digits.

    Raises:
        ValueError: if the remaining text is not a valid ISO-8601 timestamp.
    """
    text = stamp.strip()
    # Only drop the last character when it really is the UTC designator
    if text[-1:] in ("Z", "z"):
        text = text[:-1]
    text = re.sub(
        r"\.(\d+)",
        lambda match: "." + match.group(1)[:6].ljust(6, "0"),
        text,
        count=1,
    )
    return dt.fromisoformat(text)


def att2time(att):
    """Convert a sensing-time attribute string to a single ``datetime``.

    The attribute holds either one timestamp or several timestamps joined
    by ``;`` (whitespace optional) or by ``+`` followed by whitespace.
    For a single timestamp that instant is returned; for several, the
    midpoint between the earliest and the latest one is returned.

    Args:
        att: the attribute text, e.g. ``"2020-01-05T07:00:00Z"`` or
            ``"2020-01-05T07:00:00Z; 2020-01-05T07:00:10Z"``.

    Returns:
        A ``datetime`` (naive when the timestamps carry only a ``Z`` suffix).

    Raises:
        ValueError: if no timestamp is found or a timestamp cannot be parsed.
    """
    # "+" must be followed by whitespace so that a UTC offset such as
    # "+00:00" inside a timestamp is never mistaken for a separator.
    pieces = [p for p in re.split(r"\s*;\s*|\s*\+\s+", att.strip()) if p]
    if not pieces:
        raise ValueError(f"no timestamp found in {att!r}")

    times = [_parse_sensing_timestamp(p) for p in pieces]
    start, end = min(times), max(times)
    # For a single timestamp start == end, so this returns it unchanged
    return start + (end - start) / 2

In [3]:
# %xmode minimal

In [4]:
# Root folder of the HLS working area
PARENT_DIR = Path(r"/home/iborlafm/Downloads/Mozambique/hls")

# Sub-folder holding the input rasters
IN_DIR = PARENT_DIR.joinpath("automated")

## List the files

In [5]:
# Identifiers that make up the file names of the requested scenes
product = "HLS"
tile = "T36KXE"
res = "30"
version = "v2.0"

# All matching multi-band rasters in the input folder, sorted by path
band_paths = sorted(
    IN_DIR.glob(f"{product}_{tile}_*{res}_{version}_bands.tif")
)

In [6]:
# Derive each Fmask path from its bands path: same folder, with "_bands"
# swapped for "_fmask" in the file name.
fmask_paths = [
    band_path.with_name(band_path.name.replace("_bands", "_fmask"))
    for band_path in band_paths
]

## Preprocess the Fmask

In [7]:
def preprocess_fmask(fmask):
    """Decode one Fmask raster into named boolean masks plus an aerosol layer.

    The ``band_data`` variable is unpacked bit by bit along its ``band``
    axis into a new ``flag`` axis (``np.unpackbits`` with its default bit
    order, i.e. most significant bit first). Positions 2 onwards become the
    boolean ``masks`` variable, positions 0 and 1 are combined into the
    ``aerosol`` variable, and a length-one ``time`` dimension is prepended
    using the ``SENSING_TIME`` attribute of ``band_data``.

    NOTE(review): assigning six flag names assumes ``band_data`` is a
    single-band 8-bit raster, so that unpacking yields eight flags -- confirm.
    """
    packed = fmask["band_data"]

    # One output layer per bit; "band" is consumed and replaced by "flag"
    unpacked = xr.apply_ufunc(
        np.unpackbits,
        packed,
        kwargs={"axis": 0},
        input_core_dims=[["band", "y", "x"]],
        output_core_dims=[["flag", "y", "x"]],
        exclude_dims={"band"},
        keep_attrs=True,
        dask="allowed",
    )

    # Boolean masks, labelled in the order the bits come out
    mask_names = [
        "water", "snow/ice",
        "cloud shadow", "adjacent to cloud", "cloud", "cirrus cloud"
    ]
    masks = unpacked.sel(flag=slice(2, 9)).astype(bool)
    masks["flag"] = mask_names
    masks.name = "masks"

    # The two leading bits are written side by side as decimal digits,
    # giving the values 0, 1, 10 or 11
    aerosol_bits = unpacked.sel(flag=slice(0, 2))
    low_bit = aerosol_bits.sel(flag=1)
    high_bit = aerosol_bits.sel(flag=0)
    aerosol = low_bit + 10 * high_bit
    aerosol.name = "aerosol"

    # Stamp the scene with its sensing time as a new leading dimension
    sensing_time = att2time(packed.attrs["SENSING_TIME"])
    merged = xr.merge([masks, aerosol])
    return merged.expand_dims({"time": [sensing_time]}, axis=0)

# flags = preprocess_fmask(fmask)

In [8]:
# Open every Fmask raster as dask-backed data and stack the files along
# "time"; preprocess_fmask is applied to each file on its own.
fmask = xr.open_mfdataset(
    fmask_paths,
    engine="rasterio",
    preprocess=preprocess_fmask,
    combine="nested",
    concat_dim="time",
    chunks="auto",
    parallel=True,
    # Keep the stored values untouched so the bit field can be unpacked
    mask_and_scale=False,
)

## Preprocess the bands

In [9]:
def preprocess_bands(bands):
    """Rename one scene's variables and stamp the scene with its time.

    Each variable yielded by ``bands.keys()`` is renamed to the value of its
    own ``long_name`` attribute, and a length-one ``time`` dimension is added
    whose value is derived from the dataset-level ``SENSING_TIME`` attribute.
    """
    # Current variable name -> descriptive long name
    long_names = {
        current: bands[current].attrs["long_name"] for current in bands.keys()
    }

    sensing_time = att2time(bands.attrs["SENSING_TIME"])
    labelled = bands.rename_vars(long_names)
    return labelled.expand_dims({"time": [sensing_time]})


In [10]:
# Open every bands raster as dask-backed data and stack the files along
# "time"; preprocess_bands is applied to each file on its own.
# mask_and_scale is deliberately left at the backend default here.
bands = xr.open_mfdataset(
    band_paths,
    engine="rasterio",
    band_as_variable=True,
    preprocess=preprocess_bands,
    combine="nested",
    concat_dim="time",
    chunks="auto",
    parallel=True,
)

## Make a raster definition

In [11]:
# Open the first bands file on its own to borrow its metadata
example_bands = rxr.open_rasterio(band_paths[0])

# Georeferencing that is re-applied to the outputs
crs = example_bands.rio.crs
transform = example_bands.rio.transform()

# Attribute and encoding dictionaries that the output settings are read from
example_encoding = example_bands.encoding
example_attrs = example_bands.attrs

In [12]:
# Encoding keyword names, kept for reference.
# NOTE(review): presumably the keys accepted when encoding netCDF variables;
# the list is not referenced by any other visible cell -- confirm before use.
nc_keywords = [
    'significant_digits',
    'complevel',
    'contiguous',
    'szip_coding',
    'fletcher32',
    'shuffle',
    'quantize_mode',
    'zlib',
    'endian',
    'chunksizes',
    'szip_pixels_per_block',
    'blosc_shuffle',
    '_FillValue',
    'dtype',
    'compression',
    'least_significant_digit',
]

# att_keywords = ["grid_mapping", "_FillValue"] # "add_offset", "scale_factor",  

## Mask and make the yearly mosaics

In [13]:
# Calendar years of the earliest and the latest scene, as plain Python ints
first_year, last_year = (
    bands["time"].min().dt.year.item(),
    bands["time"].max().dt.year.item(),
)

In [None]:
# Attributes written onto every output variable. The data are re-packed by
# hand below, so offset and scale are stored as plain attributes.
out_attrs = {
    #'unit': '%',
    # 'long_name': 'Quality Flag',
    #'Class': 'DATA',
    # 'grid_mapping': example_encoding["grid_mapping"],
    #'coordinates': 'time lat lon',
    'add_offset': example_attrs["add_offset"],
    'scale_factor': example_attrs["scale_factor"],
    #'valid_range': [0, 200]
    }

# On-disk encoding of every output variable
out_encoding = {
    'dtype': example_encoding["rasterio_dtype"],
    #'add_offset': example_attrs["add_offset"],
    #'scale_factor': example_attrs["scale_factor"],
    '_FillValue': example_attrs["_FillValue"],
    "zlib": True,

}

# What to exclude
cloud_flags = ['cloud shadow', 'adjacent to cloud', 'cloud', 'cirrus cloud']

# Edge length (pixels) of the spatial chunks used for the quantile step.
# The whole time axis has to sit in one chunk there, so the spatial chunks
# are kept small to bound memory use; tune to the available memory.
SPATIAL_CHUNK = 512

# For every year
for target_year in range(first_year, last_year + 1):

    # Existing outputs are never recomputed
    out_path = PARENT_DIR / f"{product}_{tile}_{target_year}_b{res}_{version}.nc"
    if out_path.exists():
        continue

    # Skip years inside the range that have no scene at all
    in_year = bands.time.dt.year == target_year
    if not in_year.any():
        print(f"{target_year}: no scenes, skipped")
        continue

    # define what to mask: a pixel is cloudy if any of the cloud flags is set
    is_cloudy = (
        fmask["masks"]
        .sel(time=(fmask.time.dt.year == target_year), flag=cloud_flags)
        .any(dim="flag")
        )

    # Mask the bands, calculate the quantiles
    quantiles = (
        bands.sel(time=in_year)
        .sortby("time")
        .where(~is_cloudy)
        # Each input file is its own chunk along "time", but quantile needs
        # the reduced dimension in a single chunk when the data are dask arrays
        .chunk({"time": -1, "y": SPATIAL_CHUNK, "x": SPATIAL_CHUNK})
        # .resample({"time": "YS"})
        .quantile(np.arange(0, 1.01, .1), dim="time", skipna=True)
        .sortby("y", ascending=False)
    )

    # Re-pack to the stored scale: packed = (value - add_offset) / scale_factor,
    # the inverse of the unpacking described by the attributes written below
    scaled = (
        ((quantiles - out_attrs["add_offset"]) / out_attrs["scale_factor"])
        .rio.write_crs(crs)
        .rio.write_transform(transform)
        )

    # set the attributes and the encoding
    for band_name in quantiles.data_vars:

        scaled[band_name].attrs.update(long_name=band_name, **out_attrs)
        scaled[band_name].encoding.update(**out_encoding)


    # Write
    scaled.to_netcdf(out_path)

    print(target_year)