In [None]:
from datetime import datetime as dt

import geopandas as gpd
import numpy as np

from pathlib import Path

import pandas as pd
from pprint import pprint

import rasterio as rio
from rasterio import windows

from shapely import box

from src import hls_tools as hls

In [3]:
def clip_and_stack(entries, location, write_to):
    """Clip single-band rasters to a shared extent and stack them into one file.

    Band 1 of every raster in ``entries`` is read through a window covering
    the overlap between that raster's bounds and the bounding box of
    ``location`` (reprojected to the raster's CRS). The clipped bands are then
    written, in the order given by ``entries``, to one multi-band raster at
    ``write_to``; each output band is described by its name in ``entries``.

    Tags that hold a single unique value across the inputs are copied to the
    output. If the inputs carry ``add_offset`` / ``scale_factor`` tags, those
    are applied as the per-band offsets / scales of the output.

    Parameters
    ----------
    entries : pandas.Series or dict
        Mapping of band name to raster path (anything ``rasterio.open``
        accepts).
    location : geopandas.GeoDataFrame
        Area of interest. Only ``location.to_crs(crs).total_bounds`` is used,
        so any object exposing that interface works.
    write_to : str or pathlib.Path
        Destination of the stacked raster.

    Raises
    ------
    TypeError
        If ``entries`` is neither a pandas Series nor a dictionary.
    ValueError
        If ``entries`` is empty, or if a raster does not overlap ``location``.
    RuntimeError
        If the clipped rasters disagree on any profile entry (e.g. CRS,
        transform, height or width) and therefore cannot share one profile.
    """

    if isinstance(entries, pd.Series):
        entries = entries.to_dict()
    elif not isinstance(entries, dict):
        raise TypeError(f"entries must be a pandas series or a dictionary, not {type(entries)}")

    # Fail early with a clear message instead of an obscure concat error below.
    if not entries:
        raise ValueError("entries is empty, there is nothing to clip and stack")

    # Retrieve the content of the rasters
    array_pairs, profile_pairs, tag_pairs = {}, {}, {}
    for raster_name, raster_path in entries.items():

        with rio.open(raster_path) as raster:

            # Set the profile
            profile = raster.profile

            # Set the shared extent: raster bounds intersected with the bounding
            # box of `location`, both expressed in the raster's CRS.
            location_bounds = box(*location.to_crs(profile["crs"]).total_bounds)
            raster_bounds = box(*raster.bounds)
            shared_extent = raster_bounds.intersection(location_bounds)
            if shared_extent.is_empty:
                raise ValueError(
                    f"{raster_name} ({raster_path}) does not overlap the requested location"
                )
            shared_bounds = shared_extent.bounds

            # Set the clipping window, and update the profile so that it
            # describes the clipped array rather than the full raster.
            window = windows.from_bounds(*shared_bounds, profile["transform"])
            window = window.round()
            profile["transform"] = windows.transform(window, profile["transform"])
            profile["height"] = window.height
            profile["width"] = window.width

            # Do a PARTIAL reading, set profiles and tags
            array_pairs[raster_name] = raster.read(1, window=window)

            profile_pairs[raster_name] = pd.Series(profile)
            tag_pairs[raster_name] = pd.Series(raster.tags())

    # Concatenate the profiles and the tags (one row per raster)
    profile_frame = pd.concat(profile_pairs, axis=1).T
    tag_frame = pd.concat(tag_pairs, axis=1).T

    # Every profile entry must be identical across rasters to share one output
    nunique_profiles = profile_frame.nunique(axis=0)
    several_profiles = (nunique_profiles > 1)
    if several_profiles.any():
        offending_entries = nunique_profiles[several_profiles].index.tolist()
        raise RuntimeError(f"{offending_entries} have several possible values")

    # make the output profile and update the band count
    out_profile = profile_frame.drop_duplicates().iloc[0].to_dict()
    out_profile["count"] = len(array_pairs)

    # keep only the tags that hold exactly one unique value across the rasters
    nunique_tags = tag_frame.nunique(axis=0)
    relevant_tags = nunique_tags[nunique_tags == 1].index.tolist()
    tags = tag_frame[relevant_tags].drop_duplicates().iloc[0].to_dict()

    # Write the output file
    with rio.open(write_to, "w", **out_profile) as out_raster:

        # update the scales and offsets (one value per band, in band order)
        if "add_offset" in tag_frame.columns:
            out_raster.offsets = tag_frame["add_offset"].astype(float)

        if "scale_factor" in tag_frame.columns:
            out_raster.scales = tag_frame["scale_factor"].astype(float)

        out_raster.update_tags(**tags)

        for band_idx, (band_name, band_data) in enumerate(array_pairs.items(), 1):

            out_raster.write(band_data, band_idx)
            out_raster.set_band_description(band_idx, band_name)

In [4]:
# Root folder of the HLS data: the source rasters are searched for recursively
# below it, and the clipped stacks are written directly into it.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
PARENT_DIR = Path(r"/home/iborlafm/Downloads/Mozambique/hls")

In [5]:
# Area of interest used to clip the rasters, read from the GeoJSON stored in PARENT_DIR.
aoi = gpd.read_file(PARENT_DIR / "extent.geojson")
# Geometry of the first AOI feature.
# NOTE(review): `geom` does not appear to be used by any cell shown here — confirm it is still needed.
geom = aoi.loc[0, "geometry"]

## List the available files

In [6]:
# Collect every file matching "HLS.*tif" below PARENT_DIR (recursively) and tabulate it.
# Judging by the displayed frame, the helper returns one row per file with
# uri / product / tile / time / sensor / version / suffix / stem columns.
uri_frame = hls.tabulate_hls_uris(PARENT_DIR.rglob("HLS.*tif"))

In [7]:
# Display the tabulated URIs as a visual sanity check of the parsed columns.
uri_frame

Unnamed: 0,uri,product,tile,time,sensor,version,suffix,stem
15,/home/iborlafm/Downloads/Mozambique/hls/2018/H...,HLS,T36KXE,2018239T073719,S30,v2.0,B01,HLS_T36KXE_2018239T073719_S30_v2.0
21,/home/iborlafm/Downloads/Mozambique/hls/2018/H...,HLS,T36KXE,2018239T073719,S30,v2.0,B02,HLS_T36KXE_2018239T073719_S30_v2.0
26,/home/iborlafm/Downloads/Mozambique/hls/2018/H...,HLS,T36KXE,2018239T073719,S30,v2.0,B03,HLS_T36KXE_2018239T073719_S30_v2.0
24,/home/iborlafm/Downloads/Mozambique/hls/2018/H...,HLS,T36KXE,2018239T073719,S30,v2.0,B04,HLS_T36KXE_2018239T073719_S30_v2.0
30,/home/iborlafm/Downloads/Mozambique/hls/2018/H...,HLS,T36KXE,2018239T073719,S30,v2.0,B05,HLS_T36KXE_2018239T073719_S30_v2.0
22,/home/iborlafm/Downloads/Mozambique/hls/2018/H...,HLS,T36KXE,2018239T073719,S30,v2.0,B06,HLS_T36KXE_2018239T073719_S30_v2.0
29,/home/iborlafm/Downloads/Mozambique/hls/2018/H...,HLS,T36KXE,2018239T073719,S30,v2.0,B07,HLS_T36KXE_2018239T073719_S30_v2.0
23,/home/iborlafm/Downloads/Mozambique/hls/2018/H...,HLS,T36KXE,2018239T073719,S30,v2.0,B08,HLS_T36KXE_2018239T073719_S30_v2.0
31,/home/iborlafm/Downloads/Mozambique/hls/2018/H...,HLS,T36KXE,2018239T073719,S30,v2.0,B08A,HLS_T36KXE_2018239T073719_S30_v2.0
20,/home/iborlafm/Downloads/Mozambique/hls/2018/H...,HLS,T36KXE,2018239T073719,S30,v2.0,B09,HLS_T36KXE_2018239T073719_S30_v2.0


## Reshape the frame, rename, dump non-shared bands

In [8]:
# Reshape the URI table for stacking.
# NOTE(review): presumably one row per observation and one column per harmonized
# band name (e.g. "Blue", "Fmask"), each cell holding a raster path — confirm
# against src/hls_tools.harmonize_hls_frame.
both_frame = hls.harmonize_hls_frame(uri_frame)

## Clip and stack the rasters

In [9]:
# Columns of `both_frame` stacked into each "<observation>_bands.tif" file;
# the list order is the band order of the output.
selected_columns = ["Blue", "Green", "Red", "NIRnarrow", "SWIR1", "SWIR2"]

In [10]:
for observation_name, observation_files in both_frame.iterrows():

    # Selected spectral bands of this observation -> "<observation>_bands.tif"
    band_entries = observation_files[selected_columns]
    band_target = PARENT_DIR / f"{observation_name}_bands.tif"
    clip_and_stack(entries=band_entries, location=aoi, write_to=band_target)

    # Fmask layer of this observation, kept in its own single-band file
    fmask_entries = observation_files[["Fmask"]]
    fmask_target = PARENT_DIR / f"{observation_name}_Fmask.tif"
    clip_and_stack(entries=fmask_entries, location=aoi, write_to=fmask_target)

In [11]:
import rioxarray as rxr
import xarray as xr

In [12]:
# Open one stacked "_bands.tif" file with rioxarray to inspect it.
# NOTE(review): this path lies outside PARENT_DIR, where the clip/stack loop writes
# its outputs — presumably the file was moved or copied there; confirm before re-running.
ex= rxr.open_rasterio(
    "/home/iborlafm/shares/radar/Projects/DrySat/07_data/HarmonizedLandsatSentiel/HLS_T36KXE_2018239T073719_S30_v2.0_bands.tif",
    # band_as_variable=True  (disabled; would load each band as a separate variable)
    )

In [13]:
# Inspect the CRS of the raster opened into `ex`.
ex.rio.crs

CRS.from_wkt('PROJCS["WGS 84 / UTM zone 36N",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",33],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","32636"]]')

In [14]:
# Display the AOI as read from file; chain `.to_crs(ex.rio.crs)` to view it in the raster's CRS instead.
aoi

Unnamed: 0,Name,geometry
0,36KXE,"MULTIPOLYGON (((33.94931 -18.65128, 34.34486 -..."
