In [1]:
import numpy as np
import rasterio as rio
from rasterio import windows
from pathlib import Path

from typing import List

In [2]:
def list_gfc_urls(
    x_min: int | float,
    x_max: int | float,
    y_min: int | float,
    y_max: int | float,
    product: str,
    step_size: int = 10,
    version: str ="GFC-2024-v1.12",
    root: str ="https://storage.googleapis.com/earthenginepartners-hansen",
    ) -> List[str]:

    # Check wether the chosen product is available
    available_products = ("treecover2000", "gain", "lossyear", "datamask", "first", "last")
    if product not in available_products:
        raise ValueError(f"'{product}' is not one of the supported products {available_products}")
    
    # Make the common header
    header = f"{root}/{version}/Hansen_{version}_{product}"

    # Make bouns to use during the iteration
    original_bounds = np.array([x_min, x_max, y_min, y_max])
    divided_bounds = original_bounds/step_size
    left, bottom = 10 * np.floor(divided_bounds[[0, 2]]).astype(int)
    right, top = 10 * np.ceil(divided_bounds[[1, 3]]).astype(int)

    # Iterate to create the tile names
    urls = []
    for row_idx in range(bottom + step_size, top + step_size, step_size):

        row_direction = "N" if row_idx >= 0 else "S"
        row_name = f"{abs(row_idx):02}{row_direction}"

        for col_idx in range(left, right, step_size):

            col_direction = "E" if col_idx >= 0 else "W"
            col_name = f"{abs(col_idx):03}{col_direction}"

            tile_url = f"{header}_{row_name}_{col_name}.tif"
            urls.append(tile_url)

    return urls

In [3]:
# Extent of the area of interest; the keys match the keyword arguments of
# list_gfc_urls and clip_rasters so the dict can be unpacked into both.
bounding_box = dict(
    y_min=-19.6,
    y_max=-17.1,
    x_min=32.9,
    x_max=34.4,
)

# Folder used as output directory for the clipped rasters.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
DATA_DIR = Path("/home/iborlafm/shares/home/20250729-DrySat/data")

In [4]:
# URLs of the "lossyear" tiles that cover the bounding box
gfc_tiles = list_gfc_urls(product="lossyear", **bounding_box)

In [None]:
def clip_rasters(
    uris: List[str | Path],
    x_min: int | float,
    x_max: int | float,
    y_min: int | float,
    y_max: int | float,
    output_dir: str | Path
) -> bool:

    target_bounds = (x_min, y_min, x_max, y_max)
    
    exceptions = {}
    for raster_uri in uris:

        try:

            # https://gdal.org/en/stable/user/virtual_file_systems.html
            with rio.open(raster_uri, "r") as original_raster:
                
                # Read the dataset description
                profile = original_raster.profile
                original_affine = profile["transform"]
                
                # set the window and its transform
                overlap_window = windows.intersection(
                    windows.from_bounds(*target_bounds, original_affine),
                    windows.from_bounds(*original_raster.bounds, original_affine)
                )
                overlap_affine = windows.transform(overlap_window, original_affine)
                
                # read the relevant part
                array = original_raster.read(window=overlap_window)
            
            # Update the profile
            count, height, width = array.shape
            profile.update(count=count, height=height, width=width, transform=overlap_affine)

            # Determine the output name
            if isinstance(raster_uri, Path):
                original_stem = raster_uri.stem
                original_suffixes = ".".join(raster_uri.suffixes)
            elif isinstance(raster_uri, str):
                name_parts = raster_uri.split("/")[-1].split(".")
                original_stem = name_parts[0]
                original_suffixes = ".".join(name_parts[1:])
            output_name = f"{original_stem}_clip.{original_suffixes}"

            # Ceate a smaller raster
            output_path = Path(output_dir) / output_name
            with rio.open(output_path, "w", **profile) as clipped_raster:
                clipped_raster.write(array)
        
        except Exception as exception_messsage:
            exceptions[raster_uri] = exception_messsage

    return exceptions

# Clip every tile to the bounding box; the returned mapping of failures is
# left as the last expression so it is shown as the cell output.
clip_rasters(uris=gfc_tiles, **bounding_box, output_dir=DATA_DIR)