In [1]:
import pathlib
import tempfile
import zipfile

import geopandas
import pystac
import stac2dcache

from shapely.geometry import Polygon
from stac2dcache.utils import get_asset

from eratosthenes.generic.gis_tools import ll2utm, shape2raster

# Rasterize and retile RGI according to Sentinel-2 scheme

In [2]:
# STAC catalog with the Sentinel-2 imagery, and the collection to process
catalog_url = (
    "https://webdav.grid.surfsara.nl:2880/pnfs/grid.sara.nl"
    "/data/eratosthenes/disk/red-glacier_sentinel-2"
)
collection_id = "sentinel-s2-l1c"

# remote directory with the RGI archives, and the region index stored in it
rgi_root_url = (
    "https://webdav.grid.surfsara.nl:2880/pnfs/grid.sara.nl"
    "/data/eratosthenes/disk/GIS/Glacier"
)
rgi_index_url = rgi_root_url + "/00_rgi60_regions.zip"

# parent directory for the temporary working directories
tmp_path = "./"

# remote directory where the rasterized RGI tiles are uploaded
rgi_tiles_url = (
    "https://webdav.grid.surfsara.nl:2880/pnfs/grid.sara.nl"
    "/data/eratosthenes/disk/RasterRGI_tiles_sentinel-2"
)

In [3]:
# set up the dCache filesystem object; authentication uses the token
# stored in the local file "macaroon.dat"
dcache = stac2dcache.configure(
    token_filename="macaroon.dat",
    filesystem="dcache",
)
# endpoint of the dCache API used by the filesystem object
dcache.api_url = "https://dcacheview.grid.surfsara.nl:22880/api/v1"

In [4]:
# load the RGI region index from dCache
with dcache.open(rgi_index_url) as rgi_index_file:
    rgi_index = geopandas.read_file(rgi_index_file)

In [5]:
def read_catalog(url):
    """
    Read STAC catalog from URL

    If `url` does not already point to a "catalog.json" file, the file
    name is appended to it. Trailing slashes are stripped first, so that
    "<root>/" and "<root>" resolve to the same "<root>/catalog.json".

    :param url: urlpath to the catalog root (or to its catalog.json)
    :return: PySTAC Catalog object
    """
    if not url.endswith("catalog.json"):
        # avoid building ".../<root>//catalog.json" for roots given
        # with a trailing slash
        url = f"{url.rstrip('/')}/catalog.json"
    catalog = pystac.Catalog.from_file(url)
    return catalog

In [6]:
# read image catalog and select the collection to process
catalog = read_catalog(catalog_url)
subcatalog = catalog.get_child(collection_id)
# get_child returns None when no child has the requested ID: fail here with
# a clear message rather than with an AttributeError when the collection is
# first used
if subcatalog is None:
    raise ValueError(
        f"Collection '{collection_id}' not found in catalog {catalog_url}"
    )

In [7]:
# STAC item properties that, concatenated in this order, give the tile ID
TILE_ID_KEYS = [
    "sentinel:utm_zone",
    "sentinel:latitude_band",
    "sentinel:grid_square",
]


def get_sentinel2_tile_id(item):
    """
    Build the tile ID of a Sentinel-2 STAC item by concatenating the
    string form of its UTM zone, latitude band and grid square properties

    :param item: PySTAC Item object
    :return: tile ID (string)
    """
    props = item.properties
    return "".join(str(props[key]) for key in TILE_ID_KEYS)

In [8]:
# scan the whole collection and keep, for every tile ID present,
# the first item encountered for that tile
tiles = {}
for item in subcatalog.get_all_items():
    tile_id = get_sentinel2_tile_id(item)
    tiles.setdefault(tile_id, item)
tiles

{'5VMG': <Item id=S2B_5VMG_20210329_0_L1C>}

In [9]:
def get_itersecting_RGI_regions(index, geometry):
    """
    Select the RGI regions whose geometry intersects the given geometry
    and return their RGI codes. NOTE: the region index and the geometry
    must be expressed in the same CRS!

    :param index: RGI region index (GeoDataFrame)
    :param geometry: shapely geometry object
    :return: RGI codes of the intersecting regions (the "RGI_CODE" column
        restricted to the matching rows)
    """
    intersecting = index.loc[index.intersects(geometry)]
    return intersecting["RGI_CODE"]

In [10]:
# loop over identified tiles: for each one, rasterize the outlines of the
# intersecting RGI region on the tile grid and upload the result
for tile_id, item in tiles.items():

    # open band B02 of the item; only its CRS, affine transform and raster
    # size are used below (load=False presumably skips reading the pixel
    # values - confirm against stac2dcache.utils.get_asset)
    da = get_asset(
        catalog,
        asset_key="B02",
        item_id=item.id,
        filesystem=dcache,
        load=False
    )
    crs = da.spatial_ref.crs_wkt
    transform = da.rio.transform()

    # tile footprint expressed in the CRS of the RGI index, as required by
    # get_itersecting_RGI_regions
    tile_geometry = Polygon.from_bounds(
        *da.rio.transform_bounds(rgi_index.crs)
    )
    rgi_codes = get_itersecting_RGI_regions(
        rgi_index,
        tile_geometry
    )

    assert len(rgi_codes) == 1, "Only one RGI region implemented!"

    # locate the archive of the intersecting region on dCache
    rgi_code = rgi_codes.iloc[0]
    rgi_region_pattern = f"{rgi_root_url}/{rgi_code:02d}_rgi60_*.zip"
    rgi_region_matches = dcache.glob(rgi_region_pattern)
    if not rgi_region_matches:
        # fail with the pattern in the message instead of a bare
        # IndexError from pop() on an empty result
        raise FileNotFoundError(
            f"No RGI region archive matching {rgi_region_pattern}"
        )
    rgi_region_path = rgi_region_matches.pop()

    # work in a temporary directory created under tmp_path; it is removed
    # when the with-block exits
    with tempfile.TemporaryDirectory(dir=tmp_path) as tmpdir:

        # download and unpack the region archive
        remote_path = pathlib.Path(rgi_region_path)
        local_path = pathlib.Path(tmpdir) / remote_path.name
        dcache.download(remote_path.as_posix(), local_path.as_posix())
        with zipfile.ZipFile(local_path, "r") as f:
            f.extractall(path=tmpdir)
        # the shapefile is expected to have the same base name as the archive
        shapefile = local_path.with_suffix(".shp")
        shapefile_utm = shapefile.with_name("utm.shp")

        # convert the shapefile to the tile CRS, then rasterize it using the
        # transform and (rows, columns) size of the tile
        ll2utm(shapefile.as_posix(), shapefile_utm.as_posix(), crs)
        shape2raster(
            shapefile_utm.as_posix(),
            tile_id,
            transform.to_gdal(),
            da.y.size,
            da.x.size
        )

    # upload the raster file to dCache (shape2raster presumably writes
    # "<tile_id>.tif" in the working directory - confirm)
    output_file = f"{tile_id}.tif"
    dcache.upload(output_file, f"{rgi_tiles_url}/{output_file}")

GDAL headers saved to: /var/folders/t6/r2gjczqj7bb8798wr4g1p87m0000gn/T/tmpa8ahdkd9
