In [18]:
import pathlib
import tempfile
import zipfile

import geopandas
import pystac
import stac2dcache
import urlpath

from shapely.geometry import Polygon
from stac2dcache.utils import get_asset

from eratosthenes.generic.gis_tools import ll2utm, shape2raster

# Rasterize and retile coastline shape according to Sentinel-2 scheme

In [3]:
# URL of the root of the STAC catalog with the imagery (read below)
catalog_url = (
    "https://webdav.grid.surfsara.nl:2880/pnfs/grid.sara.nl"
    "/data/eratosthenes/disk/red-glacier_sentinel-2"
)
# ID of the child of the catalog whose items are processed
collection_id = "sentinel-s2-l1c"
# URL of the GSHHS coastline shapefile (.shp part)
gshhs_url = (
    "https://webdav.grid.surfsara.nl:2880/pnfs/grid.sara.nl"
    "/data/eratosthenes/disk/GIS/Coastline/GSHHS_f_L1.shp"
)
# directory in which the temporary working directory is created
tmp_path = "./"
# remote directory where the rasterized tiles are uploaded
gshhs_tiles_url = (
    "https://webdav.grid.surfsara.nl:2880/pnfs/grid.sara.nl"
    "/data/eratosthenes/disk/RasterGSHHS_tiles_sentinel-2"
)

In [4]:
# set up the dCache filesystem object, using the token file "macaroon.dat"
dcache = stac2dcache.configure(filesystem="dcache", token_filename="macaroon.dat")
# endpoint of the dCache API used by the filesystem object
dcache.api_url = "https://dcacheview.grid.surfsara.nl:22880/api/v1"

In [5]:
def read_catalog(url):
    """
    Read STAC catalog from URL

    The URL may point either to the catalog root or directly to a file
    whose name ends with "catalog.json". Trailing slashes on the root URL
    are ignored.

    :param url: URL of the catalog root or of its catalog.json file; a
        string, or any object whose ``str()`` is the URL
    :return: PySTAC Catalog object
    """
    # accept URL/path-like objects as well as plain strings
    href = str(url)
    if not href.endswith("catalog.json"):
        # strip trailing slashes so we never build ".../" + "/catalog.json"
        href = f"{href.rstrip('/')}/catalog.json"
    return pystac.Catalog.from_file(href)

In [6]:
# read image catalog
catalog = read_catalog(catalog_url)
subcatalog = catalog.get_child(collection_id)
# get_child returns None when no child has the requested ID: fail here with
# a clear message instead of with an AttributeError in a later cell
if subcatalog is None:
    raise ValueError(
        f"Collection '{collection_id}' not found in catalog {catalog_url}"
    )

In [7]:
# item properties that, concatenated in this order, make up the tile ID
TILE_ID_KEYS = [
    "sentinel:utm_zone",
    "sentinel:latitude_band",
    "sentinel:grid_square",
]


def get_sentinel2_tile_id(item):
    """
    Build the tile ID of a Sentinel-2 STAC item from its properties

    The values of the properties listed in TILE_ID_KEYS are converted to
    strings and concatenated.

    :param item: PySTAC Item object
    :return: tile ID (string)
    """
    parts = (str(item.properties[key]) for key in TILE_ID_KEYS)
    return "".join(parts)

In [8]:
# scan the catalog and keep the first item encountered for each tile ID
tiles = {}
for item in subcatalog.get_all_items():
    tile_id = get_sentinel2_tile_id(item)
    # setdefault leaves an already registered tile untouched
    tiles.setdefault(tile_id, item)
tiles

{'5VMG': <Item id=S2B_5VMG_20210329_0_L1C>}

In [77]:
# rasterize the coastline on the grid of each identified tile and upload it
with tempfile.TemporaryDirectory(dir=tmp_path) as tmpdir:

    local_path = pathlib.Path(tmpdir)

    # a shapefile is made of several files sharing the same stem: look for
    # every remote file named like the .shp, whatever its extension
    url = urlpath.URL(gshhs_url)
    remote_shp_path = pathlib.Path(url.path)
    remote_regex_path = remote_shp_path.with_suffix(".*")
    remote_paths = dcache.glob(remote_regex_path.as_posix())

    # get (all parts of) shapefile
    for remote_path in remote_paths:
        path = pathlib.Path(remote_path)
        dcache.download(
            path.as_posix(),
            (local_path/path.name).as_posix()
        )

    local_shp_path = local_path/remote_shp_path.name
    gshhs = geopandas.read_file(local_shp_path)

    for tile_id, item in tiles.items():

        # only the grid definition (CRS, transform, size) of the band is
        # used below; load=False presumably skips reading the pixel values
        da = get_asset(
            catalog,
            asset_key="B02",
            item_id=item.id,
            filesystem=dcache,
            load=False
        )
        crs = da.spatial_ref.crs_wkt
        transform = da.rio.transform()

        # tile footprint expressed in the CRS of the coastline dataset
        tile_geometry = Polygon.from_bounds(
            *da.rio.transform_bounds(gshhs.crs)
        )

        # select the coastline polygons touching the tile and clip them
        mask = gshhs.intersects(tile_geometry)
        gshhs_sub = gshhs[mask]
        res = gshhs_sub.intersection(tile_geometry)

        # merge the clipped polygons into one geometry in the tile CRS.
        # The geometry has been reprojected to `crs`, so the frame must be
        # tagged with `crs` as well (not with the CRS of the source data),
        # otherwise the shapefile is written with wrong projection metadata
        shape_utm = geopandas.GeoDataFrame(
            dict(
                land=[1],
                geometry=[res.to_crs(crs).unary_union]
            ),
            crs=crs
        )
        local_shp_utm_path = local_shp_path.with_name(
            local_shp_path.stem + "_utm.shp"
        )
        shape_utm.to_file(local_shp_utm_path.as_posix())
        # burn the "land" attribute on the grid of the tile
        # NOTE(review): the upload below assumes shape2raster writes
        # "<tile_id>.tif" in the current working directory - confirm
        shape2raster(local_shp_utm_path.as_posix(), tile_id, transform.to_gdal(),
                     da.y.size, da.x.size, aoi="land")

        # save raster file and upload it
        output_file = f"{tile_id}.tif"
        dcache.upload(output_file, f"{gshhs_tiles_url}/{output_file}")

GDAL headers saved to: /var/folders/t6/r2gjczqj7bb8798wr4g1p87m0000gn/T/tmpy6jtkan9
