In [1]:
import os

import geopandas
import pyproj
import pystac
import shapely
import stac2dcache

from stac2dcache.utils import get_asset

# Generate catalog for shadow classification

We look here for all the Sentinel-2 scenes that are suitable for the shadow classification of the specified area of interest (AOI). We then create a new catalog whose items link to the matching:
* L1C scenes: used as raw data for the shadow classification algorithm.
* L2A scenes: if available, we use these to get the scene classification layer (SCL), from which we compute the cloud mask. 

Initially we included all the scenes with a footprint intersecting the AOI. We now select only the scenes whose footprint covers a significant part of the AOI (>70%), thus dropping the few scenes which had a very small overlap.

In [2]:
# input cell
# Path to the token file passed to `stac2dcache.configure(token_filename=...)`.
MACAROON_PATH = "./macaroon.dat"

# ID of the output catalog; it is also the last path component of CATALOG_OUT_URL.
CATALOG_OUT_ID = "red-glacier_shadows"
# Layout template handed to `Catalog.generate_subcatalogs` when saving the output catalog.
CATALOG_OUT_TEMPLATE = "${year}/${month}/${day}"

# Input catalogs (Sentinel-2 L1C and L2A scenes) and the location of the output catalog.
CATALOG_L1C_URL = "https://webdav.grid.surfsara.nl:2880/pnfs/grid.sara.nl/data/eratosthenes/disk/red-glacier_copernicushub-gcp"
CATALOG_L2A_URL = "https://webdav.grid.surfsara.nl:2880/pnfs/grid.sara.nl/data/eratosthenes/disk/red-glacier_earth-search"
CATALOG_OUT_URL = f"https://webdav.grid.surfsara.nl:2880/pnfs/grid.sara.nl/data/eratosthenes/disk/{CATALOG_OUT_ID}"

# Area of interest as a WKT polygon (coordinates look like lon/lat degrees - TODO confirm the CRS).
AOI_WKT = "POLYGON ((-153.17475999752926 59.921268739678986, -153.1756065979441 60.08139181090761, -152.71003577814346 60.08119061686564, -152.71143368231296 59.92106883348645, -153.17475999752926 59.921268739678986))"

In [3]:
def _read_catalog(url, stac_io=None):
    """
    Read STAC catalog from URL

    :param url: urlpath to the catalog root
    :param stac_io (optional): STAC IO instance to read the catalog
    :return: PyStac Catalog object
    """
    url = url if url.endswith("catalog.json") else f"{url}/catalog.json"
    catalog = pystac.Catalog.from_file(url, stac_io=stac_io)
    return catalog


def _save_catalog(items, url, template=None, stac_io=None):
    """
    Save STAC items in a catalog at the provided URL

    :param items: iterable with all items
    :param url: urlpath where to save the catalog root
    :param stac_io (optional): STAC IO instance to read the catalog
    """
    url = url if not url.endswith("catalog.json") else os.path.split(url)[0]
    catalog_id = os.path.split(url)[1]
    catalog = pystac.Catalog(
        id=catalog_id,
        description="",
        catalog_type="SELF_CONTAINED",
    )
    catalog.add_items(items)
    if template is not None:
        catalog.generate_subcatalogs(template)
    if stac_io is not None:
        catalog._stac_io = stac_io
    catalog.normalize_and_save(url)


def _get_l2a_item(item_l1c, catalog_l2a):
    """
    Get Sentinel L2A item from the corresponding L1C item.

    :param item_l1c: Sentinel L1C item
    :param catalog_l2a: catalog with Sentinel L2A data 
    :param stac_io (optional): STAC IO instance to read the catalog
    :return:
    """
    platform = item_l1c.properties["platform"][-1].capitalize()  # e.g. "A"
    mgrs_tile = item_l1c.properties["s2:mgrs_tile"]  # e.g. "05VMG"
    utm_zone = int(mgrs_tile[0:2])  # e.g. 5
    cell = mgrs_tile[2:]  # e.g. "VMG"
    date = item_l1c.datetime.strftime("%Y%m%d")  # e.g. "20220202"
    
    item_l2a_id = f"S2{platform}_{utm_zone}{cell}_{date}_0_L2A"

    return catalog_l2a.get_item(item_l2a_id, recursive=True)

In [4]:
# set up the connection to dCache, using the token file for authentication
stac2dcache.configure(token_filename=MACAROON_PATH)

# read the input catalogs (L1C first, then L2A) via the dCache STAC IO
catalog_l1c, catalog_l2a = (
    _read_catalog(catalog_url, stac_io=stac2dcache.stac_io)
    for catalog_url in (CATALOG_L1C_URL, CATALOG_L2A_URL)
)

In [5]:
# minimum fraction of the AOI that a scene footprint must cover for the
# scene to be included in the output catalog
MIN_AOI_OVERLAP = 0.70

# the AOI is the same for all scenes: parse it once, outside the loop
aoi_geometry = shapely.wkt.loads(AOI_WKT)

# loop over all L1C items
items = []
for item_l1c in catalog_l1c.get_all_items():

    # part of the scene footprint that falls within the AOI
    item_geometry = shapely.geometry.shape(item_l1c.geometry)
    geometry = item_geometry.intersection(aoi_geometry)

    # keep only the scenes with a significant overlap with the AOI
    # NOTE(review): areas are computed directly on the geometry coordinates,
    # without any reprojection
    if (geometry.area / aoi_geometry.area) > MIN_AOI_OVERLAP:

        # create new item to collect output
        # NOTE(review): the ID only depends on the acquisition date - confirm
        # that no two selected scenes can share the same date
        item = pystac.Item(
            id=item_l1c.datetime.strftime("S2_%Y-%m-%d"),
            geometry=shapely.geometry.mapping(geometry),
            # STAC bounding boxes are arrays: pass a list, not shapely's tuple
            bbox=list(geometry.bounds),
            datetime=item_l1c.datetime,
            properties={}
        )

        # get corresponding L2A item
        item_l2a = _get_l2a_item(item_l1c, catalog_l2a)

        # add links to l1c/l2a items for input (the L2A scene is optional)
        item.add_link(pystac.Link("item-L1C", item_l1c))
        if item_l2a is not None:
            item.add_link(pystac.Link("item-L2A", item_l2a))

        items.append(item)

In [6]:
# write the collected items to the output catalog location
_save_catalog(
    items=items,
    url=CATALOG_OUT_URL,
    template=CATALOG_OUT_TEMPLATE,
    stac_io=stac2dcache.stac_io,
)

In [7]:
# # uncomment to clean up (WARNING: this recursively deletes the output catalog at CATALOG_OUT_URL)
# stac2dcache.fs.api_url = "https://dcacheview.grid.surfsara.nl:22880/api/v1"
# stac2dcache.fs.rm(CATALOG_OUT_URL, recursive=True)