In [2]:
import datetime
import dcachefs
import getpass
import os
import pyproj
import pystac
import stac2webdav
import tqdm

from pystac import Catalog, Collection, Item, Asset
from shapely.geometry import Polygon, MultiPolygon, mapping, shape
from shapely.ops import transform
from stac2webdav.drivers import RasterioDriver
from stac2webdav.utils import catalog2geopandas

ModuleNotFoundError: No module named 'stac2webdav'

In [2]:
# configure pystac to write to dCache
dcache_fs = stac2webdav.configure(
    filesystem="dcache",
    token_filename="./iceshelf_2021-01-27_365D_francesco.conf",
)

In [3]:
# path to the tiles
tiles_path = f"/pnfs/grid.sara.nl/data/iceshelf/disk/S2_composite_2019-11-1_2020-3-1/*.tif"
tiles_path = f"/pnfs/grid.sara.nl/data/iceshelf/disk/S1_mosaic_2020-01-01_2020-02-01/*.tif"

# set URL to be able to list via API
dcache_fs.api_url = "https://dcacheview.grid.surfsara.nl:22880/api/v1/"
tiles = dcache_fs.glob(tiles_path, detail=False)
len(tiles)
tiles[0:10]

NameError: name 'dcache_fs' is not defined

In [5]:
# for mosaics, define earliest/latest datetimes of original images
start_datetime = datetime.datetime.fromisoformat("2019-11-01")
end_datetime = datetime.datetime.fromisoformat("2020-03-01") 

# need WebDAV door to set asset paths 
webdav_url="https://webdav.grid.surfsara.nl:2880"

# images contain four bands: visible (B2, B3, B4) and NIR (B11)  
asset_id = "B2-B3-B4-B11"

# we use WGS84 in the catalog, which is the standard for GeoJSON 
transformer = pyproj.Transformer.from_crs(
    "EPSG:3031",
    "WGS84",
    always_xy=True
)

In [6]:
items = {}

for tile in tqdm.tqdm(tiles):
    
    _, filename = os.path.split(tile)
    item_id, _ = os.path.splitext(filename)
    
    asset_uri = f"{webdav_url}{tile}"

    # read raster data to extract geo-info
    driver = RasterioDriver(uri=asset_uri)
    driver.set_filesystem(dcache_fs)
    bands = driver.get()

    # create Asset object
    asset = Asset(
        href=asset_uri,  # link to asset
        title=", ".join(bands.attrs['long_name']),
        media_type=pystac.MediaType.GEOTIFF # or COG - verify e.g. with with https://github.com/rouault/cog_validator 
    )

    # use bbox as footprint geometry (composites should fill the tiles)
    polygon = Polygon.from_bounds(*bands.rio.bounds())
    polygon = transform(transformer.transform, polygon)
    bbox = polygon.bounds
    geometry = mapping(polygon)
    
    # create Item object
    item = Item(
        id=item_id,
        geometry=geometry,
        bbox=bbox,
        datetime=None,  
        properties=dict(
            start_datetime=start_datetime.isoformat(),
            end_datetime=end_datetime.isoformat()   
        )
    )
    item.add_asset(asset_id, asset)

    items[item_id] = item

100%|██████████| 312/312 [04:13<00:00,  1.23it/s]


In [7]:
# spatial extent
footprints = (shape(i.geometry).envelope for i in items.values())
collection_bbox = MultiPolygon(footprints).bounds
spatial_extent = pystac.SpatialExtent(bboxes=[collection_bbox])

# temporal extent
start = (i.properties.get('start_datetime', item.datetime) for i in items.values())
start = sorted(start)[0]
end = (i.properties.get('end_datetime', item.datetime) for i in items.values())
end = sorted(end)[-1]
temporal_extent = pystac.TemporalExtent(
    intervals=[[datetime.datetime.fromisoformat(start), 
                datetime.datetime.fromisoformat(end)]]
)

extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)

In [8]:
# create Collection object
collection_id="2019-11-1_2020-3-1"

collection = Collection(
    id=collection_id,
    description="Composite Sentinel-2 images for time span 2019-11-01/2020-03-01",
    extent=extent,
)

collection.add_items(items.values())
collection

<Collection id=2019-11-1_2020-3-1>

In [9]:
# create STAC catalog
catalog_id = "S2_composite"

catalog = Catalog(
    id=catalog_id,
    description="Composite Sentinel-2 images generated using GEE"
)

catalog.add_child(collection)

In [10]:
# save it on dCache
catalog.normalize_and_save(
    f"{webdav_url}/pnfs/grid.sara.nl/data/iceshelf/disk/S2_composite_catalog", 
    catalog_type="SELF_CONTAINED"
)

In [11]:
# convert it to GeoDataFrame for inspection
gdf = catalog2geopandas(catalog)
gdf

Unnamed: 0,geometry,start_datetime,end_datetime,datetime
0,"POLYGON ((-63.43559 -64.67813, -61.18918 -64.1...",2019-11-01T00:00:00,2020-03-01T00:00:00,
1,"POLYGON ((-61.18988 -64.16869, -59.03628 -63.6...",2019-11-01T00:00:00,2020-03-01T00:00:00,
2,"POLYGON ((-60.94604 -66.62668, -58.57037 -66.0...",2019-11-01T00:00:00,2020-03-01T00:00:00,
3,"POLYGON ((-53.13093 -78.53210, -48.81395 -77.8...",2019-11-01T00:00:00,2020-03-01T00:00:00,
4,"POLYGON ((-48.81493 -77.81454, -45.00000 -77.0...",2019-11-01T00:00:00,2020-03-01T00:00:00,
...,...,...,...,...
307,"POLYGON ((-82.87520 -80.74381, -75.96362 -80.5...",2019-11-01T00:00:00,2020-03-01T00:00:00,
308,"POLYGON ((-75.96416 -80.53354, -69.44402 -80.1...",2019-11-01T00:00:00,2020-03-01T00:00:00,
309,"POLYGON ((-69.44452 -80.19327, -63.43472 -79.7...",2019-11-01T00:00:00,2020-03-01T00:00:00,
310,"POLYGON ((-63.43564 -79.73618, -57.99457 -79.1...",2019-11-01T00:00:00,2020-03-01T00:00:00,


In [12]:
# save a local copy of the catalog as well
catalog.normalize_and_save(
    "./S2_composite_catalog",
    catalog_type="SELF_CONTAINED"
)