In [1]:
import datetime

In [2]:
import rasterio
from shapely.geometry import Polygon, mapping

In [3]:
import pystac
from pystac.extensions.eo import Band, EOExtension

In [4]:
from pathlib import Path
from py_linq import Enumerable
import json

In [5]:
import numpy as np
import pandas as pd

In [6]:
# Convenience helpers monkey-patched onto pathlib.Path for listing GeoTIFF files
# directly inside a directory (non-recursive).
# Both common spellings of the extension are accepted, case-insensitively, so the
# three helpers always agree on which files count as TIFFs.
_TIF_SUFFIXES = ('.tif', '.tiff')

# full Path objects of the TIFF files in directory x
Path.lsTif = lambda x: [p for p in x.iterdir() if p.suffix.lower() in _TIF_SUFFIXES]
# file names (with extension) of the TIFF files in directory x
Path.lsTifName = lambda x: [p.name for p in x.lsTif()]
# file stems (name without extension) of the TIFF files in directory x
Path.lsTifStem = lambda x: [p.stem for p in x.lsTif()]

### Define Root folder and Data folder

In [7]:
# Root folder of the dataset. Every other path in this notebook is derived from it.
# NOTE(review): this is a hard-coded host-style path (presumably a network share);
# it has to be edited to run the notebook on a machine without access to that host.
rootPath = Path('//mule.sbs.arizona.edu/')/'btellman'/'Projects'/'NASA'/'NIP'/'Data'/'Raster'/'CVPR23'/'CVPR23FractionalInundationHistoryData'

In [8]:
# Name of the sub-folder (under the root folder) that holds the raster files;
# the STAC output folder name is derived from it by appending 'STAC'.
fractionInundatedFolder = "CVPR23FractionalInundationHistory"

In [9]:
# folder containing the input rasters
dataPath = rootPath.joinpath(fractionInundatedFolder)
# sibling folder (same name with a 'STAC' suffix) that receives the generated catalog
stacPath = rootPath.joinpath(f'{fractionInundatedFolder}STAC')

### Create STAC

Define a function that extracts the bounding box and footprint from an open GeoTIFF dataset, then apply it to the first file in the data folder.

In [10]:
def get_bbox_and_footprint(ds):
    """Return the bounding box and rectangular footprint of an open dataset.

    Parameters
    ----------
    ds : object exposing ``bounds`` with ``left``, ``bottom``, ``right`` and
        ``top`` attributes (e.g. the dataset returned by ``rasterio.open``).

    Returns
    -------
    tuple
        ``(bbox, footprint)`` where ``bbox`` is ``[left, bottom, right, top]``
        and ``footprint`` is the ``mapping()`` of a shapely ``Polygon`` whose
        corners are the four corners of the bounds.
    """
    extent = ds.bounds
    west, south = extent.left, extent.bottom
    east, north = extent.right, extent.top

    # corner order: lower-left, upper-left, upper-right, lower-right
    corner_ring = [
        [west, south],
        [west, north],
        [east, north],
        [east, south],
    ]

    return ([west, south, east, north], mapping(Polygon(corner_ring)))

In [11]:
# Compute the (bbox, footprint) pair once, from the first raster found in the
# data folder; getWaterItem() reuses this single value for every item.
# NOTE(review): this assumes all rasters share the same extent - confirm.
tifFiles = dataPath.lsTif()
filePath = tifFiles[0]
with rasterio.open(filePath) as ds:
    bbox_footprints = get_bbox_and_footprint(ds)

Load the MODIS info file (a CSV table that is looked up by timestamp to get each raster's start and end dates).

In [12]:
# Lookup table read from the working directory; getWaterItem() reads its
# `Time`, `StartDate` and `EndDate` columns.
df_modisInfoTimes = pd.read_csv(filepath_or_buffer='MODISInfo.csv')

Define band name

In [13]:
# single band describing the raster content; attached to every item through
# the EO extension in getWaterItem()
fractionBand = Band.create(name='b1', description='Fractional Inundated Area')
sen1_bands = [fractionBand]

Create a function that converts a timestamp in milliseconds since the Unix epoch into a timezone-aware (UTC) datetime object.

In [14]:
def getDateFromTimeStamp(timeStamp):
    """Convert an epoch timestamp in milliseconds to a UTC-aware datetime.

    Parameters
    ----------
    timeStamp : int or float
        Milliseconds since the Unix epoch.

    Returns
    -------
    datetime.datetime
        The corresponding instant with ``tzinfo`` set to ``datetime.timezone.utc``.
    """
    # fromtimestamp() works in seconds, so scale the millisecond value down first
    seconds = timeStamp / 1000.0
    utc = datetime.timezone.utc
    return datetime.datetime.fromtimestamp(seconds, tz=utc)

Get item from raster URI

In [15]:
def getWaterItem(raster_uri):
    """Build the ``pystac.Item`` describing one fractional-inundation raster.

    The file stem is used both as the item id and, parsed as an integer, as
    the item's timestamp in milliseconds since the Unix epoch. The acquisition
    window (start/end dates) is looked up in the module-level
    ``df_modisInfoTimes`` table by matching its ``Time`` column against that
    timestamp. The bbox and footprint are NOT read from the raster itself:
    the module-level ``bbox_footprints`` pair is reused for every item.

    Parameters
    ----------
    raster_uri : pathlib.Path
        Path of the raster file; its stem must be an integer string.

    Returns
    -------
    pystac.Item
        Item with the EO extension applied and a single GeoTIFF asset under
        the key ``'FractionalInundatedArea'``.

    Raises
    ------
    ValueError
        If the file stem cannot be parsed as an integer.
    IndexError
        If ``df_modisInfoTimes`` has no row whose ``Time`` equals the stem.
    """
    # use stem of file as id; parse it once as the epoch-millisecond timestamp
    idx = raster_uri.stem
    timeStamp = int(idx)

    # get modis info from pandas dataframe
    modisInfo = df_modisInfoTimes[df_modisInfoTimes.Time == timeStamp]
    if modisInfo.empty:
        # fail with a message that names the offending file instead of the
        # opaque "index 0 is out of bounds" raised by `.values[0]` below
        raise IndexError(
            f"No row with Time == {timeStamp} in df_modisInfoTimes for raster '{raster_uri}'"
        )

    # get bbox and footprint (shared by all items, computed once at module level)
    bbox, footprint = bbox_footprints

    # get dates from modisInfo and convert to datetime object
    date = getDateFromTimeStamp(timeStamp)
    startDate = getDateFromTimeStamp(int(modisInfo.StartDate.values[0]))
    endDate = getDateFromTimeStamp(int(modisInfo.EndDate.values[0]))

    # create item
    # NOTE(review): the projection extension is declared here but no `proj:*`
    # property is set anywhere in this function - confirm the item still
    # validates against that schema, or add the projection fields.
    item = pystac.Item(
        id=idx,
        geometry=footprint,
        bbox=bbox,
        stac_extensions=['https://stac-extensions.github.io/projection/v1.0.0/schema.json'],
        datetime=date,
        start_datetime=startDate,
        end_datetime=endDate,
        properties=dict(
            tile='FractionalInundatedArea_' + idx
        )
    )

    # set resolution (ground sample distance; presumably metres - TODO confirm)
    item.common_metadata.gsd = 500

    # apply eo extension
    eo = EOExtension.ext(item, add_if_missing=True)
    eo.apply(bands=sen1_bands)

    # add asset to item with raster path
    item.add_asset(
        key='FractionalInundatedArea',
        asset=pystac.Asset(
            title='Fractional Inundated Area',
            href=raster_uri.as_posix(),
            media_type=pystac.MediaType.GEOTIFF
        )
    )
    return item

Create catalog

In [16]:
# create catalog
catalog = pystac.Catalog(id='Bangladesh Historical Fractional Inundated Area', description='STAC catalog for Bangladesh Historical Fractional Inundated Area Dataset')

# pystac.Collection expects a pystac.Extent, not the raw (bbox, footprint) tuple.
# Build a provisional extent from the shared bbox with an open temporal interval;
# it is replaced by the real extent in update_extent_from_items() below.
initialBbox, _ = bbox_footprints
initialExtent = pystac.Extent(
    spatial=pystac.SpatialExtent(bboxes=[initialBbox]),
    temporal=pystac.TemporalExtent(intervals=[[None, None]]),
)

# create modis collection
modisTimeSeriesCollection = pystac.Collection(id="FractionalInundationHistory_ts", title='Fractional Inundation History Time Series', description='Bangladesh Historical Fractional Inundated Area Dataset', extent=initialExtent)

# set the collection's catalog
# NOTE(review): this looks like a plain attribute assignment; the actual
# parent/child link is created by catalog.add_child() below - confirm and drop.
modisTimeSeriesCollection.catalog = catalog

# crawl data folder to get all files
modisFiles = dataPath.lsTif()

# loop over all files
for file in modisFiles:
    # get item for file
    item = getWaterItem(file)
    # set the item's collection
    # NOTE(review): presumably redundant with add_item() on the next line - confirm.
    item.collection = modisTimeSeriesCollection
    # add item to collection
    modisTimeSeriesCollection.add_item(item)

# update the collection's extent from the items
modisTimeSeriesCollection.update_extent_from_items()

# add collection to catalogue
catalog.add_child(modisTimeSeriesCollection)

# normalise all paths relative to stac folder
catalog.normalize_hrefs(str(stacPath))
# make them all relative
catalog.make_all_asset_hrefs_relative()
# save catalog
catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)