# Create STAC Catalog on dCache

## Preliminary Steps

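Create a macaroon to authenticate with dCache. With `--output rclone token`, the macaroon is saved in an rclone configuration file named `token.conf`, which is read later in this notebook: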

```shell 
get-macaroon --url https://webdav.grid.surfsara.nl:2880/path/to/catalog \
        --user username \
        --permissions DOWNLOAD,UPLOAD,DELETE,MANAGE,LIST,READ_METADATA,UPDATE_METADATA \
        --duration P7D \
        --output rclone token \
        --ip '0.0.0.0/0'
```

## Search

In [1]:
import intake
import satsearch

In [2]:
# search imagery using sat-search: query the Earth Search STAC API for
# scenes of the 'sentinel-s2-l2a-cogs' collection acquired between
# 2018-02-25 and 2018-03-25
search = satsearch.Search.search(
    url="https://earth-search.aws.element84.com/v0",
    collections=["sentinel-s2-l2a-cogs"],
    datetime="2018-02-25/2018-03-25",
    # query sentinel-2 tile 5VNK
    # (UTM zone 5, latitude band V, grid square NK)
    query=[
        "sentinel:utm_zone=5",
        "sentinel:latitude_band=V",
        "sentinel:grid_square=NK"
    ]
)
# retrieve the matching items; `items` is reused by the cells below
items = search.items()

In [3]:
# open the search results as an intake item-collection catalog
col = intake.open_stac_item_collection(items)

In [4]:
# inspect the item collection as a geo-data-frame; the bare `gdf`
# expression displays the table as the cell output
gdf = col.to_geopandas()
gdf

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,geometry,datetime,platform,constellation,instruments,gsd,view:off_nadir,proj:epsg,sentinel:utm_zone,sentinel:latitude_band,sentinel:grid_square,sentinel:sequence,sentinel:product_id,sentinel:data_coverage,eo:cloud_cover,sentinel:valid_cloud_cover,created,updated,data_coverage
0,"POLYGON ((-152.51967 62.14316, -153.00036 62.1...",2018-03-25T21:55:23Z,sentinel-2b,sentinel-2,[msi],10,0,32605,5,V,NK,1,S2B_MSIL2A_20180325T215529_N0001_R029_T05VNK_2...,43.51,0.0,False,2020-09-28T19:40:26.599Z,2020-09-28T19:40:26.599Z,
1,"POLYGON ((-150.89497 62.12799, -153.00036 62.1...",2018-03-24T21:36:36Z,sentinel-2a,sentinel-2,[msi],10,0,32605,5,V,NK,0,S2A_MSIL2A_20180324T213531_N0001_R086_T05VNK_2...,99.25,59.91,True,2020-08-31T01:44:22.133Z,2020-08-31T01:44:22.133Z,
2,"POLYGON ((-150.89497 62.12799, -153.00036 62.1...",2018-03-22T21:45:24Z,sentinel-2b,sentinel-2,[msi],10,0,32605,5,V,NK,0,S2B_MSIL2A_20180322T214529_N0001_R129_T05VNK_2...,100.0,67.66,True,2020-09-28T20:07:22.943Z,2020-09-28T20:07:22.943Z,
3,"POLYGON ((-150.89497 62.12799, -153.00036 62.1...",2018-03-19T21:35:22Z,sentinel-2b,sentinel-2,[msi],10,0,32605,5,V,NK,0,S2B_MSIL2A_20180319T213519_N0001_R086_T05VNK_2...,99.18,65.88,True,2020-09-08T08:46:42.791Z,2020-09-08T08:46:42.791Z,99.18
4,"POLYGON ((-152.51070 62.14313, -153.00036 62.1...",2018-03-15T21:55:21Z,sentinel-2b,sentinel-2,[msi],10,0,32605,5,V,NK,0,S2B_MSIL2A_20180315T215519_N0001_R029_T05VNK_2...,43.83,44.18,True,2020-08-31T00:27:58.372Z,2020-08-31T00:27:58.372Z,
5,"POLYGON ((-150.89497 62.12799, -153.00036 62.1...",2018-03-14T21:36:04Z,sentinel-2a,sentinel-2,[msi],10,0,32605,5,V,NK,0,S2A_MSIL2A_20180314T213521_N0001_R086_T05VNK_2...,99.18,71.62,True,2020-09-28T04:53:53.672Z,2020-09-28T04:53:53.672Z,99.18
6,"POLYGON ((-152.54218 62.14323, -153.00036 62.1...",2018-03-10T21:55:24Z,sentinel-2a,sentinel-2,[msi],10,0,32605,5,V,NK,0,S2A_MSIL2A_20180310T215521_N0001_R029_T05VNK_2...,42.47,68.59,True,2020-09-08T12:20:44.485Z,2020-09-08T12:20:44.485Z,42.47
7,"POLYGON ((-150.89497 62.12799, -153.00036 62.1...",2018-03-07T21:45:26Z,sentinel-2a,sentinel-2,[msi],10,0,32605,5,V,NK,0,S2A_MSIL2A_20180307T214531_N0001_R129_T05VNK_2...,100.0,40.26,True,2020-08-31T01:46:15.040Z,2020-08-31T01:46:15.040Z,
8,"POLYGON ((-152.53328 62.14320, -153.00036 62.1...",2018-03-05T21:55:19Z,sentinel-2b,sentinel-2,[msi],10,0,32605,5,V,NK,0,S2B_MSIL2A_20180305T215519_N0001_R029_T05VNK_2...,42.77,0.0,True,2020-09-08T08:56:12.672Z,2020-09-08T08:56:12.672Z,42.77
9,"POLYGON ((-150.89497 62.12799, -153.00036 62.1...",2018-03-04T21:37:24Z,sentinel-2a,sentinel-2,[msi],10,0,32605,5,V,NK,0,S2A_MSIL2A_20180304T213531_N0001_R086_T05VNK_2...,99.38,46.6,True,2020-09-28T09:25:40.513Z,2020-09-28T09:25:40.513Z,99.38


## Create Catalog

In [5]:
import copy
import pathlib
import configparser
import pystac

from pystac import Catalog, Collection, Item, Asset

In [6]:
# keys of the assets that will be linked within the catalog
assets = ('metadata', 'B01')

In [7]:
# create the (initially empty) root catalog; collections, sub-catalogs
# and items are attached to it in the cells below
catalog = Catalog(
    id='Eratosthenes', 
    description='Catalog for the Eratosthenes GO project'
)
catalog

<Catalog id=Eratosthenes>

In [8]:
def _get_property_value(item, property_name):
    """
    Look up a property of an item and return it as a string.

    When the item does not define the property, the property
    name itself is returned (as a string) as a fallback.
    """
    properties = item.properties
    if property_name in properties:
        return str(properties[property_name])
    return str(property_name)

In [9]:
# Template of the subcatalog hierarchy used to organize items in the
# collection: every path component is a '${property}' placeholder that
# names an item property.
item_path_template = pathlib.Path(
    '${sentinel:utm_zone}/${sentinel:latitude_band}/${sentinel:grid_square}/'
)

In [10]:
# show the individual components (one per subcatalog level) of the template
item_path_template.parts

('${sentinel:utm_zone}',
 '${sentinel:latitude_band}',
 '${sentinel:grid_square}')

In [11]:
# Mirror every item of the search results into `catalog`, using the layout:
# catalog -> collection -> one subcatalog per template component -> item.
for item in items:
    
    # check whether collection is present in catalog
    collection = catalog.get_child(item.collection().id)
    
    if collection is None:
        # if not, create collection entry from the raw dictionary of the
        # source collection (NOTE: `_data` is a private attribute)
        collection = Collection.from_dict(
            item.collection()._data
        )
        
        catalog.add_child(collection)

    # create nested subcatalog structure, descending one level per
    # component of `item_path_template`
    parent = collection
    for part in item_path_template.parts:
        # NOTE: str.strip treats its argument as a set of characters, so
        # any leading/trailing '$', '{' and '}' are removed from the part
        prop_name = part.strip('${}')
        prop_value = _get_property_value(item, prop_name)
        # reuse the subcatalog for this property value if already present
        subcatalog = parent.get_child(prop_value)
        if subcatalog is None:
            subcatalog = Catalog(
                id=prop_value,
                description=f"{prop_name} catalog"
            )
            parent.add_child(subcatalog)
        parent = subcatalog
        
    # check whether element is present in the innermost subcatalog
    item_local = parent.get_item(item.id)
    if item_local is None:
        # if not, first strip assets links (working on a deep copy, so
        # that the dictionary of the source item is not modified)
        item_dict = copy.deepcopy(item._data)
        item_dict.update({'assets': {}})
        # then create item entry
        item_local = Item.from_dict(item_dict)
        parent.add_item(item_local)

    # check whether each of the selected `assets` is available
    for asset in assets:
        if asset not in item_local.assets:
            # if not add asset to item, taking its definition from the
            # source item
            item_local.add_asset(
                key=asset,
                asset=Asset.from_dict(item.asset(asset))
            )

In [12]:
# catalog is ready! print its hierarchical structure
catalog.describe()

* <Catalog id=Eratosthenes>
    * <Collection id=sentinel-s2-l2a-cogs>
        * <Catalog id=5>
            * <Catalog id=V>
                * <Catalog id=NK>
                  * <Item id=S2B_5VNK_20180325_1_L2A>
                  * <Item id=S2A_5VNK_20180324_0_L2A>
                  * <Item id=S2B_5VNK_20180322_0_L2A>
                  * <Item id=S2B_5VNK_20180319_0_L2A>
                  * <Item id=S2B_5VNK_20180315_0_L2A>
                  * <Item id=S2A_5VNK_20180314_0_L2A>
                  * <Item id=S2A_5VNK_20180310_0_L2A>
                  * <Item id=S2A_5VNK_20180307_0_L2A>
                  * <Item id=S2B_5VNK_20180305_0_L2A>
                  * <Item id=S2A_5VNK_20180304_0_L2A>
                  * <Item id=S2A_5VNK_20180228_0_L2A>
                  * <Item id=S2A_5VNK_20180225_0_L2A>


## Write Catalog - Local

In [13]:
# save the catalog locally, as a self-contained catalog rooted at the
# 'catalog-pystac' directory
catalog.normalize_and_save(
    root_href='catalog-pystac',
    catalog_type=pystac.CatalogType.SELF_CONTAINED
)

## Write Catalog - dCache

In [14]:
import urllib
import urllib.parse

import requests

In [15]:
class dCacheIO(object):
    """
    Minimal text reader/writer for a WebDAV endpoint (here: dCache) that
    authenticates every request with a bearer token.

    `read` and `write` take the full URI of the target file, so that they
    can be assigned to `pystac.STAC_IO.read_text_method` and
    `pystac.STAC_IO.write_text_method`, respectively.

    :param url: URL of the WebDAV endpoint. It is stored on the instance
        but not used by `read`/`write`, which receive full URIs.
    :param token: bearer token sent in the `Authorization` header.
    """
    def __init__(self, url, token):
        self.url = url
        self.token = token

    def get_headers(self):
        """ Return the HTTP headers carrying the bearer token. """
        return {'Authorization': f'Bearer {self.token}'}

    def read(self, uri):
        """
        Return the content of the remote file at `uri` as text.

        :raises requests.HTTPError: if the server replies with an error
            status, so that an error page is not mistaken for file content.
        """
        response = requests.get(url=uri,
                                headers=self.get_headers())
        response.raise_for_status()
        return response.text

    def write(self, uri, text):
        """
        Upload `text` to `uri`, first trying to create the parent directory.

        Only the immediate parent directory is created; its own ancestors
        are expected to exist already.

        :raises requests.HTTPError: if the upload (PUT) fails.
        """
        parsed = urllib.parse.urlparse(uri)
        # URL paths always use '/', hence the pure POSIX flavour
        parent_path = pathlib.PurePosixPath(parsed.path).parent.as_posix()
        parent_uri = urllib.parse.urlunparse(
            (parsed.scheme,
             parsed.netloc,
             parent_path,
             parsed.params,
             parsed.query,
             parsed.fragment)
        )
        # NOTE: MKCOL returns 405 if the directory exists already. Its
        # outcome is deliberately not checked: any real problem (e.g.
        # missing permissions) makes the PUT below fail as well.
        requests.request('MKCOL',
                         url=parent_uri,
                         headers=self.get_headers())
        # encode explicitly, rather than relying on the default string
        # encoding of the underlying HTTP stack
        data = text.encode('utf-8') if isinstance(text, str) else text
        response = requests.put(url=uri,
                                data=data,
                                headers=self.get_headers())
        # do not let a failed upload go unnoticed
        response.raise_for_status()

    @classmethod
    def from_config_file(cls, filename):
        """
        Alternate constructor reading URL and token from an INI-style file.

        The values are read from the `url` and `bearer_token` options of
        the section named after the file stem (e.g. `[token]` for
        'token.conf').

        :param filename: path to the configuration file.
        :raises KeyError: if the section or one of the options is missing
            (also the case when the file itself does not exist).
        """
        filepath = pathlib.Path(filename)
        config = configparser.ConfigParser()
        config.read(filepath)
        section = config[filepath.stem]
        # use `cls` so that subclasses get instances of their own type
        return cls(url=section['url'], token=section['bearer_token'])
    

In [16]:
# configure dCache reader/writer using the generated token
# read URL and bearer token from the 'token.conf' configuration file
dcache_io = dCacheIO.from_config_file('token.conf')

In [17]:
# overwrite the PySTAC read/write methods
# route PySTAC text reads and writes through `dcache_io`, so that the
# token is sent with every request
pystac.STAC_IO.read_text_method = dcache_io.read
pystac.STAC_IO.write_text_method = dcache_io.write

In [18]:
# remote location (on dCache) where the catalog will be saved
url = "https://webdav.grid.surfsara.nl:2880/path/to/catalog/catalog-pystac"

In [19]:
# save self-contained catalog on dCache
catalog.normalize_and_save(
    root_href=url,
    catalog_type=pystac.CatalogType.SELF_CONTAINED
)

In [22]:
# read it again from dCache, starting from the root 'catalog.json' file
url_catalog = "https://webdav.grid.surfsara.nl:2880/path/to/catalog/catalog-pystac/catalog.json"
cat = Catalog.from_file(url_catalog)
cat

<Catalog id=Eratosthenes>

In [23]:
# voila! walk the catalog read from dCache, printing every (sub)catalog
# followed by the items it directly contains
for root, _, itms in cat.walk():
    print(root)
    for item in itms:
        print(item)

<Catalog id=Eratosthenes>
<Collection id=sentinel-s2-l2a-cogs>
<Catalog id=5>
<Catalog id=V>
<Catalog id=NK>
<Item id=S2B_5VNK_20180325_1_L2A>
<Item id=S2A_5VNK_20180324_0_L2A>
<Item id=S2B_5VNK_20180322_0_L2A>
<Item id=S2B_5VNK_20180319_0_L2A>
<Item id=S2B_5VNK_20180315_0_L2A>
<Item id=S2A_5VNK_20180314_0_L2A>
<Item id=S2A_5VNK_20180310_0_L2A>
<Item id=S2A_5VNK_20180307_0_L2A>
<Item id=S2B_5VNK_20180305_0_L2A>
<Item id=S2A_5VNK_20180304_0_L2A>
<Item id=S2A_5VNK_20180228_0_L2A>
<Item id=S2A_5VNK_20180225_0_L2A>
