### `rs-client-libraries` installation

In [100]:
import rs_client
import rs_common
import rs_workflows

# Set logger level to info by assigning the `level` class attribute of
# rs_common's `Logging` helper.
# NOTE(review): presumably this only affects loggers obtained after this point — confirm.
import logging
rs_common.logging.Logging.level = logging.INFO

### Environment

In [101]:
import os

# The running mode is selected by the RSPY_LOCAL_MODE environment variable:
#   - "1": local mode, all your services are running locally;
#   - anything else (or unset): hybrid or cluster mode, we use the services
#     deployed on the RS-Server website.
local_mode = os.getenv("RSPY_LOCAL_MODE") == "1"

if not local_mode:
    # Hybrid or cluster mode: the website URL is read from an environment variable
    # (a missing RSPY_WEBSITE raises KeyError here).
    rs_server_href = os.environ["RSPY_WEBSITE"]
else:
    # Local mode: the service URLs are hardcoded in the docker-compose file
    rs_server_href = None  # not used
    RSPY_HOST_AUXIP = "http://localhost:8001/docs"
    RSPY_HOST_CADIP = "http://localhost:8002/docs"
    RSPY_HOST_CATALOG = "http://localhost:8003/catalog/api.html"

# Create client + S3 bucket storage

In [102]:
import json
from rs_client.rs_client import RsClient
from rs_common.config import ECadipStation

# Generic client instance: every optional argument is left to None
generic_client = RsClient(
    rs_server_href,
    rs_server_api_key=None,
    owner_id=None,
    logger=None,
)
print(f"STAC catalog owner: {generic_client.owner_id!r}")

# Specialised clients are all derived from the generic instance.
# Auxip client:
auxip_client = generic_client.get_auxip_client()

# Cadip client, which needs the cadip station
cadip_station = ECadipStation.CADIP  # you can also have: INS, MPS, MTI, NSG, SGS
cadip_client = generic_client.get_cadip_client(cadip_station)

# Stac client, to access the catalog
stac_client = generic_client.get_stac_client()

# S3 bucket storage.
# These bucket names are the ones deployed on the cluster.
# RS-Server has read/write access to these buckets, but as an end-user, you won't manipulate them directly.
RSPY_TEMP_BUCKET = os.environ["RSPY_TEMP_BUCKET"]
RSPY_CATALOG_BUCKET = os.environ["RSPY_CATALOG_BUCKET"]
for bucket_label, bucket_name in (
    ("Temporary", RSPY_TEMP_BUCKET),
    ("Final", RSPY_CATALOG_BUCKET),
):
    print(f"{bucket_label} bucket: {bucket_name!r}")

# Local mode is the exception: it uses a local MinIO object storage instance,
# whose buckets have to be created manually.
if local_mode:
    from resources.utils import create_s3_buckets
    create_s3_buckets()

STAC catalog owner: 'jovyan'
Temporary bucket: 'rs-cluster-temp'
Final bucket: 'rs-cluster-catalog'


In [103]:
# Show information from the user account (hybrid or cluster mode only)
if not local_mode:
    print(f"User login: {generic_client.oauth2_user_login!r}")

    # One IAM role per line, in sorted order
    iam_roles = "\n".join(sorted(generic_client.oauth2_iam_roles))
    print(f"\nAPI key IAM roles: \n{iam_roles}")

# Create collections and items

In [104]:
# Stage files
from datetime import datetime
import json
from time import sleep
from rs_common.config import EDownloadStatus, EPlatform

# Search interval (naive datetimes, no timezone attached):
# from 2010-01-01 12:00:00 to 2024-01-02 16:00:00
start_date = datetime(year=2010, month=1, day=1, hour=12)
stop_date = datetime(year=2024, month=1, day=2, hour=16)

# We use this bucket name that is deployed on the cluster.
# RS-Server has read/write access to this bucket, but as an end-user, you won't manipulate it directly.
# NOTE: this reads the same RSPY_TEMP_BUCKET environment variable as the
# client-creation cell earlier in the notebook; a missing variable raises KeyError.
RSPY_TEMP_BUCKET = os.environ["RSPY_TEMP_BUCKET"]

In [105]:
# Stage files from the stations into the temporary bucket and wait for completion.
# Only the auxip client is used for now; add `cadip_client` to the list below
# to stage CADIP files as well.

# Upper bound on the number of 1-second status polls per file, so that a staging
# stuck in a non-final state fails the cell instead of hanging the kernel forever.
STAGING_MAX_POLLS = 600

temp_s3_files = []
for client in [auxip_client]:
    # Search at most 2 files from the station within the search interval
    files = client.search_stations(start_date, stop_date, limit=2)
    file_id_list = [x["id"] for x in files]
    s3_path = f"s3://{RSPY_TEMP_BUCKET}/{client.owner_id}/{client.station_name}"

    for file_id in file_id_list:
        temp_s3_files.append(f"{s3_path}/{file_id}")  # save it for later
        local_path = None
        client.staging(file_id, s3_path=s3_path, tmp_download_path=local_path)

        # Poll the staging status once per second until it is DONE or FAILED
        for poll_count in range(STAGING_MAX_POLLS):
            status = client.staging_status(file_id)
            print(f"Staging status for {file_id!r}: {status.value}")
            if status in [EDownloadStatus.DONE, EDownloadStatus.FAILED]:
                print("\n")
                break
            sleep(1)
        else:
            # The loop ran out of polls without reaching a final status
            raise TimeoutError(
                f"Staging of {file_id!r} did not finish after {STAGING_MAX_POLLS} status polls"
            )
        assert status == EDownloadStatus.DONE, "Staging has failed"

Staging status for 'S2__OPER_AUX_ECMWFD_PDMC_20200216T120000_V20190217T090000_20190217T210000.TGZ': IN_PROGRESS
Staging status for 'S2__OPER_AUX_ECMWFD_PDMC_20200216T120000_V20190217T090000_20190217T210000.TGZ': DONE


Staging status for 'S2__OPER_AUX_ECMWFD_PDMC_20190216T120000_V20190217T090000_20190217T210000.TGZ': IN_PROGRESS
Staging status for 'S2__OPER_AUX_ECMWFD_PDMC_20190216T120000_V20190217T090000_20190217T210000.TGZ': DONE




In [106]:
# Create 2 new collections
from pystac import Collection, Extent, SpatialExtent, TemporalExtent

COLLECTION_LIST = ["S1_L1", "S2_L1"]

# Clean the existing collections, if any
for collection in COLLECTION_LIST:
    stac_client.remove_collection(collection)

# Add each collection with a whole-globe spatial extent and the search interval
# as temporal extent, and check that the catalog accepted it.
for collection in COLLECTION_LIST:
    response = stac_client.add_collection(
        Collection(
            id=collection,
            description=None,  # rs-client will provide a default description for us
            extent=Extent(
                # pystac expects a *list* of bounding boxes and a *list* of
                # [start, end] intervals, hence the extra level of nesting.
                spatial=SpatialExtent(bboxes=[[-180.0, -90.0, 180.0, 90.0]]),
                temporal=TemporalExtent([[start_date, stop_date]]),
            ),
        )
    )
    response.raise_for_status()

# See all my personal catalog collections
for collection in stac_client.get_collections():
    print(f"I have collection: {collection} at {collection.self_href}")

I have collection: <CollectionClient id=jovyan_S1_L1> at http://rs-server-catalog:8000/catalog/collections/jovyan:S1_L1
I have collection: <CollectionClient id=jovyan_S2_L1> at http://rs-server-catalog:8000/catalog/collections/jovyan:S2_L1


In [107]:
from pystac.asset import Asset
from pystac.item import Item

# Simulated values
WIDTH = 2500
HEIGHT = 2500

# Each staged file is added to its own collection (first file -> first collection,
# and so on), so there must be at least as many collections as staged files.
assert len(temp_s3_files) <= len(COLLECTION_LIST), (
    f"{len(temp_s3_files)} staged files but only {len(COLLECTION_LIST)} collections"
)

# Add one catalog item per file staged in the cells above
for collection_id, temp_s3_file in zip(COLLECTION_LIST, temp_s3_files):
    print(f"Add catalog item from: {temp_s3_file!r}")

    # The item ID and the asset key are both the staged file name
    item_id = os.path.basename(temp_s3_file)
    assets = {item_id: Asset(href=temp_s3_file)}

    # Other hardcoded parameters for this demo: whole-globe footprint
    geometry = {
        "type": "Polygon",
        "coordinates": [[[-180, -90], [180, -90], [180, 90], [-180, 90], [-180, -90]]],
    }
    bbox = [-180.0, -90.0, 180.0, 90.0]

    # Single timestamp, shared by the item datetime and its "datetime" property
    now = datetime.now()
    properties = {
        "gsd": 0.12345,
        "width": WIDTH,
        "height": HEIGHT,
        "datetime": now,
        "proj:epsg": 3857,
        "orientation": "nadir",
    }

    # Add item to the STAC catalog collection, check status is OK
    # NOTE: in future versions, this pystac Item object will be returned automatically by rs-client-libraries.
    item = Item(
        id=item_id,
        geometry=geometry,
        bbox=bbox,
        datetime=now,
        properties=properties,
        assets=assets,
    )
    response = stac_client.add_item(collection_id, item)
    response.raise_for_status()

Add catalog item from: 's3://rs-cluster-temp/jovyan/AUXIP/S2__OPER_AUX_ECMWFD_PDMC_20200216T120000_V20190217T090000_20190217T210000.TGZ'
Add catalog item from: 's3://rs-cluster-temp/jovyan/AUXIP/S2__OPER_AUX_ECMWFD_PDMC_20190216T120000_V20190217T090000_20190217T210000.TGZ'


### Search endpoint using multiple collections + implicit naming mechanism

![title](resources/search_demo.png)

- The owner_id can be specified either as a prefix of the collection name or in the filter; it can also be omitted.
- The catalog first checks whether the collection exists under the given name -> if not, the owner_id is concatenated to the collection name.
- If the owner_id is defined neither in the collection name nor in the filter, the default owner_id is used.

**Case 1:** you can choose to specify the owner_id directly in the name of the collection. If the collection exists, the items validating the search request will be returned:

In [116]:
# Search collection - POST method - owner_id specified in the name of the collection

WIDTH = 2500
HEIGHT = 2500

# Both collection names carry the owner_id prefix
my_collections = ["jovyan_S1_L1", "jovyan_S2_L1"]

# cql2-json filter: logical AND of one equality test per item property
filter_on_dimensions = {
    "op": "and",
    "args": [
        {"op": "=", "args": [{"property": property_name}, expected_value]}
        for property_name, expected_value in (
            ("orientation", "nadir"),
            ("width", WIDTH),
            ("height", HEIGHT),
        )
    ],
}

try:
    search = stac_client.search(
        method="POST",
        filter_lang="cql2-json",
        filter=filter_on_dimensions,
        collections=my_collections,
    )
    results = list(search.items_as_dicts())
    assert results, f"There should be at least one item for width={WIDTH} height={HEIGHT}"
    print(f"\nFound {len(results)} results for width={WIDTH} height={HEIGHT}")
    for result in results:
        print(f"({result['collection']}) {result['id']}")
except Exception as e:
    # Any failure, including the assertion above, is printed rather than raised
    print(f"Error: {e}")

Error: HTTPConnectionPool(host='rs-server-catalog', port=8000): Read timed out. (read timeout=30)


**Case 2:** If the owner_id is not specified in the name of the collection but is specified in the filter, the catalog will first check if the collection exists as it is defined. If not, the owner_id defined in the filter will be automatically concatenated to the name of the collection, and the search will be performed using this new concatenated name.
In the example below, we have one collection name that contains the owner_id and one that doesn't, and we can see that we are still able to retrieve the same number of items in the result.

In [109]:
# Search collection - POST method - owner_id specified both in one of the collection name and in the filter
WIDTH = 2500
HEIGHT = 2500

# One name carries the owner_id prefix, the other one does not
my_collections = ["jovyan_S1_L1", "S2_L1"]

# cql2-json filter: logical AND of one equality test per property,
# the first one being the owner
filter_on_dimensions = {
    "op": "and",
    "args": [
        {"op": "=", "args": [{"property": property_name}, expected_value]}
        for property_name, expected_value in (
            ("owner", "jovyan"),
            ("orientation", "nadir"),
            ("width", WIDTH),
            ("height", HEIGHT),
        )
    ],
}

try:
    search = stac_client.search(
        method="POST",
        filter_lang="cql2-json",
        filter=filter_on_dimensions,
        collections=my_collections,
    )
    results = list(search.items_as_dicts())
    assert results, f"There should be at least one item for width={WIDTH} height={HEIGHT}"
    print(f"\nFound {len(results)} results for width={WIDTH} height={HEIGHT}")
    for result in results:
        print(f"({result['collection']}) {result['id']}")
except Exception as e:
    # Any failure, including the assertion above, is printed rather than raised
    print(f"Error: {e}")


Found 2 results for width=2500 height=2500
(S2_L1) S2__OPER_AUX_ECMWFD_PDMC_20190216T120000_V20190217T090000_20190217T210000.TGZ
(S1_L1) S2__OPER_AUX_ECMWFD_PDMC_20200216T120000_V20190217T090000_20190217T210000.TGZ


**Case 3:** If the owner_id is specified neither in the name of the collection nor in the filter, the catalog first checks for each collection whether it exists, and if it does not,
the default owner_id (local/cluster user login according to the current mode) is concatenated to the name of the collection (here "user"). In our example, there is no collection belonging
to the owner "user", so we will get an error in this case.

In [110]:
# If owner_id is not specified -> take the default one: here "user"

# Search collection - POST method
WIDTH = 2500
HEIGHT = 2500

# Neither collection name carries an owner_id prefix, and the filter has no owner either
my_collections = ["S1_L1", "S2_L1"]

# cql2-json filter: logical AND of one equality test per item property
filter_on_dimensions = {
    "op": "and",
    "args": [
        {"op": "=", "args": [{"property": property_name}, expected_value]}
        for property_name, expected_value in (
            ("orientation", "nadir"),
            ("width", WIDTH),
            ("height", HEIGHT),
        )
    ],
}

try:
    search = stac_client.search(
        method="POST",
        filter_lang="cql2-json",
        filter=filter_on_dimensions,
        collections=my_collections,
    )
    results = list(search.items_as_dicts())
    assert results, f"There should be at least one item for width={WIDTH} height={HEIGHT}"
    print(f"\nFound {len(results)} results for width={WIDTH} height={HEIGHT}")
    for result in results:
        print(f"({result['collection']}) {result['id']}")
except Exception as e:
    # The error is the point of this demonstration: print it instead of raising
    print(f"Error: {e}")

Error: {"error":"Collection user_S1_L1 not found."}


The same tests can be done with the GET method. Here is an example:

In [111]:
# Search collection - GET method
# Everything this cell needs is defined here, so that it does not depend on
# variables left over from the previous search cells.
WIDTH = 2500
HEIGHT = 2500

# cql2-text filter: the owner_id is given in the filter, so the collection
# names below do not need the owner_id prefix.
filter_on_dimensions = f"width={WIDTH} AND owner='jovyan'"
my_collections = ["S1_L1", "S2_L1"]

search = stac_client.search(
    method="GET",
    filter_lang="cql2-text",
    filter=filter_on_dimensions,
    collections=my_collections,
)
results = list(search.items_as_dicts())
assert results, f"There should be at least one item for width={WIDTH} height={HEIGHT}"
print(f"\nFound {len(results)} results for width={WIDTH} height={HEIGHT}")
for result in results:
    print(f"({result['collection']}) {result['id']}")


Found 2 results for width=2500 height=2500
(S2_L1) S2__OPER_AUX_ECMWFD_PDMC_20190216T120000_V20190217T090000_20190217T210000.TGZ
(S1_L1) S2__OPER_AUX_ECMWFD_PDMC_20200216T120000_V20190217T090000_20190217T210000.TGZ
