## This demo showcases the implementation of user story 342

All initiated jobs for the staging process should be stored in a persistent PostgreSQL database. This ensures that even if the rs-server-staging pod is restarted, users can still check the status of all previously submitted jobs.

In [None]:
import requests
import os
import pprint
# Init environment before running a demo notebook.
from resources.utils import *

def update_href(data, new_href, pos = None):
    """Overwrite the ``href`` of assets inside a feature collection.

    Args:
        data: Mapping with an optional ``features`` list. Each feature may
            carry an ``assets`` mapping whose values are asset dicts. The
            mapping is modified in place.
        new_href: Value written to every targeted asset that already has an
            ``href`` key; assets without one are left untouched.
        pos: Zero-based index of the only feature to modify. ``None``
            (the default) modifies every feature.

    Returns:
        The same ``data`` object that was passed in.
    """
    for index, feature in enumerate(data.get('features', [])):
        # Compare against None explicitly: a plain truthiness test would treat
        # pos=0 as "no position given" and rewrite every feature.
        if pos is not None and index != pos:
            continue
        for asset in feature.get('assets', {}).values():
            if 'href' in asset:
                asset['href'] = new_href
    return data

# Shared pretty-printer and HTTP session reused by the cells below.
pp = pprint.PrettyPrinter(indent=2, width=80, sort_dicts=False, compact=True)
session = requests.Session()

# The owner id comes from a different environment variable depending on
# cluster_mode (provided by the resources.utils wildcard import).
if cluster_mode:
    user = os.environ["JUPYTERHUB_USER"]
else:
    user = os.environ["RSPY_HOST_USER"]
auxip_client, cadip_client, stac_client, staging_client = init_demo(owner_id=user)

# Pick the service endpoints: one shared website URL plus a session cookie
# outside local mode, fixed per-service URLs in local mode.
if os.getenv("RSPY_LOCAL_MODE") != "1":
    href = os.environ["RSPY_WEBSITE"]
    href_adgs = href
    href_staging = "https://rsserverstaging.dev-rspy.esa-copernicus.eu"
    session.cookies.set("session", os.environ["RSPY_OAUTH2_COOKIE"])
else:
    href = "http://rs-server-cadip:8000"
    href_adgs = "http://rs-server-adgs:8000"
    href_staging = "http://rs-server-staging:8000"

# Collection identifiers queried on each station, and the value passed as
# `timeout` to the HTTP calls against the staging service.
cadip_collection_id = "cadip"
adgs_collection_id = "adgs"
TIMEOUT = 10


In [None]:
# Create a test collection.
# NOTE(review): create_test_collection is not defined in this notebook; it
# presumably comes from the resources.utils wildcard import and registers
# TEST_COLLECTION in the catalog — confirm there.
collection = create_test_collection()

### Check the newly created collection with the rs-server-catalog. It should be empty.

In [None]:
# Ask the catalog for the items of the test collection, addressed here as
# "<user>:<collection>", and report how many items it returned.
items_url = f"{stac_client.href_catalog}/catalog/collections/{user}:{TEST_COLLECTION}/items"
result = session.get(items_url)
catalog_collection = result.json()
assert catalog_collection.get("type") == "FeatureCollection"
returned_count = catalog_collection.get('context').get('returned')
print(f"Number of items found in the '{TEST_COLLECTION}' collection: {returned_count}")

### Getting all the sessions from the `cadip` collection defined in the configuration of the CADIP station

In [None]:
# Fetch the items of the CADIP collection and print one line per session.
sessions_url = f"{href}/cadip/collections/{cadip_collection_id}/items"
result = session.get(sessions_url)
items_collection = result.json()

# The response must be a non-empty FeatureCollection, otherwise there is
# nothing to stage later on.
assert items_collection.get("type") == "FeatureCollection"
assert len(items_collection.get("features")) > 0

for item in items_collection.get("features"):
    session_id = item.get('id')
    asset_count = len(item.get('assets'))
    session_datetime = item.get('properties').get('datetime')
    print(f"Session {session_id} has {asset_count} assets with datetime {session_datetime}")

### Getting all the assets from the `adgs` collection defined in the configuration of the ADGS station

We will deliberately alter the value of a download link for one of the files, causing the staging process to fail at a certain point.

In [None]:
# Fetch the items of the ADGS collection; it must be a non-empty
# FeatureCollection.
auxip_url = f"{href_adgs}/auxip/collections/{adgs_collection_id}/items"
result = session.get(auxip_url)
items_collection_adgs = result.json()
assert items_collection_adgs.get("type") == "FeatureCollection"
assert len(items_collection_adgs.get("features")) > 0

# Deliberately break the download links of the feature at index 2 so that the
# AUXIP staging started later in this demo is expected to fail.
items_collection_adgs = update_href(items_collection_adgs, 'http://fake-auxip', 2)
#pprint.PrettyPrinter(indent=4).pprint(items_collection_adgs)

for item in items_collection_adgs.get("features"):
    asset_id = item.get('id')
    asset_datetime = item.get('properties').get('datetime')
    print(f"AUXIP asset {asset_id} has datetime {asset_datetime}")

### Check the jobs table

In [None]:
import requests

# List the jobs currently known to the staging service.
post_response = session.get(f"{href_staging}/jobs", timeout=TIMEOUT)
jobs = json.loads(post_response.content)
pprint.PrettyPrinter(indent=4).pprint(jobs)

if jobs.get("numberMatched") > 0:
    # Local mode wipes the table without asking; otherwise the user has to
    # confirm the deletion interactively.
    if os.getenv("RSPY_LOCAL_MODE") == "1":
        delete_jobs = True
    else:
        answer = input(f"There are {jobs.get('numberMatched')} jobs in the table. Do you want to delete them all (y/n)?")
        delete_jobs = answer.lower().strip() == 'y'

    if delete_jobs:
        print("Deleting all the jobs...")
        for job in jobs.get("jobs"):
            post_response = session.delete(f"{href_staging}/jobs/{job.get('identifier')}", timeout=TIMEOUT)

        # Read the table again to show what is left after the deletions.
        post_response = session.get(f"{href_staging}/jobs", timeout=TIMEOUT)
        jobs = json.loads(post_response.content)
        pprint.PrettyPrinter(indent=4).pprint(jobs)

### Starting 2 staging processes, one from the CADIP station and one from the ADGS station
The staging process from the ADGS station is expected to fail because one of the assets contains an incorrect download link for the file.

In [None]:
import time

# Submit one staging execution per item collection (CADIP first, then ADGS)
# and poll each job until it reports FINISHED or FAILED, or until roughly
# 120 seconds of 2-second waits have elapsed.
for items in [items_collection, items_collection_adgs]:
    staging_body = {
        "version": "0.2.0",
        "id": "staging",
        "title": {
            "en": "Staging"
        },
        "description": {
            "en": "A process that takes an external STAC ItemCollection, asynchronously download its assets into the RS catalog bucket and creates the corresponding STAC items in the RS catalog."
        },
        "jobControlOptions": [
            "async-execute"
        ],
        "keywords": [
            "stac",
            "staging"
        ],
        "links": [
            {
                "type": "text/html",
                "rel": "about",
                "title": "documentation",
                "href": "https://home.rs-python.eu/rs-documentation/rs-server/docs/doc/users/functionalities/#staging",
                "hreflang": "en-US"
            }
        ],
        "inputs": {
            "collection": {
                "title": "Target collection",
                "description": "The target collection identifier in the RS catalog",
                "id": TEST_COLLECTION,
                "schema": {
                    "type": "string"
                },
                "minOccurs": 1,
                "maxOccurs": 1
            },
            "items": items,
            # NOTE(review): the provider is "cadip" for both collections,
            # including the ADGS one — confirm this is intended.
            "provider": "cadip"
        },
        "outputs": {
            "result": {
                "title": "Output STAC items",
                "id": "some_output_id",
                "description": "The staged STAC ItemCollection",
                "schema": "false",
                "minOccurs": 1,
                "maxOccurs": 1
            }
        }
    }

    post_response = session.post(f"{href_staging}/processes/staging/execution",
                                 json=staging_body,
                                 timeout=TIMEOUT)

    resp = json.loads(post_response.content)
    pprint.PrettyPrinter(indent=4).pprint(resp)
    assert post_response.status_code == 200
    # The execution response is expected to carry the new job id under
    # status.started; without it there is nothing to poll.
    assert resp["status"].get("started", None), "Check the collection"

    job_id = resp["status"]["started"]
    print(f"\nThe job_id = {job_id} started\n")

    # Remaining polling budget, decreased by 2 after every 2-second sleep.
    timeout = 120
    while timeout > 0:
        # NOTE(review): polling goes through `requests` with apikey_headers
        # rather than through `session` — confirm this is intended.
        post_response = requests.get(f"{href_staging}/jobs/{job_id}",
                                     **apikey_headers,
                                     timeout=TIMEOUT)
        try:
            resp = json.loads(post_response.content)
        except json.JSONDecodeError:
            # Body is not JSON: fall through to the sleep and countdown below.
            # Jumping straight to the next iteration would skip both and could
            # spin forever without any back-off.
            pass
        else:
            pprint.PrettyPrinter(indent=4).pprint(resp)
            print("\n")
            if resp["status"] == "FINISHED":
                print("Job COMPLETED\n")
                break

            if resp["status"] == "FAILED":
                print("Job FAILED")
                break
        time.sleep(2)
        timeout -= 2
    else:
        # Only reached when the polling budget ran out without a final status.
        print(f"Timed out waiting for job {job_id} to reach a final status\n")

### Check `my_test_collection`: it should now contain five items

In [None]:
# Read the items of the test collection back from the catalog; exactly five
# are expected at this point of the demo.
# NOTE(review): unlike the earlier catalog check, this URL has no "<user>:"
# prefix before the collection name — confirm both forms are accepted.
staged_items_url = f"{stac_client.href_catalog}/catalog/collections/{TEST_COLLECTION}/items"
result = session.get(staged_items_url)
catalog_collection = result.json()
assert catalog_collection.get("type") == "FeatureCollection"
assert len(catalog_collection.get("features")) == 5

for item in catalog_collection.get("features"):
    item_id = item.get('id')
    asset_count = len(item.get('assets'))
    print(f"Item {item_id} has {asset_count} assets")

### Check the jobs table

In [None]:
import requests

# Dump the whole jobs table, then print the /results payload of every job.
post_response = session.get(f"{href_staging}/jobs", timeout=TIMEOUT)
jobs = json.loads(post_response.content)
pprint.PrettyPrinter(indent=4).pprint(jobs)

print("\n\nSummary")
for job in jobs.get("jobs"):
    post_response = session.get(
        f"{href_staging}/jobs/{job['identifier']}/results",
        timeout=TIMEOUT,
    )
    print(f"{job['identifier']} status is {json.loads(post_response.content)}")

### Delete the whole collection

In [None]:
# Remove the test collection from the catalog and check that the response
# names the collection that was deleted.
result = session.delete(f"{stac_client.href_catalog}/catalog/collections/{TEST_COLLECTION}")
deletion_report = result.json()
assert deletion_report["deleted collection"] == TEST_COLLECTION
pp.pprint(deletion_report)