## This demo showcases the implementation of user story 530

All initiated jobs for the staging process should be stored in a persistent PostgreSQL database. This ensures that even if the rs-server-staging pod is restarted, users can still check the status of all previously submitted jobs.

In [None]:
import json
import os
import pprint
import time

import requests

# Init environment before running a demo notebook.
from resources.utils import *

# Pretty-printer and HTTP session shared by the cells below.
pp = pprint.PrettyPrinter(indent=2, width=80, sort_dicts=False, compact=True)
session = requests.Session()

# NOTE(review): init_demo is not defined in this notebook; presumably it is provided
# by the `resources.utils` star import - confirm.
auxip_client, cadip_client, stac_client, staging_client = init_demo()

if os.getenv("RSPY_LOCAL_MODE") != "1":
    # Not in local mode: both stations are reached through the same website URL,
    # and the session is authenticated with the OAuth2 cookie taken from the environment.
    href_cadip = href_adgs = os.environ["RSPY_WEBSITE"]
    session.cookies.set("session", os.environ["RSPY_OAUTH2_COOKIE"])
else:
    # Local mode: each station service is addressed directly.
    href_cadip = "http://rs-server-cadip:8000"
    href_adgs = "http://rs-server-adgs:8000"

# Input collections queried on each station (alternative ids kept for reference).
cadip_collection_id = "cadip_sentinel1"  # "cadip_s1A"
adgs_collection_id = "adgs"  # "aux_interval"
# Presumably a timeout in seconds for HTTP requests - confirm.
TIMEOUT = 10

Auxip service: http://rs-server-adgs:8000
CADIP service: http://rs-server-cadip:8000
Catalog service: http://rs-server-catalog:8000
Staging service: http://rs-server-staging:8000


In [29]:
# Create the test collection used by the rest of this demo.
# NOTE(review): create_test_collection is not defined in this notebook; presumably it is
# provided by the `resources.utils` star import - confirm what it returns.
collection = create_test_collection()

### Check the newly created collection with the rs-server-catalog. It should be empty.

In [30]:
# Fetch my_test_collection from the catalog and list whatever items it holds,
# one line per item with its asset count.
collection = stac_client.get_collection(TEST_COLLECTION)
collection_items = collection.get_items()
for item in collection_items:
    print(f"Item {item.id} has {len(item.assets)} assets")

### Getting all the sessions from the input collection found in the configuration of the CADIP station

In [None]:
# TODO: CadipClient should inherit pystac_client.Client so we can use its wrapped functions
# Ask the CADIP service for every item (session) of the input collection.
# The request is bounded by TIMEOUT so an unreachable service fails the cell instead of
# hanging it, and an HTTP error status is raised before the payload is parsed.
result = session.get(
    f"{href_cadip}/cadip/collections/{cadip_collection_id}/items",
    timeout=TIMEOUT,
)
result.raise_for_status()

items_collection_cadip = result.json()
# The staging step below needs a non-empty feature collection.
assert items_collection_cadip.get("type") == "FeatureCollection"
assert len(items_collection_cadip.get("features")) > 0
for item in items_collection_cadip.get("features"):
    print(f"Session {item.get('id')} has {len(item.get('assets'))} assets with datetime {item.get('properties').get('datetime')}")

Session S1A_20231120061537234567 has 60 assets with datetime 2023-11-20T06:15:37.234000+00:00
Session S1A_20220715090550123456 has 40 assets with datetime 2022-07-15T09:05:50.123000+00:00
Session S1A_20210410031928012345 has 21 assets with datetime 2021-04-10T03:39:28.012000+00:00
Session S1A_20200105072204051312 has 60 assets with datetime 2020-01-05T18:52:26.165000+00:00


### Getting all the assets from the input collection found in the configuration of the ADGS station

We will deliberately alter the value of a download link for one of the files, causing the staging process to fail at a certain point.

In [32]:
# TODO: AuxipClient should inherit pystac_client.Client so we can use its wrapped functions
# Ask the ADGS (AUXIP) service for every item of the input collection.
# The request is bounded by TIMEOUT so an unreachable service fails the cell instead of
# hanging it, and an HTTP error status is raised before the payload is parsed.
result = session.get(
    f"{href_adgs}/auxip/collections/{adgs_collection_id}/items",
    timeout=TIMEOUT,
)
result.raise_for_status()

items_collection_adgs = result.json()
# The staging step below needs a non-empty feature collection.
assert items_collection_adgs.get("type") == "FeatureCollection"
assert len(items_collection_adgs.get("features")) > 0

for item in items_collection_adgs.get("features"):
    print(f"AUXIP asset {item.get('id')} has datetime {item.get('properties').get('datetime')}")

AUXIP asset S1A_OPER_AUX_RESORB_OPOD_20231218T110702_V20231218T071044_20231218T102814.EOF has datetime 2023-12-18T00:00:00Z
AUXIP asset S1A_OPER_AUX_RESORB_OPOD_20230405T110702_V20230405T071044_20230405T102814.EOF has datetime 2023-04-05T00:00:00Z
AUXIP asset S2__OPER_AUX_ECMWFD_PDMC_20230216T120000_V20190217T090000_20190217T210000.TGZ has datetime 2023-02-17T09:00:00Z
AUXIP asset S1A_OPER_AUX_PREORB_OPOD_20230122T062732_V20230122T062732_20230122T062732.EOF has datetime 2023-01-22T00:00:00Z
AUXIP asset S1A_OPER_MPL_ORBPRE_20210214T021411_20210221T021411_0001.EOF has datetime 2021-02-21T00:00:00Z
AUXIP asset S1A_OPER_AUX_OBMEMC_PDMC_20210123T000000.xml has datetime 2021-01-23T00:00:00Z
AUXIP asset S1A_OPER_AUX_PREORB_OPOD_20201002T062732_V20201002T053140_20201002T120640.EOF has datetime 2020-10-02T00:00:00Z
AUXIP asset S1A_OPER_MPL_ORBSCT_20200829T150704_99999999T999999_0025.EOF has datetime 2020-08-29T00:00:00Z
AUXIP asset S2__OPER_AUX_ECMWFD_PDMC_20200216T120000_V20190217T090000_20190

### Check the jobs table

In [33]:
import requests

# List every job currently stored in the staging jobs table.
post_response = staging_client.get_jobs()
jobs = json.loads(post_response.content)
pprint.pprint(jobs, indent=4)

if jobs.get("numberMatched") > 0:
    # NOTE(review): cluster_mode is not defined in this notebook; presumably it is
    # provided by the `resources.utils` star import - confirm.
    # When it equals "1" the jobs are deleted without asking; otherwise the user decides.
    if cluster_mode == "1":
        delete_jobs = True
    else:
        answer = input(f"There are {jobs.get('numberMatched')} jobs in the table. Do you want to delete them all (y/n)?")
        delete_jobs = answer.lower().strip() == 'y'

    if delete_jobs:
        print("Deleting all the jobs...")
        for job in jobs.get("jobs"):
            post_response = staging_client.delete_job(job.get('identifier'))
        # Read the table again to show the result of the deletion
        post_response = staging_client.get_jobs()
        jobs = json.loads(post_response.content)
        pprint.pprint(jobs, indent=4)

{   'jobs': [   {   'created_at': '2024-12-20T10:58:49.022547',
                    'detail': 'Finished',
                    'identifier': '61865947-3dc3-422c-9e51-01e67b426421',
                    'progress': 100.0,
                    'status': 'FINISHED',
                    'updated_at': '2024-12-20T10:58:54.397269'},
                {   'created_at': '2024-12-20T10:21:43.453341',
                    'detail': 'Finished',
                    'identifier': '09db8072-05b5-4c8e-9dd3-32e23657abe5',
                    'progress': 100.0,
                    'status': 'FINISHED',
                    'updated_at': '2024-12-20T10:21:46.066332'},
                {   'created_at': '2024-12-20T10:21:43.320987',
                    'detail': 'Finished',
                    'identifier': 'e616a550-57df-4845-8b91-631effdb995a',
                    'progress': 100.0,
                    'status': 'FINISHED',
                    'updated_at': '2024-12-20T10:21:48.609662'},
                {   'c

There are 12 jobs in the table. Do you want to delete them all (y/n)? n


### Starting 2 staging processes, one from the CADIP station and one from the ADGS station
The staging process from the ADGS station is expected to fail because one of the assets contains an incorrect download link for the file.

In [34]:
# Launch one staging job per station (CADIP first, then ADGS) and keep each
# value returned by run_staging, in launch order, for the polling cell.
response_list = [
    staging_client.run_staging(station_items, TEST_COLLECTION)
    for station_items in (items_collection_cadip, items_collection_adgs)
]

08:39:44.338 [[32mINFO[0m] (rs_client.rs_client) Staging job b1d1885d-3974-4d22-b2a7-19a9af0e2269 successfully launched !
08:39:44.560 [[32mINFO[0m] (rs_client.rs_client) Staging job dff9d530-b5aa-4edb-9e66-b847d5149ed1 successfully launched !


In [35]:
# Poll each launched staging job until it reports FINISHED or FAILED.
#
# Fixes compared to the previous version of this cell:
#   * the time budget is applied to EACH job (it used to be shared, so the second job
#     only got whatever the first one left over);
#   * a response that is not valid JSON no longer skips the sleep and the time
#     accounting, which could spin forever in a busy loop;
#   * a job that does not finish in time is reported instead of being silently dropped.
timeout = 120  # seconds allowed for each job
poll_interval = 2  # seconds between two status requests

for response in response_list:
    # The second element of the run_staging result is used as the job identifier.
    job_id = response[1]
    # A monotonic clock also accounts for the time spent in the requests themselves.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        get_response = staging_client.get_job_info(job_id)
        status = None
        try:
            resp = json.loads(get_response.content)
            pprint.PrettyPrinter(indent=4).pprint(resp)
            print("\n")
            status = resp["status"]
        except json.JSONDecodeError:
            # Unreadable answer: treat the job as still running and retry after the pause.
            pass

        if status == "FINISHED":
            print("Job COMPLETED")
            break
        if status == "FAILED":
            print("Job FAILED")
            break
        time.sleep(poll_interval)
    else:
        # The loop ended without a break: the job never reached a final status.
        print(f"Job {job_id} did not reach a final status within {timeout} seconds")

{   'created_at': '2025-01-07T08:39:44.293604',
    'detail': 'In progress',
    'identifier': 'b1d1885d-3974-4d22-b2a7-19a9af0e2269',
    'progress': 1.66,
    'status': 'IN_PROGRESS',
    'updated_at': '2025-01-07T08:39:44.749319'}


{   'created_at': '2025-01-07T08:39:44.293604',
    'detail': 'In progress',
    'identifier': 'b1d1885d-3974-4d22-b2a7-19a9af0e2269',
    'progress': 44.75,
    'status': 'IN_PROGRESS',
    'updated_at': '2025-01-07T08:39:46.782557'}


{   'created_at': '2025-01-07T08:39:44.293604',
    'detail': 'In progress',
    'identifier': 'b1d1885d-3974-4d22-b2a7-19a9af0e2269',
    'progress': 100.0,
    'status': 'IN_PROGRESS',
    'updated_at': '2025-01-07T08:39:47.540492'}


{   'created_at': '2025-01-07T08:39:44.293604',
    'detail': 'Finished',
    'identifier': 'b1d1885d-3974-4d22-b2a7-19a9af0e2269',
    'progress': 100.0,
    'status': 'FINISHED',
    'updated_at': '2025-01-07T08:39:49.700315'}


Job COMPLETED
{   'created_at': '2025-01-07T08:39:44.472602

### Check the catalog for my_test_collection. The four CADIP sessions and the successfully staged AUXIP items should be present now

In [36]:
# List the items now stored in the test collection by querying the catalog directly.
# The request is bounded by TIMEOUT so an unreachable service fails the cell instead of
# hanging it, and an HTTP error status is raised before the payload is parsed.
# NOTE(review): `user` is not defined in this notebook; presumably it is provided by the
# `resources.utils` star import - confirm.
result = session.get(
    f"{stac_client.href_catalog}/catalog/collections/{user}:{TEST_COLLECTION}/items",
    timeout=TIMEOUT,
)
result.raise_for_status()

catalog_collection = result.json()
# A payload without "features" is treated as an empty collection.
for item in catalog_collection.get("features", []):
    print(f"Item {item.get('id')} has {len(item.get('assets'))} assets")

Item S1A_OPER_AUX_RESORB_OPOD_20231218T110702_V20231218T071044_20231218T102814.EOF has 1 assets
Item S1A_20231120061537234567 has 60 assets
Item S1A_OPER_AUX_RESORB_OPOD_20230405T110702_V20230405T071044_20230405T102814.EOF has 1 assets
Item S2__OPER_AUX_ECMWFD_PDMC_20230216T120000_V20190217T090000_20190217T210000.TGZ has 1 assets
Item S1A_OPER_AUX_PREORB_OPOD_20230122T062732_V20230122T062732_20230122T062732.EOF has 1 assets
Item S1A_20220715090550123456 has 40 assets
Item S1A_20210410031928012345 has 21 assets
Item S1A_OPER_MPL_ORBPRE_20210214T021411_20210221T021411_0001.EOF has 1 assets
Item S1A_OPER_AUX_OBMEMC_PDMC_20210123T000000.xml has 1 assets
Item S1A_OPER_AUX_PREORB_OPOD_20201002T062732_V20201002T053140_20201002T120640.EOF has 1 assets


In [37]:
# Fetch the test collection through the catalog client and keep it in `result`.
# Nothing is displayed here; the next cell repeats the call to count the items.
result = stac_client.get_collection(TEST_COLLECTION)

In [41]:
# Count the items of my_test_collection through the catalog client,
# before the collection gets removed at the end of the demo.
test_collection = stac_client.get_collection(TEST_COLLECTION)
result = list(test_collection.get_items())
print(f"{len(result)} items before removing")

APIError: "Invalid endpoint."

### Check the jobs table

In [None]:
# Dump the whole jobs table as returned by the staging service.
post_response = staging_client.get_jobs()
jobs = json.loads(post_response.content)
pprint.pprint(jobs, indent=4)

# Then print one line per job with the decoded answer of its results request.
print("\n\nSummary")
job_entries = jobs.get("jobs")
for job in job_entries:
    post_response = staging_client.get_job_results(job["identifier"])
    print(f"{job['identifier']} status is {json.loads(post_response.content)}")

### Delete the whole collection

In [None]:
# Remove the test collection and check that the service reports the expected name.
result = stac_client.remove_collection(TEST_COLLECTION)
deletion_report = result.json()
assert deletion_report["deleted collection"] == TEST_COLLECTION
pp.pprint(deletion_report)