## This demo showcases the implementation of user story 530

All initiated jobs for the staging process should be stored in a persistent PostgreSQL database. This ensures that even if the rs-server-staging pod is restarted, users can still check the status of all previously submitted jobs.

In [1]:
import requests
import os
import pprint
import time
# Init environment before running a demo notebook.
from resources.utils import *

# Shared pretty-printer and HTTP session reused by the cells below.
pp = pprint.PrettyPrinter(indent=2, width=80, sort_dicts=False, compact=True)
session = requests.Session()

# Owner of the demo resources: read from JUPYTERHUB_USER when `cluster_mode`
# is truthy, from RSPY_HOST_USER otherwise.
# NOTE(review): `cluster_mode` and `init_demo` are not defined in this
# notebook; presumably they come from the `resources.utils` star import.
user = os.environ["JUPYTERHUB_USER" if cluster_mode else "RSPY_HOST_USER"]
auxip_client, cadip_client, stac_client, staging_client = init_demo(owner_id=user)

# Base URLs of the services that are queried directly through `session`.
if os.getenv("RSPY_LOCAL_MODE") != "1":
    # Not in local mode: CADIP and ADGS share the website URL, and the session
    # carries a "session" cookie read from RSPY_OAUTH2_COOKIE.
    href = href_adgs = os.environ["RSPY_WEBSITE"]
    href_staging = "https://rsserverstaging.dev-rspy.esa-copernicus.eu"
    session.cookies.set("session", os.environ["RSPY_OAUTH2_COOKIE"])
else:
    # Local mode: every service has its own host name.
    href = "http://rs-server-cadip:8000"
    href_adgs = "http://rs-server-adgs:8000"
    href_staging = "http://rs-server-staging:8000"

# Collections queried on each station (alternative ids kept for reference).
cadip_collection_id = "cadip_sentinel1"  # alternative: "cadip_s1A"
adgs_collection_id = "adgs"  # alternative: "aux_interval"

# Passed as `timeout=` to the requests sent to the staging service.
TIMEOUT = 10

Auxip service: http://rs-server-adgs:8000
CADIP service: http://rs-server-cadip:8000
Catalog service: http://rs-server-catalog:8000
Staging service: http://rs-server-staging:8000


In [2]:
# Create the test collection used by this demo and keep the helper's return
# value in `collection`.
# NOTE(review): `create_test_collection` is not defined in this notebook;
# presumably it comes from the `resources.utils` star import — confirm.
collection = create_test_collection()

### Check the newly created collection with the rs-server-catalog. It should be empty.

In [3]:
# Ask the catalog for the items of the test collection owned by `user`.
result = session.get(
    f"{stac_client.href_catalog}/catalog/collections/{user}:{TEST_COLLECTION}/items"
)
catalog_collection = result.json()

# The answer must be a feature collection; report how many items it returned.
assert catalog_collection.get("type") == "FeatureCollection"
print(f"Number of items found in the '{TEST_COLLECTION}' collection: {catalog_collection.get('context').get('returned')}")

Number of items found in the 'my_test_collection' collection: 0


### Getting all the sessions from the `cadip_sentinel1` collection found in the configuration of the CADIP station

In [4]:
# Fetch the sessions that the CADIP service lists for the configured collection.
result = session.get(
    f"{href}/cadip/collections/{cadip_collection_id}/items"
)
items_collection = result.json()

# Sanity checks: a feature collection holding at least one session.
assert items_collection.get("type") == "FeatureCollection"
cadip_features = items_collection.get("features")
assert len(cadip_features) > 0

# One summary line per session.
for item in cadip_features:
    print(f"Session {item.get('id')} has {len(item.get('assets'))} assets with datetime {item.get('properties').get('datetime')}")

Session S1A_20231120061537234567 has 60 assets with datetime 2023-11-20T06:15:37.234000+00:00
Session S1A_20220715090550123456 has 40 assets with datetime 2022-07-15T09:05:50.123000+00:00
Session S1A_20210410031928012345 has 21 assets with datetime 2021-04-10T03:39:28.012000+00:00
Session S1A_20200105072204051312 has 60 assets with datetime 2020-01-05T18:52:26.165000+00:00


### Getting all the assets from the `adgs` collection found in the configuration of the ADGS station

We will deliberately alter the value of a download link for one of the files, causing the staging process to fail at a certain point.

In [5]:
# Fetch the assets that the AUXIP endpoint lists for the configured collection.
result = session.get(
    f"{href_adgs}/auxip/collections/{adgs_collection_id}/items"
)
items_collection_adgs = result.json()

# Sanity checks: a feature collection holding at least one asset.
assert items_collection_adgs.get("type") == "FeatureCollection"
adgs_features = items_collection_adgs.get("features")
assert len(adgs_features) > 0

# One summary line per asset.
for item in adgs_features:
    print(f"AUXIP asset {item.get('id')} has datetime {item.get('properties').get('datetime')}")

AUXIP asset S1A_OPER_AUX_RESORB_OPOD_20231218T110702_V20231218T071044_20231218T102814.EOF has datetime 2023-12-18T00:00:00Z
AUXIP asset S1A_OPER_AUX_RESORB_OPOD_20230405T110702_V20230405T071044_20230405T102814.EOF has datetime 2023-04-05T00:00:00Z
AUXIP asset S2__OPER_AUX_ECMWFD_PDMC_20230216T120000_V20190217T090000_20190217T210000.TGZ has datetime 2023-02-17T09:00:00Z
AUXIP asset S1A_OPER_AUX_PREORB_OPOD_20230122T062732_V20230122T062732_20230122T062732.EOF has datetime 2023-01-22T00:00:00Z
AUXIP asset S1A_OPER_MPL_ORBPRE_20210214T021411_20210221T021411_0001.EOF has datetime 2021-02-21T00:00:00Z
AUXIP asset S1A_OPER_AUX_OBMEMC_PDMC_20210123T000000.xml has datetime 2021-01-23T00:00:00Z
AUXIP asset S1A_OPER_AUX_PREORB_OPOD_20201002T062732_V20201002T053140_20201002T120640.EOF has datetime 2020-10-02T00:00:00Z
AUXIP asset S1A_OPER_MPL_ORBSCT_20200829T150704_99999999T999999_0025.EOF has datetime 2020-08-29T00:00:00Z
AUXIP asset S2__OPER_AUX_ECMWFD_PDMC_20200216T120000_V20190217T090000_20190

### Check the jobs table

In [6]:
# Show the current content of the staging jobs table.
# The body is decoded with Response.json(), like the catalog/station cells do,
# so this cell does not depend on a `json` name leaking from a star import.
jobs_response = session.get(f"{href_staging}/jobs", timeout=TIMEOUT)
jobs = jobs_response.json()
pprint.PrettyPrinter(indent=4).pprint(jobs)

# A payload without "numberMatched" is treated as an empty table instead of
# failing on a comparison between None and 0.
job_count = jobs.get("numberMatched") or 0
if job_count > 0:
    # Local mode clears the table without asking; otherwise the user confirms.
    delete_jobs = True
    if os.getenv("RSPY_LOCAL_MODE") != "1":
        delete_jobs = input(f"There are {job_count} jobs in the table. Do you want to delete them all (y/n)?").lower().strip() == 'y'
    if delete_jobs:
        print("Deleting all the jobs...")
        for job in jobs.get("jobs") or []:
            delete_response = session.delete(
                f"{href_staging}/jobs/{job.get('identifier')}",
                timeout=TIMEOUT,
            )
            # Report a rejected deletion instead of silently ignoring it.
            if not delete_response.ok:
                print(f"Could not delete job {job.get('identifier')}: HTTP {delete_response.status_code}")

        # Print the table again so the effect of the deletion is visible.
        jobs = session.get(f"{href_staging}/jobs", timeout=TIMEOUT).json()
        pprint.PrettyPrinter(indent=4).pprint(jobs)

{   'jobs': [   {   'created_at': '2024-12-12T14:29:16.550683',
                    'detail': 'Finished',
                    'identifier': '3f37262d-106d-4356-b917-e679f1124bc8',
                    'progress': 100.0,
                    'status': 'FINISHED',
                    'updated_at': '2024-12-12T14:29:22.636073'},
                {   'created_at': '2024-12-12T14:29:16.722307',
                    'detail': 'Finished',
                    'identifier': '00b5ea86-eb01-4a74-99e8-721f599c54f0',
                    'progress': 100.0,
                    'status': 'FINISHED',
                    'updated_at': '2024-12-12T14:29:22.756080'}],
    'numberMatched': 2}
Deleting all the jobs...
{'jobs': [], 'numberMatched': 0}


### Starting 2 staging processes, one from the CADIP station and one from the ADGS station
The staging process from the ADGS station is expected to fail because one of the assets contains an incorrect download link for the file.

In [7]:
# Submit one staging request per station (CADIP first, then ADGS) and keep
# each response so the jobs can be followed afterwards.
response_list = []
for station_items in (items_collection, items_collection_adgs):
    staging_response = staging_client.run_staging(station_items, TEST_COLLECTION)
    response_list.append(staging_response)

14:42:22.711 [[32mINFO[0m] (rs_client.rs_client) href execute process vaut: http://rs-server-staging:8000/processes/staging/execution
14:42:22.782 [[32mINFO[0m] (rs_client.rs_client) Response vaut: {'status': {'started': '77940909-d088-4ce0-a8b2-4db0f1834fa0'}}
14:42:22.783 [[32mINFO[0m] (rs_client.rs_client) href execute process vaut: http://rs-server-staging:8000/processes/staging/execution


{'status': {'started': '77940909-d088-4ce0-a8b2-4db0f1834fa0'}}

Job ID = 77940909-d088-4ce0-a8b2-4db0f1834fa0



14:42:22.978 [[32mINFO[0m] (rs_client.rs_client) Response vaut: {'status': {'started': '5eefdf22-ffcd-43f3-bab2-b242660fb768'}}


{'status': {'started': '5eefdf22-ffcd-43f3-bab2-b242660fb768'}}

Job ID = 5eefdf22-ffcd-43f3-bab2-b242660fb768



In [8]:
# Poll every submitted job until it reports FINISHED or FAILED.
# Each job gets its own time budget: previously a single counter was shared,
# so the second job only had whatever the first one left over.
JOB_TIMEOUT = 120  # seconds allowed per job
POLL_INTERVAL = 2  # seconds between two status requests

for response in response_list:
    job_id = response["status"]["started"]
    timeout = JOB_TIMEOUT
    while timeout > 0:
        get_response = staging_client.get_job_status(job_id)
        try:
            resp = json.loads(get_response.content)
        except json.JSONDecodeError:
            # An undecodable body is retried, but it still falls through to the
            # sleep/decrement below so the loop cannot spin forever.
            resp = None

        if resp is not None:
            pprint.PrettyPrinter(indent=4).pprint(resp)
            print("\n")
            if resp["status"] == "FINISHED":
                print("Job COMPLETED")
                break

            if resp["status"] == "FAILED":
                print("Job FAILED")
                break

        time.sleep(POLL_INTERVAL)
        timeout -= POLL_INTERVAL
    else:
        # Reached only when the budget ran out without hitting a `break`.
        print(f"Job {job_id} did not reach a final state within {JOB_TIMEOUT} seconds")

{   'created_at': '2024-12-12T14:42:22.748832',
    'detail': 'In progress',
    'identifier': '77940909-d088-4ce0-a8b2-4db0f1834fa0',
    'progress': 1.1,
    'status': 'IN_PROGRESS',
    'updated_at': '2024-12-12T14:42:23.251490'}


{   'created_at': '2024-12-12T14:42:22.748832',
    'detail': 'In progress',
    'identifier': '77940909-d088-4ce0-a8b2-4db0f1834fa0',
    'progress': 37.02,
    'status': 'IN_PROGRESS',
    'updated_at': '2024-12-12T14:42:25.346171'}


{   'created_at': '2024-12-12T14:42:22.748832',
    'detail': 'In progress',
    'identifier': '77940909-d088-4ce0-a8b2-4db0f1834fa0',
    'progress': 100.0,
    'status': 'IN_PROGRESS',
    'updated_at': '2024-12-12T14:42:26.408295'}


{   'created_at': '2024-12-12T14:42:22.748832',
    'detail': 'Finished',
    'identifier': '77940909-d088-4ce0-a8b2-4db0f1834fa0',
    'progress': 100.0,
    'status': 'FINISHED',
    'updated_at': '2024-12-12T14:42:28.862283'}


Job COMPLETED
{   'created_at': '2024-12-12T14:42:22.918459'

### Check the catalog for the `my_test_collection` collection. The items staged from the CADIP and ADGS stations should be present now

In [9]:
# List what the catalog now holds for the test collection.
# NOTE(review): this URL omits the `{user}:` prefix used by the first catalog
# check of this notebook — confirm that both forms are intended.
result = session.get(
    f"{stac_client.href_catalog}/catalog/collections/{TEST_COLLECTION}/items"
)
catalog_collection = result.json()
assert catalog_collection.get("type") == "FeatureCollection"

# The former exact-count check (4 items) is disabled; only report each item.
staged_items = catalog_collection.get("features")
for item in staged_items:
    print(f"Item {item.get('id')} has {len(item.get('assets'))} assets")

Item S1A_OPER_AUX_RESORB_OPOD_20231218T110702_V20231218T071044_20231218T102814.EOF has 1 assets
Item S1A_20231120061537234567 has 60 assets
Item S1A_OPER_AUX_RESORB_OPOD_20230405T110702_V20230405T071044_20230405T102814.EOF has 1 assets
Item S2__OPER_AUX_ECMWFD_PDMC_20230216T120000_V20190217T090000_20190217T210000.TGZ has 1 assets
Item S1A_OPER_AUX_PREORB_OPOD_20230122T062732_V20230122T062732_20230122T062732.EOF has 1 assets
Item S1A_20220715090550123456 has 40 assets
Item S1A_20210410031928012345 has 21 assets
Item S1A_OPER_MPL_ORBPRE_20210214T021411_20210221T021411_0001.EOF has 1 assets
Item S1A_OPER_AUX_OBMEMC_PDMC_20210123T000000.xml has 1 assets
Item S1A_OPER_AUX_PREORB_OPOD_20201002T062732_V20201002T053140_20201002T120640.EOF has 1 assets


### Check the jobs table

In [10]:
# Show the jobs table after both staging runs.
# The bodies are decoded with Response.json(), like the catalog/station cells
# do, so this cell does not depend on a `json` name leaking from a star import.
jobs = session.get(f"{href_staging}/jobs", timeout=TIMEOUT).json()
pprint.PrettyPrinter(indent=4).pprint(jobs)

# For every listed job, print what its `/results` endpoint returns.
print("\n\nSummary")
for job in jobs.get("jobs") or []:  # tolerate a payload without a "jobs" list
    results_response = session.get(
        f"{href_staging}/jobs/{job['identifier']}/results",
        timeout=TIMEOUT,
    )
    print(f"{job['identifier']} status is {results_response.json()}")

{   'jobs': [   {   'created_at': '2024-12-12T14:42:22.918459',
                    'detail': 'Finished',
                    'identifier': '5eefdf22-ffcd-43f3-bab2-b242660fb768',
                    'progress': 100.0,
                    'status': 'FINISHED',
                    'updated_at': '2024-12-12T14:42:27.691659'},
                {   'created_at': '2024-12-12T14:42:22.748832',
                    'detail': 'Finished',
                    'identifier': '77940909-d088-4ce0-a8b2-4db0f1834fa0',
                    'progress': 100.0,
                    'status': 'FINISHED',
                    'updated_at': '2024-12-12T14:42:28.862283'}],
    'numberMatched': 2}


Summary
5eefdf22-ffcd-43f3-bab2-b242660fb768 status is FINISHED
77940909-d088-4ce0-a8b2-4db0f1834fa0 status is FINISHED


### Delete the whole collection

In [11]:
# Remove the test collection from the catalog and check the confirmation payload.
result = session.delete(
    f"{stac_client.href_catalog}/catalog/collections/{TEST_COLLECTION}"
)
deletion_report = result.json()
assert deletion_report["deleted collection"] == TEST_COLLECTION
pp.pprint(deletion_report)

{'deleted collection': 'my_test_collection'}
