# CADU endpoints demo

In this demo we will call the rs-server CADU HTTP endpoints:

  * List available CADU products
  * Download some products into local storage and an S3 bucket
  * Monitor the download status from the database.

In [8]:
# Define some variables

# Base URL of the CADU endpoints (rs-server host = the container name)
endpoint = "http://rs-server-cadip:8000/cadip/CADIP/cadu"

# Start and stop dates of the product search
start = "2014-01-01T12:00:00.000Z"
stop = "2023-12-30T12:00:00.000Z"

In [9]:
# From a terminal, to list the available CADU products, we would use the curl command below.
# The leading "!" runs the line in a shell, with {endpoint}, {start} and {stop} replaced
# by the values of the Python variables. "set -x" echoes the expanded command before its result.
!set -x && curl -X GET "{endpoint}/search?start_date={start}&stop_date={stop}" -H "accept: application/json"

+ curl -X GET 'http://rs-server-cadip:8000/cadip/CADIP/cadu/search?start_date=2014-01-01T12:00:00.000Z&stop_date=2023-12-30T12:00:00.000Z' -H 'accept: application/json'
{"CADIP":[["2b17b57d-fff4-4645-b539-91f305c27c69","DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw"],["2b17b57d-fff4-4645-b539-91f305c27c60","DCS_04_S1A_20231121072204051312_ch1_DSDB_00002.raw"],["2b17b57d-fff4-4645-b539-91f305c27c61","DCS_04_S1A_20231121072204051312_ch1_DSDB_00003.raw"],["2b17b57d-fff4-4645-b539-91f305c27c62","DCS_04_S1A_20231121072204051312_ch1_DSDB_00004.raw"],["2b17b57d-fff4-4645-b539-91f305c27c63","DCS_04_S1A_20231121072204051312_ch1_DSDB_00005.raw"],["2b17b57d-fff4-4645-b539-91f305c27c64","DCS_04_S1A_20231121072204051312_ch1_DSDB_00006.raw"],["2b17b57d-fff4-4645-b539-91f305c27c65","DCS_04_S1A_20231121072204051312_ch1_DSDB_00007.raw"],["some_id_2","DCS_04_S1A_20231121072204051312_ch1_DSDB_00060.raw"],["some_id_3","DCS_04_S1A_20231121072204051312_ch2_DSDB_00046.raw"],["some_id_4","DCS_04_S1A_2023

---
**NOTE**

You can also call the HTTP endpoints from the Swagger UI: http://localhost:8002/docs

---

In [10]:
# But let's do it in python so it's easier to parse results
import requests
import pprint 

# Query the "list" endpoint for the products between the start and stop dates
print(f"Call: '{endpoint}/search' with: start_date={start!r} stop_date={stop!r}")
data = requests.get(
    f"{endpoint}/search",
    params={"start_date": start, "stop_date": stop},
)
assert data.status_code == 200

# The products are returned as (id, name) pairs under the "CADIP" key
products = data.json()["CADIP"]
assert len(products) == 10

# Only show the first few products
print("Result:")
pprint.pprint(products[:3], indent=4)
print("...")

# Drop the ids, keep only the names
product_names = [product_name for _, product_name in products]

Call: 'http://rs-server-cadip:8000/cadip/CADIP/cadu/search' with: start_date='2014-01-01T12:00:00.000Z' stop_date='2023-12-30T12:00:00.000Z'
Result:
[   [   '2b17b57d-fff4-4645-b539-91f305c27c69',
        'DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw'],
    [   '2b17b57d-fff4-4645-b539-91f305c27c60',
        'DCS_04_S1A_20231121072204051312_ch1_DSDB_00002.raw'],
    [   '2b17b57d-fff4-4645-b539-91f305c27c61',
        'DCS_04_S1A_20231121072204051312_ch1_DSDB_00003.raw']]
...


In [11]:
# The "list" endpoint has initialised the database with the products info.
# Ask the "status" endpoint for the info of each product, from its name.
print(f"Call: '{endpoint}/status' with: name='...'")
all_status = []
for name in product_names:
    data = requests.get(f"{endpoint}/status", params={"name": name})
    assert data.status_code == 200
    all_status += [data.json()]

# Only show the first few status
print("Result:")
pprint.pprint(all_status[:2], indent=4)
print("...")

Call: 'http://rs-server-cadip:8000/cadip/CADIP/cadu/status' with: name='...'
Result:
[   {   'available_at_station': '2023-11-26T17:01:39.528000',
        'db_id': 1,
        'download_start': '2024-01-25T14:27:39.069202',
        'download_stop': '2024-01-25T14:27:39.638256',
        'name': 'DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw',
        'product_id': '2b17b57d-fff4-4645-b539-91f305c27c69',
        'status': 'DONE',
        'status_fail_message': None},
    {   'available_at_station': '2023-11-26T17:01:39.528000',
        'db_id': 2,
        'download_start': '2024-01-25T14:27:39.175633',
        'download_stop': '2024-01-25T14:27:39.639406',
        'name': 'DCS_04_S1A_20231121072204051312_ch1_DSDB_00002.raw',
        'product_id': '2b17b57d-fff4-4645-b539-91f305c27c60',
        'status': 'DONE',
        'status_fail_message': None}]
...


---
**NOTE**

You can also monitor the database using pgAdmin.

---

In [12]:
# We'll use boto3 to monitor the s3 bucket.
# Note: the S3_ACCESSKEY, S3_SECRETKEY and S3_ENDPOINT are given in the docker-compose.yml file.
!pip install boto3
import boto3
import os

s3_session = boto3.session.Session()
s3_client = s3_session.client(
    service_name="s3",
    aws_access_key_id=os.environ["S3_ACCESSKEY"],
    aws_secret_access_key=os.environ["S3_SECRETKEY"],
    endpoint_url=os.environ["S3_ENDPOINT"],
)



In [17]:
# S3 bucket name and dir
bucket_name = "test-data"

# Create the s3 bucket if it does not exist yet ...
existing_buckets = [bucket["Name"] for bucket in s3_client.list_buckets()["Buckets"]]
if bucket_name not in existing_buckets:
    s3_client.create_bucket(Bucket=bucket_name)

# ... else remove the existing products from it
else:
    for name in product_names:
        s3_client.delete_object(Bucket=bucket_name, Key=name)

# The local download directory is given by the RSPY_WORKING_DIR environment variable
from pathlib import Path
local_download_dir = Path(os.environ["RSPY_WORKING_DIR"])

# Make sure the directory exists
local_download_dir.mkdir(parents=True, exist_ok=True)

# Remove the products that are already there, so we start from a clean state
for name in product_names:
    file = local_download_dir / name
    if not file.is_file():
        continue
    file.unlink()

---
**NOTE**

You can also monitor the s3 bucket using the minio console: http://127.0.0.1:9001/browser with:

  * Username: _minio_
  * Password: _Strong#Pass#1234_

---

In [18]:
import asyncio

# Bucket URL = the "s3://" scheme followed by the bucket name
bucket_path = "s3://" + bucket_name
print(f"Call: '{endpoint}' with: name='...' local={local_download_dir!r} obs={bucket_path!r}")

# Call the CADIP endpoint to download one product in background
# and upload it (optional) to the S3 bucket.
async def download_one(name: str, save_to_s3: bool):
    """Request the download of one product from the CADIP endpoint.

    Args:
        name: name of the product to download.
        save_to_s3: if True, the bucket URL is also passed (as "obs") in the request.

    Raises:
        AssertionError: if the endpoint does not answer with HTTP status 200.
    """

    params = {"name": name, "local": local_download_dir}
    # obs = the bucket URL, if requested
    if save_to_s3:
        params["obs"] = bucket_path

    # requests.get is a blocking call: run it in a worker thread so the event loop
    # stays free and the other tasks of the group really run in parallel.
    data = await asyncio.to_thread(requests.get, endpoint, params)
    assert data.status_code == 200, f"Download request for {name!r} returned HTTP {data.status_code}"

# In parallel, call the "status" endpoint to get and print the download status.
async def print_status():
    """Poll the "status" endpoint every second and print the status counts.

    At each round, the "status" endpoint is called for every name in
    product_names, the products are counted by status and one summary line
    is printed. The polling stops as soon as no product is NOT_STARTED or
    IN_PROGRESS anymore, i.e. when every product is either DONE or FAILED.

    Raises:
        AssertionError: if the endpoint does not answer with HTTP status 200.
        KeyError: if the endpoint returns a status that is not one of the four counted.
    """

    # Wait a second in case the status needs to go back
    # from DONE to NOT_STARTED when we download several times.
    await asyncio.sleep(1)

    while True:

        # Count the number of products not started, in progress etc ...
        all_status = {"NOT_STARTED": 0, "IN_PROGRESS": 0, "FAILED": 0, "DONE": 0}
        for name in product_names:

            # Call the "status" endpoint
            data = requests.get(f"{endpoint}/status", {"name": name})
            assert data.status_code == 200
            all_status[(data.json())["status"]] += 1

        # Print result
        print (" / ".join ([f"{status}:{count}" for status, count in all_status.items()]))

        # Stop when nothing is pending anymore. Waiting for all products to be DONE
        # would loop forever if one of them ends as FAILED
        # (presumably a FAILED product stays FAILED - to be confirmed on the server side).
        if all_status["NOT_STARTED"] + all_status["IN_PROGRESS"] == 0:
            break

        await asyncio.sleep(1)

# Call everything in parallel
async def download_all(save_to_s3: bool):
    """Run the status monitoring and one download per product in a single task group.

    Args:
        save_to_s3: forwarded as-is to each download_one call.
    """
    async with asyncio.TaskGroup() as tasks:
        # The status monitoring task is created first, then one task per product
        tasks.create_task(print_status())
        for product in product_names:
            tasks.create_task(download_one(product, save_to_s3))

print ("Download everything to the local directory, not s3:")
await (download_all(save_to_s3=False))

# Check that the local files exist. 
# Wait 1 second before that or sometimes it bugs.
await asyncio.sleep(1)
for name in product_names:
    file = Path (local_download_dir) / name    
    if not file.is_file():
        raise RuntimeException (f"{file} is missing locally")
    print (f"{file} exists")

print ("\nDownload everything again, but this time upload to S3:")
await (download_all(save_to_s3=True))

# This time the local files are not kept locally, 
# but they should be uploaded into the S3 bucket.
await asyncio.sleep(1)
all_s3_filenames = [key["Key"] for key in s3_client.list_objects(Bucket=bucket_name)['Contents']]
for name in product_names:    
    if not name in all_s3_filenames:
        raise RuntimeException (f"{file} is missing from the S3 bucket")
    print (f"s3://{bucket_name}/{name} exists")

Call: 'http://rs-server-cadip:8000/cadip/CADIP/cadu' with: name='...' local=PosixPath('/rspy/working/dir') obs='s3://test-data'
Download everything to the local directory, not s3:
NOT_STARTED:0 / IN_PROGRESS:1 / FAILED:0 / DONE:9
NOT_STARTED:0 / IN_PROGRESS:0 / FAILED:0 / DONE:10
/rspy/working/dir/DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw exists
/rspy/working/dir/DCS_04_S1A_20231121072204051312_ch1_DSDB_00002.raw exists
/rspy/working/dir/DCS_04_S1A_20231121072204051312_ch1_DSDB_00003.raw exists
/rspy/working/dir/DCS_04_S1A_20231121072204051312_ch1_DSDB_00004.raw exists
/rspy/working/dir/DCS_04_S1A_20231121072204051312_ch1_DSDB_00005.raw exists
/rspy/working/dir/DCS_04_S1A_20231121072204051312_ch1_DSDB_00006.raw exists
/rspy/working/dir/DCS_04_S1A_20231121072204051312_ch1_DSDB_00007.raw exists
/rspy/working/dir/DCS_04_S1A_20231121072204051312_ch1_DSDB_00060.raw exists
/rspy/working/dir/DCS_04_S1A_20231121072204051312_ch2_DSDB_00046.raw exists
/rspy/working/dir/DCS_04_S1A_202311

KeyError: 'Contents'

In [19]:
from datetime import datetime

# Format of the dates returned by the "status" endpoint (no timezone part is parsed)
dt_format = "%Y-%m-%dT%H:%M:%S.%f" # %z

# Check timeliness by subtracting download stop date - publishing date.
# Call the "status" endpoint.
print ("Timeliness for:")
for name in product_names:
    data = requests.get(f"{endpoint}/status", {"name": name})
    assert data.status_code == 200
    values = data.json()
    publication = datetime.strptime (values["available_at_station"], dt_format)
    # Not named "stop": that would overwrite the search stop date defined
    # at the top of the notebook and break the search cells when they are run again.
    download_stop = datetime.strptime (values["download_stop"], dt_format)
    timeliness = download_stop - publication
    print (f"  - {name}: {timeliness}")

Timeliness for:
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw: 59 days, 21:37:42.834707
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00002.raw: 59 days, 21:37:42.948852
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00003.raw: 59 days, 21:37:43.071069
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00004.raw: 59 days, 21:37:43.185491
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00005.raw: 59 days, 21:37:43.317537
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00006.raw: 59 days, 21:37:43.449508
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00007.raw: 59 days, 21:37:43.589832
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00060.raw: 59 days, 21:37:43.724688
  - DCS_04_S1A_20231121072204051312_ch2_DSDB_00046.raw: 59 days, 23:19:01.908785
  - DCS_04_S1A_20231121072204051312_ch2_DSDB_00060.raw: 59 days, 23:14:04.149800
