# CADU endpoints demo

In this demo we will call the rs-server CADU HTTP endpoints:

  * List available CADU products
  * Download some products into local storage and an S3 bucket
  * Monitor the download status from the database.

In [3]:
# Demo configuration: base URL of the CADU endpoints and the time window
# used to search for products.
endpoint = "http://rs-server:8000/cadip/CADIP/cadu"  # rs-server host = the container name
start = "2014-01-01T12:00:00.000Z"
stop = "2023-12-30T12:00:00.000Z"

In [4]:
# From a terminal, to list the available CADU products, we would use the curl command below.
# "set -x" makes the shell echo the expanded command before running it;
# {endpoint}, {start} and {stop} are replaced by the values of the Python variables.
!set -x && curl -X GET "{endpoint}/list?start_date={start}&stop_date={stop}" -H "accept: application/json"

+ curl -X GET 'http://rs-server:8000/cadip/CADIP/cadu/list?start_date=2014-01-01T12:00:00.000Z&stop_date=2023-12-30T12:00:00.000Z' -H 'accept: application/json'
{"CADIP":[["2b17b57d-fff4-4645-b539-91f305c27c69","DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw"],["2b17b57d-fff4-4645-b539-91f305c27c60","DCS_04_S1A_20231121072204051312_ch1_DSDB_00002.raw"],["2b17b57d-fff4-4645-b539-91f305c27c61","DCS_04_S1A_20231121072204051312_ch1_DSDB_00003.raw"],["2b17b57d-fff4-4645-b539-91f305c27c62","DCS_04_S1A_20231121072204051312_ch1_DSDB_00004.raw"],["2b17b57d-fff4-4645-b539-91f305c27c63","DCS_04_S1A_20231121072204051312_ch1_DSDB_00005.raw"],["2b17b57d-fff4-4645-b539-91f305c27c64","DCS_04_S1A_20231121072204051312_ch1_DSDB_00006.raw"],["2b17b57d-fff4-4645-b539-91f305c27c65","DCS_04_S1A_20231121072204051312_ch1_DSDB_00007.raw"],["some_id_2","DCS_04_S1A_20231121072204051312_ch1_DSDB_00060.raw"],["some_id_3","DCS_04_S1A_20231121072204051312_ch2_DSDB_00046.raw"],["some_id_4","DCS_04_S1A_202311210722

In [5]:
# But let's do it in python so it's easier to parse results
import requests
import pprint 

# Query the "list" endpoint for the products of the [start, stop] time window
data = requests.get(
    f"{endpoint}/list",
    params={"start_date": start, "stop_date": stop},
)
assert data.status_code == 200

# The products come back as (id, name) pairs under the "CADIP" key
products = data.json()["CADIP"]
assert len(products) == 10

# Only show the first few products
pprint.pprint(products[:3], indent=4)
print("...")

# The next steps only need the product names
product_names = [product_name for _, product_name in products]

[   [   '2b17b57d-fff4-4645-b539-91f305c27c69',
        'DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw'],
    [   '2b17b57d-fff4-4645-b539-91f305c27c60',
        'DCS_04_S1A_20231121072204051312_ch1_DSDB_00002.raw'],
    [   '2b17b57d-fff4-4645-b539-91f305c27c61',
        'DCS_04_S1A_20231121072204051312_ch1_DSDB_00003.raw']]
...


In [6]:
# Calling the "list" endpoint has initialised the database with the products info.
# Ask the "status" endpoint for the info of each product, by product name.
all_status = []
for name in product_names:
    data = requests.get(f"{endpoint}/status", params={"name": name})
    assert data.status_code == 200
    all_status.append(data.json())

# Only show the first few status entries
pprint.pprint(all_status[:2], indent=4)
print("...")

[   {   'available_at_station': '2023-11-26T17:01:39.528000',
        'cadu_id': '2b17b57d-fff4-4645-b539-91f305c27c69',
        'db_id': 1,
        'download_start': None,
        'download_stop': None,
        'name': 'DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw',
        'status': 'NOT_STARTED',
        'status_fail_message': None},
    {   'available_at_station': '2023-11-26T17:01:39.528000',
        'cadu_id': '2b17b57d-fff4-4645-b539-91f305c27c60',
        'db_id': 2,
        'download_start': None,
        'download_stop': None,
        'name': 'DCS_04_S1A_20231121072204051312_ch1_DSDB_00002.raw',
        'status': 'NOT_STARTED',
        'status_fail_message': None}]
...


---
**NOTE**

You can also monitor the database using pgAdmin.

---

In [7]:
# We'll use boto3 to monitor the s3 bucket.
# Note: the S3_ACCESSKEY, S3_SECRETKEY and S3_ENDPOINT are given in the docker-compose.yml file.
# %pip (rather than !pip) installs the package into the environment of the running kernel.
%pip install boto3
import boto3
import os

# The credentials and the endpoint URL are read from the environment,
# they are never written in the notebook.
s3_session = boto3.session.Session()
s3_client = s3_session.client(
    service_name="s3",
    aws_access_key_id=os.environ["S3_ACCESSKEY"],
    aws_secret_access_key=os.environ["S3_SECRETKEY"],
    endpoint_url=os.environ["S3_ENDPOINT"],
)

Collecting boto3
  Downloading boto3-1.34.25-py3-none-any.whl.metadata (6.6 kB)
Collecting botocore<1.35.0,>=1.34.25 (from boto3)
  Downloading botocore-1.34.25-py3-none-any.whl.metadata (5.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3)
  Downloading s3transfer-0.10.0-py3-none-any.whl.metadata (1.7 kB)
Downloading boto3-1.34.25-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading botocore-1.34.25-py3-none-any.whl (11.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.9/11.9 MB[0m [31m54.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading s3transfer-0.10.0-py3-none-any.whl (82 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages

In [8]:
# Name of the S3 bucket that receives the products
bucket_name = "test-data"

existing_buckets = [bucket["Name"] for bucket in s3_client.list_buckets()["Buckets"]]
if bucket_name not in existing_buckets:
    # The bucket does not exist yet: create it
    s3_client.create_bucket(Bucket=bucket_name)
else:
    # The bucket already exists: remove the existing products from it
    for name in product_names:
        s3_client.delete_object(Bucket=bucket_name, Key=name)

# The local download directory comes from an environment variable
rspy_working_dir = os.environ["RSPY_WORKING_DIR"]

# Delete the local copy of each product, when there is one
from pathlib import Path
for name in product_names:
    file = Path(rspy_working_dir) / name
    if file.is_file():
        file.unlink()

In [9]:
import asyncio

# Call the CADIP endpoint to download one product in the background
# and (optionally) upload it to the S3 bucket.
async def download_one(name: str, save_to_s3: bool):
    """Call the CADU endpoint to request the download of one product.

    Args:
        name: name of the product to download.
        save_to_s3: if True, the bucket URL is also passed (as "obs") so
            that the product is uploaded to the S3 bucket.

    Raises:
        AssertionError: if the endpoint does not answer with HTTP 200.
    """
    params = {"name": name, "local": rspy_working_dir}
    # obs = the bucket URL, if requested
    if save_to_s3:
        params["obs"] = f"s3://{bucket_name}"

    # requests.get is a blocking call: run it in a worker thread so the event
    # loop stays free and the requests for all the products overlap.
    data = await asyncio.to_thread(requests.get, endpoint, params)
    assert data.status_code == 200

# In parallel, call the "status" endpoint to get and print the download status.
async def print_status():
    """Poll the "status" endpoint every second and print the status counts.

    Returns once no product is NOT_STARTED or IN_PROGRESS anymore.

    Raises:
        AssertionError: if the "status" endpoint does not answer with HTTP 200.
        RuntimeError: if at least one product ended with the FAILED status.
    """
    # Wait a second: when we download several times, the status first
    # needs to go from DONE back to NOT_STARTED.
    await asyncio.sleep(1)

    while True:

        # Count the number of products not started, in progress etc ...
        all_status = {"NOT_STARTED": 0, "IN_PROGRESS": 0, "FAILED": 0, "DONE": 0}
        for name in product_names:

            # Call the "status" endpoint, in a worker thread so that
            # the blocking HTTP call does not freeze the event loop.
            data = await asyncio.to_thread(requests.get, f"{endpoint}/status", {"name": name})
            assert data.status_code == 200
            all_status[data.json()["status"]] += 1

        # Print result
        print(" / ".join(f"{status}:{count}" for status, count in all_status.items()))

        # Stop as soon as nothing is pending. Waiting for every product to be
        # DONE would loop forever when one download has FAILED.
        if all_status["NOT_STARTED"] == 0 and all_status["IN_PROGRESS"] == 0:
            break
        await asyncio.sleep(1)

    if all_status["FAILED"] > 0:
        raise RuntimeError(f"{all_status['FAILED']} product download(s) failed")

# Run the status monitoring and all the downloads concurrently
async def download_all(save_to_s3: bool):
    """Download every product of product_names while printing the status.

    Args:
        save_to_s3: forwarded to download_one for each product.
    """
    async with asyncio.TaskGroup() as task_group:
        # The status monitoring task is created first, then one task per product
        task_group.create_task(print_status())
        for product_name in product_names:
            task_group.create_task(download_one(product_name, save_to_s3))

print ("Download everything to the local directory, not s3:")
await (download_all(save_to_s3=False))

# Check that the local files exist. 
# Wait 1 second before that or sometimes it bugs.
await asyncio.sleep(1)
for name in product_names:
    file = Path (local_download_dir) / name    
    if not file.is_file():
        raise RuntimeException (f"{file} is missing locally")
    print (f"{file} exists")

print ("\nDownload everything again, but this time upload to S3:")
await (download_all(save_to_s3=True))

# This time the local files are not kept locally, 
# but they should be uploaded into the S3 bucket.
await asyncio.sleep(1)
all_s3_filenames = [key["Key"] for key in s3_client.list_objects(Bucket=bucket_name)['Contents']]
for name in product_names:    
    if not name in all_s3_filenames:
        raise RuntimeException (f"{file} is missing from the S3 bucket")
    print (f"s3://{bucket_name}/{name} exists")

Download everything to the local directory, not s3:
NOT_STARTED:0 / IN_PROGRESS:1 / FAILED:0 / DONE:9
NOT_STARTED:0 / IN_PROGRESS:0 / FAILED:0 / DONE:10
/local/download/DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw exists
/local/download/DCS_04_S1A_20231121072204051312_ch1_DSDB_00002.raw exists
/local/download/DCS_04_S1A_20231121072204051312_ch1_DSDB_00003.raw exists
/local/download/DCS_04_S1A_20231121072204051312_ch1_DSDB_00004.raw exists
/local/download/DCS_04_S1A_20231121072204051312_ch1_DSDB_00005.raw exists
/local/download/DCS_04_S1A_20231121072204051312_ch1_DSDB_00006.raw exists
/local/download/DCS_04_S1A_20231121072204051312_ch1_DSDB_00007.raw exists
/local/download/DCS_04_S1A_20231121072204051312_ch1_DSDB_00060.raw exists
/local/download/DCS_04_S1A_20231121072204051312_ch2_DSDB_00046.raw exists
/local/download/DCS_04_S1A_20231121072204051312_ch2_DSDB_00060.raw exists

Download everything again, but this time upload to S3:
NOT_STARTED:0 / IN_PROGRESS:1 / FAILED:0 / DONE:9
N

---
**NOTE**

You can also monitor the s3 bucket using the minio console: http://127.0.0.1:9001/browser with:

  * Username: _minio_
  * Password: _Strong#Pass#1234_

---

In [8]:
from datetime import datetime

dt_format = "%Y-%m-%dT%H:%M:%S.%f" # %z

# Check timeliness by subtracting the publishing date from the download stop date.
# Call the "status" endpoint.
print("Timeliness for:")
for name in product_names:
    data = requests.get(f"{endpoint}/status", {"name": name})
    assert data.status_code == 200
    values = data.json()

    # download_stop can be None (e.g. for a NOT_STARTED product): nothing to compute then
    if values["download_stop"] is None:
        print(f"  - {name}: not downloaded (status: {values['status']})")
        continue

    publication = datetime.strptime(values["available_at_station"], dt_format)
    # Not named "stop": that would overwrite the search stop date
    # defined in the first cell and break a re-run of the "list" cells.
    download_stop = datetime.strptime(values["download_stop"], dt_format)
    timeliness = download_stop - publication
    print(f"  - {name}: {timeliness}")

Timeliness for:
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw: 56 days, 15:36:08.625266
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00002.raw: 56 days, 15:36:08.621716
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00003.raw: 56 days, 15:36:08.628110
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00004.raw: 56 days, 15:36:08.624966
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00005.raw: 56 days, 15:36:08.743524
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00006.raw: 56 days, 15:36:08.897286
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00007.raw: 56 days, 15:36:09.043408
  - DCS_04_S1A_20231121072204051312_ch1_DSDB_00060.raw: 56 days, 15:36:09.199569
  - DCS_04_S1A_20231121072204051312_ch2_DSDB_00046.raw: 56 days, 17:17:27.394199
  - DCS_04_S1A_20231121072204051312_ch2_DSDB_00060.raw: 56 days, 17:12:29.649524
