# ADGS endpoints demo

In this demo we will call the rs-server ADGS HTTP endpoints:

  * List available ADGS products
  * Download some products into local storage and an S3 bucket
  * Monitor the download status from the database.

In [None]:
# Base URL of the ADGS "aux" endpoints (rs-server host = the container name)
endpoint = "http://rs-server-adgs:8000/adgs/aux"

# Datetime interval sent to the "search" endpoint
datetime = "2014-01-01T12:00:00Z/2023-12-30T12:00:00Z"

# Define a reusable function

In [None]:
# From a terminal, we would list the available ADGS products with the curl command.
# "set -x" makes the shell echo the command it runs; {endpoint} and {datetime}
# are replaced by the values of the Python variables of the same name.
!set -x && curl -X GET "{endpoint}/search?datetime={datetime}" -H "accept: application/json"

---
**NOTE**

You can also call the HTTP endpoints from the Swagger UI: http://localhost:8001/docs

---

In [None]:
# The same search is easier to exploit from Python than from curl
import requests
import pprint

# Query the "search" endpoint over the configured datetime interval
print(f"Call: '{endpoint}/search' with: datetime={datetime!r}")
payload = dict(datetime=datetime)
data = requests.get(f"{endpoint}/search", params=payload)
assert data.status_code == 200

# The returned products are listed under the "features" key of the JSON body
products = data.json()["features"]
assert len(products) == 3

# Show the first three products
print("Result:")
pprint.pprint(products[:3], indent=4)
print("...")


In [None]:
# The number of returned products can be capped: here we ask for one only
print(f"Call: '{endpoint}/search' with: datetime={datetime!r}&limit=1")
payload = dict(datetime=datetime, limit=1)
data = requests.get(f"{endpoint}/search", params=payload)
assert data.status_code == 200

# The returned products are listed under the "features" key of the JSON body
products = data.json()["features"]
assert len(products) == 1

# Show the single product
print("Result:")
pprint.pprint(products, indent=4)
print("...")


In [None]:
# We can sort by datetime or id, either descending ("-" prefix) or ascending ("+" prefix)

def search_sorted(sortby: str) -> list:
    """Call the "search" endpoint with a sort criterion and print the result.

    Args:
        sortby: value sent as the "sortby" query parameter,
            e.g. "-adgs:datetime" or "+adgs:datetime".

    Returns:
        The "features" list of the JSON response.

    Raises:
        AssertionError: if the HTTP status is not 200, or if the number of
            returned products is not 3.
    """
    print (f"Call: '{endpoint}/search' with: datetime={datetime!r}&sortby={sortby}")
    response = requests.get(f"{endpoint}/search", {"datetime": datetime, "sortby": sortby})
    assert response.status_code == 200

    found = response.json()["features"]
    assert len(found) == 3

    print ("Result:")
    pprint.PrettyPrinter(indent=4).pprint(found)
    print("...")
    return found

# Call the "search" endpoint sorted by datetime: descending order first, then ascending order
search_sorted("-adgs:datetime")
products = search_sorted("+adgs:datetime")

# Keep only the names (taken from the ascending search)
product_names = [product["id"] for product in products]

In [None]:
# The "search" endpoint has initialised the database with the products info.
# Query the "status" endpoint once per product name and collect the answers.
print(f"Call: '{endpoint}/status' with: name='...'")
all_status = []
for name in product_names:
    data = requests.get(f"{endpoint}/status", params={"name": name})
    assert data.status_code == 200
    all_status.append(data.json())

# Show the first two statuses
print("Result:")
pprint.pprint(all_status[:2], indent=4)
print("...")

---
**NOTE**

You can also monitor the database using pgAdmin.

---

In [None]:
# We'll use boto3 to monitor the s3 bucket.
# Note: the S3_ACCESSKEY, S3_SECRETKEY and S3_ENDPOINT are given in the docker-compose.yml file.
!pip install boto3
import boto3
import os

s3_session = boto3.session.Session()
s3_client = s3_session.client(
    service_name="s3",
    aws_access_key_id=os.environ["S3_ACCESSKEY"],
    aws_secret_access_key=os.environ["S3_SECRETKEY"],
    endpoint_url=os.environ["S3_ENDPOINT"],
    region_name=os.environ["S3_REGION"],
)

In [None]:
# Path is used to build the local download directory
from pathlib import Path

# S3 bucket name and the sub-directories used inside it
bucket_name = "demo-bucket"
bucket_dir = "adgs/data"

# Full S3 URL: bucket name + sub-directories
bucket_url = "s3://" + bucket_name + "/" + bucket_dir

# The local download root is passed as the RSPY_WORKING_DIR environment variable;
# the same sub-directories as in the bucket are appended to it
local_download_dir = Path(os.environ["RSPY_WORKING_DIR"]).joinpath(bucket_dir)

# Clean existing files
def clean_existing():
    """Remove the products from the S3 bucket and from the local download directory.

    The bucket is created when it does not exist yet, and the local download
    directory is created when it is missing.
    """
    known_buckets = {entry["Name"] for entry in s3_client.list_buckets()["Buckets"]}

    if bucket_name not in known_buckets:
        # No such bucket yet: create it
        s3_client.create_bucket(Bucket=bucket_name)
    else:
        # The bucket already exists: remove the existing products from it
        for name in product_names:
            s3_client.delete_object(Bucket=bucket_name, Key=f"{bucket_dir}/{name}")

    # Make sure the local download directory exists
    local_download_dir.mkdir(parents=True, exist_ok=True)

    # Remove the local copy of each product, if there is one
    for name in product_names:
        local_file = local_download_dir / name
        if local_file.is_file():
            local_file.unlink()

import time

# Check that the files were downloaded locally
def check_existing_local():
    """Verify that every product is present in the local download directory.

    Prints one line per product found.

    Raises:
        RuntimeError: for the first product whose file is not found locally.
    """
    # Wait 1 second first: without it the check sometimes fails.
    time.sleep(1)

    for name in product_names:
        file = Path(local_download_dir) / name
        if file.is_file():
            print (f"{file} exists")
        else:
            raise RuntimeError (f"{file} is missing locally")

# Check that the files were uploaded into the S3 bucket.
# This time the local files are not kept.
def check_existing_s3():
    """Verify that every product is present in the S3 bucket.

    The keys under the "<bucket_dir>/" prefix are listed with a paginator, so
    the check still works when the bucket holds more keys than a single
    listing call returns. Prints one line per product found.

    Raises:
        RuntimeError: for the first product whose key is missing from the bucket.
    """
    # Wait 1 second before checking
    time.sleep(1)

    paginator = s3_client.get_paginator("list_objects_v2")
    all_s3_files = set()
    for page in paginator.paginate(Bucket=bucket_name, Prefix=f"{bucket_dir}/"):
        # "Contents" is absent from the response when no key matches
        all_s3_files.update(entry["Key"] for entry in page.get("Contents", []))

    for name in product_names:
        bucket_file = f"{bucket_dir}/{name}"
        if bucket_file not in all_s3_files:
            raise RuntimeError (f"s3://{bucket_name}/{bucket_file} is missing from the S3 bucket")
        print (f"s3://{bucket_name}/{bucket_file} exists")

---
**NOTE**

You can also monitor the s3 bucket using the minio console: http://127.0.0.1:9001/browser with:

  * Username: _minio_
  * Password: _Strong#Pass#1234_

---

In [None]:
import asyncio
from typing import Callable

# Show the endpoint and the parameters used by the download calls
print(f"Call: '{endpoint}' with: name='...' local={local_download_dir!r} obs={bucket_url!r}")

# Call the ADGS endpoint to download one product in background
# and upload it (optional) to the S3 bucket.
async def download_one(name: str, save_to_s3: bool):
    """Send the download request for one product to the ADGS endpoint.

    Args:
        name: product name, sent as the "name" query parameter.
        save_to_s3: when true, the S3 URL is also sent as the "obs" query parameter.

    Raises:
        AssertionError: if the HTTP status is not 200.
    """
    params = {"name": name, "local": local_download_dir}
    if save_to_s3:
        params["obs"] = bucket_url

    # requests.get is a blocking call: run it in a worker thread so that the
    # event loop stays free and the requests of the other tasks can overlap.
    data = await asyncio.to_thread(requests.get, endpoint, params)
    assert data.status_code == 200

# Download everything in parallel
async def download_all(save_to_s3: bool, download_one: Callable=download_one):
    """Request the download of every product, then poll the status until all are finished.

    Args:
        save_to_s3: forwarded to download_one for every product.
        download_one: coroutine function called as download_one(name, save_to_s3)
            for each product name.

    Raises:
        AssertionError: if a "status" call does not return HTTP 200.
        KeyError: if a returned status is not one of the four counted below.
    """
    # Leaving the "async with" block waits for all the download requests to return
    async with asyncio.TaskGroup() as group:
        for name in product_names:
            group.create_task(download_one (name, save_to_s3))

    #
    # In the meantime, call the "status" endpoint to get and print the download status.
    #

    while True:

        # Count the number of products not started, in progress etc ...
        all_status = {"NOT_STARTED": 0, "IN_PROGRESS": 0, "FAILED": 0, "DONE": 0}
        for name in product_names:

            # Call the "status" endpoint from a worker thread: requests.get is
            # blocking and would otherwise stall the event loop.
            data = await asyncio.to_thread(requests.get, f"{endpoint}/status", {"name": name})
            assert data.status_code == 200
            all_status[data.json()["status"]] += 1

        # Print result
        print (" / ".join (f"{status}:{count}" for status, count in all_status.items()))

        # Stop once every product is either done or failed
        if (all_status["DONE"] + all_status["FAILED"]) >= len(product_names):
            break

        # Non-blocking wait before polling again (time.sleep would freeze the event loop)
        await asyncio.sleep(1)

clean_existing()

print ("Download everything to the local directory, not s3:")
await (download_all(save_to_s3=False))

check_existing_local()

print ("\nDownload everything again, but this time upload to S3:")
await (download_all(save_to_s3=True))

check_existing_s3()

In [None]:
# Do the same with prefect
!pip install prefect

In [None]:
from prefect import flow, task

@task
async def download_one_with_prefect(name: str, save_to_s3: bool):
    """Prefect task that awaits download_one for a single product and returns its result."""
    result = await download_one(name, save_to_s3)
    return result

@flow(name="download adgs products")
async def download_all_with_prefect(save_to_s3: bool):
    """Prefect flow that awaits download_all, using the Prefect task for each product."""
    result = await download_all(save_to_s3, download_one=download_one_with_prefect)
    return result

clean_existing()

print ("[Prefect] Download everything to the local directory, not s3:")
await (download_all_with_prefect(save_to_s3=False))

check_existing_local()

print ("\n[Prefect] Download everything again, but this time upload to S3:")
await (download_all_with_prefect(save_to_s3=True))

check_existing_s3()

---
**NOTE**

Open the Prefect dashboard: http://127.0.0.1:4200

---

In [None]:
# Import the class under an alias: a plain "from datetime import datetime" would
# overwrite the "datetime" interval string defined in the first code cell, and
# the search cells would then send a class as query value when run again.
from datetime import datetime as DateTime

dt_format = "%Y-%m-%dT%H:%M:%S.%f" # %z

# Check timeliness by subtracting the publishing date from the download stop date.
# Call the "status" endpoint.
print ("Timeliness for:")
for name in product_names:
    data = requests.get(f"{endpoint}/status", {"name": name})
    assert data.status_code == 200
    values = data.json()
    publication = DateTime.strptime (values["available_at_station"], dt_format)
    stop = DateTime.strptime (values["download_stop"], dt_format)
    timeliness = stop - publication
    print (f"  - {name}: {timeliness}")