This demo uses the following implemented stories:
- RSPY-25
- RSPY-85
- RSPY-100
- RSPY-115

Install the needed libraries

In [1]:
# Install boto3, which the next cell uses to create and inspect the S3 buckets.
!pip install boto3
# Install every rs_server_libraries wheel found in the directory named by $RSPY_WHL_DIR.
# The subshell "( ... )" keeps the "cd" local to this command.
!(cd $RSPY_WHL_DIR && pip install rs_server_libraries-*.whl )

Collecting boto3
  Downloading boto3-1.34.60-py3-none-any.whl.metadata (6.6 kB)
Collecting botocore<1.35.0,>=1.34.60 (from boto3)
  Downloading botocore-1.34.60-py3-none-any.whl.metadata (5.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3)
  Downloading s3transfer-0.10.0-py3-none-any.whl.metadata (1.7 kB)
Downloading boto3-1.34.60-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading botocore-1.34.60-py3-none-any.whl (12.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Downloading s3transfer-0.10.0-py3-none-any.whl (82 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 k

In [2]:
# We'll use boto3 to monitor the S3 buckets.
# Note: S3_ACCESSKEY, S3_SECRETKEY and S3_ENDPOINT are given in the docker-compose.yml file; S3_REGION must also be set in the environment.
import boto3
import os

# Open a client on the S3-compatible storage. Credentials, endpoint and region
# are all read from the environment (a missing variable raises KeyError).
s3_session = boto3.session.Session()
s3_client = s3_session.client(
    service_name="s3",
    aws_access_key_id=os.environ["S3_ACCESSKEY"],
    aws_secret_access_key=os.environ["S3_SECRETKEY"],
    endpoint_url=os.environ["S3_ENDPOINT"],
    region_name=os.environ["S3_REGION"],
)
bucket_name = "tmp-download"
final_bucket_name = "final-catalog"
bucket_dir = "stations"
bucket_url = f"s3://{bucket_name}/{bucket_dir}"

# List the existing buckets once; both checks below reuse the result.
existing_buckets = {bucket["Name"] for bucket in s3_client.list_buckets()["Buckets"]}

# If the temporary bucket already exists, clear all its files in order to start fresh for each demo.
if bucket_name in existing_buckets:
    # list_objects returns at most 1000 keys per call, so go through the paginator
    # to be sure every object is seen. Keys are collected first and deleted afterwards
    # so the listing is never modified while it is being iterated.
    stale_keys = [
        obj["Key"]
        for page in s3_client.get_paginator("list_objects").paginate(Bucket=bucket_name)
        for obj in page.get("Contents", [])
    ]
    for key in stale_keys:
        s3_client.delete_object(Bucket=bucket_name, Key=key)
else:
    s3_client.create_bucket(Bucket=bucket_name)

# Create the final bucket only when it is missing: re-creating a bucket that is already
# owned is rejected by some S3 backends, which made this cell fail when run a second time.
# Its content is deliberately left untouched (catalog items may still point at it).
if final_bucket_name not in existing_buckets:
    s3_client.create_bucket(Bucket=final_bucket_name)

print(f"{bucket_name} empty ?: ", 'Contents' not in s3_client.list_objects(Bucket=bucket_name))
print(f"{final_bucket_name} empty ?: ", 'Contents' not in s3_client.list_objects(Bucket=final_bucket_name))

tmp-download empty ?:  True
final-catalog empty ?:  True


A bucket "tmp-download" is created for the purpose of this demo. The CADIP and ADGS Prefect flows call the rs-server endpoints to download the files from the CADIP and ADGS stations and to upload them to "s3://tmp-download/stations/<station_name>".
After a successful upload to the S3 bucket, the update STAC catalog endpoint is called to update the catalog and to copy the file to the "final-catalog" bucket.

In [3]:
from dataclasses import dataclass
import requests

@dataclass
class Collection:
    """Minimal STAC collection description used by this demo."""

    # Owner of the collection.
    user: str
    # Collection name; also used as the "id" of the generated body.
    name: str

    @property
    def id_(self) -> str:
        """Return the user and the collection name joined by an underscore."""
        return "{}_{}".format(self.user, self.name)

    @property
    def properties(self):
        """Return the collection body as a dict.

        Only the "id" field and the "items" / "self" link hrefs depend on the
        collection name; every other value is a fixed placeholder.
        """
        root_href = "http://localhost:8082/"
        items_href = f"http://localhost:8082/collections/{self.name}/items"
        self_href = f"http://localhost:8082/collections/{self.name}"

        links = [
            {"rel": "items", "type": "application/geo+json", "href": items_href},
            {"rel": "parent", "type": "application/json", "href": root_href},
            {"rel": "root", "type": "application/json", "href": root_href},
            {"rel": "self", "type": "application/json", "href": self_href},
            {
                "rel": "license",
                "href": "https://creativecommons.org/licenses/publicdomain/",
                "title": "public domain",
            },
        ]

        extent = {
            "spatial": {"bbox": [[-94.6911621, 37.0332547, -94.402771, 37.1077651]]},
            "temporal": {"interval": [["2000-02-01T00:00:00Z", "2000-02-12T00:00:00Z"]]},
        }

        body = {}
        body["id"] = self.name
        body["type"] = "Collection"
        body["links"] = links
        body["extent"] = extent
        body["license"] = "public-domain"
        body["description"] = "Some description"
        body["stac_version"] = "1.0.0"
        return body
    
user = "DemoUser"
mission = "s1"
url_catalog = "http://rs-server-catalog:8000"

# Create the collection for DemoUser
collection_type = Collection(user, f"{mission}_aux")
response = requests.post(url_catalog + f"/catalog/{user}/collections", json=collection_type.properties)

# Report the outcome instead of discarding it: a collection that could not be created
# would otherwise only show up later as a failure inside the download flows.
# The cell does not raise, so re-running the demo against a catalog that already
# holds the collection keeps working.
if response.ok:
    print(f"Collection '{collection_type.name}' created for user '{user}' (HTTP {response.status_code})")
else:
    print(
        f"WARNING: collection '{collection_type.name}' was not created for user '{user}' "
        f"(HTTP {response.status_code}): {response.text}"
    )

In [5]:


from datetime import datetime

from rs_workflows.common import (
    PrefectFlowConfig,
    download_flow,
)
    
def run_flow(user, url, url_catalog, station, mission, tmp_local_download, bucket_url, no_of_tasks, start_date, stop_date):
    """Build a PrefectFlowConfig from the arguments and run download_flow with it.

    All arguments are forwarded positionally, in the order of this signature,
    to PrefectFlowConfig. The two dates are the only values transformed:

    start_date, stop_date: strings formatted as "%Y-%m-%dT%H:%M:%SZ"
        (for example "2014-01-01T12:00:00Z"); they are parsed into datetime
        objects before being handed over. A string that does not match the
        format makes datetime.strptime raise ValueError.

    Nothing is returned; the value returned by download_flow is discarded.
    """
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    interval_start = datetime.strptime(start_date, date_format)
    interval_stop = datetime.strptime(stop_date, date_format)

    flow_config = PrefectFlowConfig(
        user,
        url,
        url_catalog,
        station,
        mission,
        tmp_local_download,
        bucket_url,
        no_of_tasks,
        interval_start,
        interval_stop,
    )

    # start the prefect flow
    download_flow(flow_config)


# Stations to process, one flow run per station.
stations = ["CADIP", "ADGS"]
# rs-server endpoint template; filled with the lower-cased station name.
url = "http://rs-server-{}:8000"

# Local temporary download directory template; filled with the station name.
tmp_local_download = "/tmp/{}_tmp"
no_of_tasks = 1

# NOTE(review): the catalog responses captured from a previous run show paths such as
# "stations/ADGSS2__OPER_AUX_..." with no separator between the station and the file
# name. Confirm whether the bucket URL passed below is expected to end with "/".
for station in stations:
    run_flow(
        user=user,
        url=url.format(station.lower()),
        url_catalog=url_catalog,
        station=station,
        mission=mission,
        tmp_local_download=tmp_local_download.format(station),
        bucket_url=bucket_url + f"/{station}",
        no_of_tasks=no_of_tasks,
        start_date="2014-01-01T12:00:00Z",
        stop_date="2024-02-20T12:00:00Z",
    )
    

22:12:19.603 | [36mINFO[0m    | Task run 'ingest_files-0' - The download progress for file S2__OPER_AUX_ECMWFD_PDMC_20230216T120000_V20190217T090000_20190217T210000.TGZ is IN_PROGRESS
22:12:20.624 | [36mINFO[0m    | Task run 'ingest_files-0' - File S2__OPER_AUX_ECMWFD_PDMC_20230216T120000_V20190217T090000_20190217T210000.TGZ has been properly downloaded...

22:12:20.919 | [36mINFO[0m    | Task run 'ingest_files-0' - The download progress for file S2__OPER_AUX_ECMWFD_PDMC_20200216T120000_V20190217T090000_20190217T210000.TGZ is IN_PROGRESS
22:12:21.938 | [36mINFO[0m    | Task run 'ingest_files-0' - File S2__OPER_AUX_ECMWFD_PDMC_20200216T120000_V20190217T090000_20190217T210000.TGZ has been properly downloaded...

22:12:22.228 | [36mINFO[0m    | Task run 'ingest_files-0' - The download progress for file S2__OPER_AUX_ECMWFD_PDMC_20190216T120000_V20190217T090000_20190217T210000.TGZ is IN_PROGRESS
22:12:23.247 | [36mINFO[0m    | Task run 'ingest_files-0' - File S2__OPER_AUX_ECMWFD

b'{"type":"Feature","stac_version":"1.0.0","stac_extensions":["https://stac-extensions.github.io/file/v2.1.0/schema.json","https://stac-extensions.github.io/alternate-assets/v1.1.0/schema.json"],"id":"S2__OPER_AUX_ECMWFD_PDMC_20230216T120000_V20190217T090000_20190217T210000.TGZ","geometry":{"type":"Polygon","coordinates":[[[180,-90],[180,90],[-180,90],[-180,-90],[180,-90]]]},"properties":{"adgs:id":"id3","datetime":"2023-02-16T12:00:00.000Z","start_datetime":"2023-02-17T09:00:00.000Z","end_datetime":"2023-02-17T21:00:00.000Z","owner":"DemoUser"},"links":[{"rel":"collection","type":"application/json","href":"http://rs-server-catalog:8000/collections/DemoUser_s1_aux"},{"rel":"parent","type":"application/json","href":"http://rs-server-catalog:8000/collections/DemoUser_s1_aux"},{"rel":"root","type":"application/json","href":"http://rs-server-catalog:8000/"},{"rel":"self","type":"application/geo+json","href":"http://rs-server-catalog:8000/collections/DemoUser_s1_aux/items/S2__OPER_AUX_ECMWF

D_PDMC_20230216T120000_V20190217T090000_20190217T210000.TGZ"}],"assets":{"file":{"file:size":8326253,"href":"https://rs-server/catalog/DemoUser/collections/s1_aux/items/ADGSS2__OPER_AUX_ECMWFD_PDMC_20230216T120000_V20190217T090000_20190217T210000.TGZ/download/file","alternate":{"s3":{"href":"s3://final-catalog/stations/ADGSS2__OPER_AUX_ECMWFD_PDMC_20230216T120000_V20190217T090000_20190217T210000.TGZ"}}}},"collection":"DemoUser_s1_aux"}'
b'{"type":"Feature","stac_version":"1.0.0","stac_extensions":["https://stac-extensions.github.io/file/v2.1.0/schema.json","https://stac-extensions.github.io/alternate-assets/v1.1.0/schema.json"],"id":"S2__OPER_AUX_ECMWFD_PDMC_20200216T120000_V20190217T090000_20190217T210000.TGZ","geometry":{"type":"Polygon","coordinates":[[[180,-90],[180,90],[-180,90],[-180,-90],[180,-90]]]},"properties":{"adgs:id":"id2","datetime":"2020-02-16T12:00:00.000Z","start_datetime":"2020-02-17T09:00:00.000Z","end_datetime":"2020-02-17T21:00:00.000Z","owner":"DemoUser"},"links"