This demo uses the following implemented stories:
- RSPY-25
- RSPY-85
- RSPY-100
- RSPY-115
- RSPY-139

## Configuration

In [1]:
# Set local or cluster configuration
import os

local_mode = os.getenv("RSPY_LOCAL_MODE") == "1"

if local_mode:
    # Local mode: fixed catalog address and no extra request keyword arguments.
    url_catalog = "http://rs-server-catalog:8000"
    HEADERS = {}
else:
    # Cluster mode: the website address and the API key are read from the environment.
    # HEADERS is meant to be splatted into requests calls (requests.get(..., **HEADERS)).
    url_catalog = os.environ["RSPY_WEBSITE"]
    HEADERS = {"headers": {"x-api-key": os.environ["RSPY_APIKEY"]}}

print(f"Using: {url_catalog}")

import requests
import json

Using: https://dev-rspy.esa-copernicus.eu


Install the needed libraries

In [2]:
# Install boto3 through a shell escape.
!pip install boto3
if local_mode:
    # Local mode only: install the rs_client_libraries wheel(s) found in $RSPY_WHL_DIR.
    !(cd $RSPY_WHL_DIR && pip install rs_client_libraries-*.whl )


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
# We'll use boto3 to monitor the s3 bucket. 
# Note: the S3_ACCESSKEY, S3_SECRETKEY and S3_ENDPOINT are given in the docker-compose.yml file.
import boto3
import os

# S3 client configured entirely from the environment.
s3_session = boto3.session.Session()
s3_client = s3_session.client(
    service_name="s3",
    aws_access_key_id=os.environ["S3_ACCESSKEY"],
    aws_secret_access_key=os.environ["S3_SECRETKEY"],
    endpoint_url=os.environ["S3_ENDPOINT"],
    region_name=os.environ["S3_REGION"],
)

buckets = ["rs-cluster-temp", "rs-cluster-catalog"] # bucket names under S3_ENDPOINT
bucket_dir = "stations"
bucket_url = f"s3://{buckets[0]}/{bucket_dir}"

# Only in local mode
if local_mode:

    # If the bucket is already created, clear all files to start fresh for each demo.
    # The bucket listing is fetched once instead of once per bucket.
    existing_buckets = {bucket["Name"] for bucket in s3_client.list_buckets()["Buckets"]}
    # A single list_objects call returns at most 1000 keys, so paginate to
    # make sure buckets holding more objects are really emptied.
    object_pages = s3_client.get_paginator("list_objects")
    for b in buckets:
        if b in existing_buckets:
            for page in object_pages.paginate(Bucket=b):
                # 'Contents' is absent from the response when nothing is listed.
                for obj in page.get("Contents", []):
                    # clear up the bucket
                    s3_client.delete_object(Bucket=b, Key=obj["Key"])
        else:
            s3_client.create_bucket(Bucket=b)
    for b in buckets:
        print(f"Is {b} empty ?: ", 'Contents' not in s3_client.list_objects(Bucket=b))

    # Truncate the items table from catalog also if this is not the first run !
    #docker exec -it catalog-db psql -U postgres -d catalog -c "TRUNCATE items"

The bucket "rs-cluster-temp" is used for this demo. The CADIP and ADGS Prefect flows call the rs-server endpoints to download the files from the CADIP and ADGS stations and to upload them to "s3://rs-cluster-temp/stations/".
After a successful upload to the S3 bucket, the STAC catalog update service is called to update the catalog and to transfer the files from the temp bucket to the "rs-cluster-catalog" bucket.

In [4]:
# Clean previous executions: delete the DemoUser:s1_aux collection.
# The call is the last expression of the cell so that the response is displayed.
requests.delete(f"{url_catalog}/catalog/collections/DemoUser:s1_aux", **HEADERS)

<Response [200]>

In [5]:
# Create the user's collection first (this has to be done on client side)

from dataclasses import dataclass
import requests

@dataclass
class Collection:
    """A collection for test purpose.

    Attributes:
        user: owner of the collection.
        name: collection name, used as the "id" of the collection body.
    """

    user: str
    name: str

    @property
    def id_(self) -> str:
        """Returns the id, built as "<user>_<name>"."""
        return f"{self.user}_{self.name}"

    @property
    def properties(self) -> dict:
        """Returns the collection body as a JSON-serialisable dict.

        The "owner" field is taken from this instance's ``user`` attribute,
        so the result does not depend on any notebook-level variable.
        """
        collection_href = f"http://localhost:8082/collections/{self.name}"
        return {
            "id": self.name,
            "type": "Collection",
            "links": [
                {
                    "rel": "items",
                    "type": "application/geo+json",
                    "href": f"{collection_href}/items",
                },
                {"rel": "parent", "type": "application/json", "href": "http://localhost:8082/"},
                {"rel": "root", "type": "application/json", "href": "http://localhost:8082/"},
                {
                    "rel": "self",
                    "type": "application/json",
                    "href": collection_href,
                },
                {
                    "rel": "license",
                    "href": "https://creativecommons.org/licenses/publicdomain/",
                    "title": "public domain",
                },
            ],
            "extent": {
                "spatial": {"bbox": [[-94.6911621, 37.0332547, -94.402771, 37.1077651]]},
                "temporal": {"interval": [["2000-02-01T00:00:00Z", "2000-02-12T00:00:00Z"]]},
            },
            "license": "public-domain",
            "description": "Some description",
            "stac_version": "1.0.0",
            # Use the instance's own user rather than a global variable.
            "owner": self.user,
        }
    
user = "DemoUser"
mission = "s1"

# Describe the "<mission>_aux" collection owned by DemoUser ...
collection_type = Collection(user, f"{mission}_aux")

# ... and register it in the catalog; any HTTP error status aborts the cell.
response = requests.post(
    url_catalog + "/catalog/collections",
    json=collection_type.properties,
    **HEADERS,
)
response.raise_for_status()

# Show the collection as returned by the catalog.
collection = json.loads(response.content)
collection

{'id': 's1_aux',
 'type': 'Collection',
 'links': [{'rel': 'items',
   'type': 'application/geo+json',
   'href': 'https://dev-rspy.esa-copernicus.eu/catalog/DemoUser/collections/s1_aux/items'},
  {'rel': 'parent',
   'type': 'application/json',
   'href': 'https://dev-rspy.esa-copernicus.eu/catalog/DemoUser'},
  {'rel': 'root',
   'type': 'application/json',
   'href': 'https://dev-rspy.esa-copernicus.eu/catalog/DemoUser'},
  {'rel': 'self',
   'type': 'application/json',
   'href': 'https://dev-rspy.esa-copernicus.eu/catalog/DemoUser/collections/s1_aux'},
  {'rel': 'items',
   'type': 'application/geo+json',
   'href': 'http://localhost:8082/collections/s1_aux/items'},
  {'rel': 'license',
   'href': 'https://creativecommons.org/licenses/publicdomain/',
   'title': 'public domain'}],
 'extent': {'spatial': {'bbox': [[-94.6911621,
     37.0332547,
     -94.402771,
     37.1077651]]},
  'temporal': {'interval': [['2000-02-01T00:00:00Z',
     '2000-02-12T00:00:00Z']]}},
 'license': 'pub

In [6]:


from datetime import datetime

from rs_workflows.common import (
    PrefectFlowConfig,
    download_flow,
)

def run_flow(user, url, url_catalog, station, mission, tmp_local_download, bucket_url, api_key, no_of_tasks, start_date, stop_date):
    """Build a PrefectFlowConfig from the arguments and run download_flow with it.

    start_date and stop_date are strings in the "%Y-%m-%dT%H:%M:%SZ" format;
    they are parsed into datetime objects before being given to the config.
    All other arguments are forwarded positionally, in signature order.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    interval_start = datetime.strptime(start_date, timestamp_format)
    interval_stop = datetime.strptime(stop_date, timestamp_format)

    flow_config = PrefectFlowConfig(
        user,
        url,
        url_catalog,
        station,
        mission,
        tmp_local_download,
        bucket_url,
        api_key,
        no_of_tasks,
        interval_start,
        interval_stop,
    )
    # start the prefect flow
    download_flow(flow_config)


stations = ["CADIP", "ADGS"]
# NOTE(review): url is passed through str.format() with the lower-cased station
# name below, so it looks like a "{}" template is expected here - confirm.
url = os.environ["RSPY_WEBSITE"]

tmp_local_download = "/tmp/{}_tmp"
no_of_tasks = 4

# Run one download flow per station, each with its own temporary download
# directory and its own sub-path under bucket_url.
for station in stations:
    run_flow(
        user=user,
        url=url.format(station.lower()),
        url_catalog=url_catalog,
        station=station,
        mission=mission,
        tmp_local_download=tmp_local_download.format(station),
        bucket_url=f"{bucket_url}/{station}",
        api_key=os.environ.get("RSPY_APIKEY"),  # None when the variable is not set
        no_of_tasks=no_of_tasks,
        start_date="2014-01-01T12:00:00Z",
        stop_date="2024-02-20T12:00:00Z",
    )
    

12:21:36.494 | [36mINFO[0m    | Task run 'ingest_files-0' - The download progress for file DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw is IN_PROGRESS
12:21:37.266 | [36mINFO[0m    | Task run 'ingest_files-1' - The download progress for file DCS_04_S1A_20231121072204051312_ch1_DSDB_00004.raw is IN_PROGRESS
12:21:38.189 | [36mINFO[0m    | Task run 'ingest_files-0' - File DCS_04_S1A_20231121072204051312_ch1_DSDB_00001.raw has been properly downloaded...
12:21:38.214 | [36mINFO[0m    | Task run 'ingest_files-2' - The download progress for file DCS_04_S1A_20231121072204051312_ch1_DSDB_00007.raw is IN_PROGRESS
12:21:38.579 | [36mINFO[0m    | Task run 'ingest_files-1' - File DCS_04_S1A_20231121072204051312_ch1_DSDB_00004.raw has been properly downloaded...
12:21:38.908 | [36mINFO[0m    | Task run 'ingest_files-3' - The download progress for file DCS_04_S1A_20231121072204051312_ch2_DSDB_00046.raw is IN_PROGRESS
12:21:38.965 | [36mINFO[0m    | Task run 'ingest_files-0' - Fi

12:21:54.890 | [36mINFO[0m    | Task run 'ingest_files-2' - The download progress for file S2__OPER_AUX_ECMWFD_PDMC_20190216T120000_V20190217T090000_20190217T210000.TGZ is IN_PROGRESS
12:21:55.580 | [36mINFO[0m    | Task run 'ingest_files-1' - The download progress for file S2__OPER_AUX_ECMWFD_PDMC_20200216T120000_V20190217T090000_20190217T210000.TGZ is IN_PROGRESS
12:21:56.001 | [36mINFO[0m    | Task run 'ingest_files-0' - The download progress for file S2__OPER_AUX_ECMWFD_PDMC_20230216T120000_V20190217T090000_20190217T210000.TGZ is IN_PROGRESS
12:21:56.120 | [36mINFO[0m    | Task run 'ingest_files-2' - File S2__OPER_AUX_ECMWFD_PDMC_20190216T120000_V20190217T090000_20190217T210000.TGZ has been properly downloaded...
12:21:56.686 | [36mINFO[0m    | Task run 'ingest_files-2' - File well published: S2__OPER_AUX_ECMWFD_PDMC_20190216T120000_V20190217T090000_20190217T210000.TGZ

12:21:56.731 | [36mINFO[0m    | Task run 'ingest_files-2' - Finished in state [32mCompleted[0m()
12

In [7]:
import json
# Items endpoint of the user's "<mission>_aux" collection, built once and reused below.
items_url = url_catalog.rstrip("/") + f"/catalog/collections/{user}:{mission}_aux/items"

# List the items of the collection (at most 20 requested).
items_response = requests.get(f"{items_url}?limit=20", **HEADERS)
# Fail here with the HTTP error instead of a KeyError on 'features' below.
items_response.raise_for_status()
catalog_data = items_response.json()

# Request the downloadable file of every listed item and print the raw response body.
for feature in catalog_data['features']:
    print(requests.get(f"{items_url}/{feature['id']}/download/file", **HEADERS).content)

NameError: name 'apikey_headers' is not defined