# Resource Registration BB - Harvester Demo

In this demo we will use the Flowable [REST API](https://registration-harvester-api.develop.eoepca.org/flowable-rest/docs) to deploy and execute workflows.

In [None]:
from requests import Session
from requests.auth import HTTPBasicAuth
import json
import requests
import os

# Setup connection to Flowable API.
# The defaults are the values this demo was written against. Each one can be
# overridden through an environment variable so the notebook can be pointed at
# another Flowable instance, or run with other credentials, without editing
# the source.
flowable_base_url = os.environ.get(
    "FLOWABLE_BASE_URL",
    "https://registration-harvester-api.develop.eoepca.org/flowable-rest",
).rstrip("/")  # the cells below append "/service/..." themselves
flowable_rest_user = os.environ.get("FLOWABLE_REST_USER", "eoepca")
flowable_rest_pw = os.environ.get("FLOWABLE_REST_PASSWORD", "eoepca")

# One shared session so that every request in the following cells is sent
# with the same HTTP basic-auth credentials.
flowable_session = Session()
flowable_session.auth = HTTPBasicAuth(flowable_rest_user, flowable_rest_pw)

## Workflow Management
### Deployments

List current deployments

In [None]:
# Fetch the deployments from the Flowable repository service and print one
# line per deployment. `deployments` stays at notebook level because the
# "Delete workflows" cell iterates over it.
url = f"{flowable_base_url}/service/repository/deployments"
print(f"GET {url}")
response = flowable_session.get(url)
# Fail with a clear HTTP error instead of a confusing KeyError on "data" when
# the request is rejected (for example because of wrong credentials).
response.raise_for_status()
deployments = response.json()["data"]
if not deployments:
    print("No workflows deployed")
else:
    for idx, deployment in enumerate(deployments, 1):
        print("%-2s %-25s deployed at: %-5s " % (idx, deployment['name'], deployment['deploymentTime']))

Deploy the Landsat workflow

In [None]:
# Download each Landsat BPMN definition from GitHub and upload it to Flowable
# as a new deployment. The upload is a multipart request whose form field is
# named after the BPMN file.
landsat_bpmn_files = [
    "https://raw.githubusercontent.com/EOEPCA/registration-harvester/refs/heads/main/workflows/landsat.bpmn",
    "https://raw.githubusercontent.com/EOEPCA/registration-harvester/refs/heads/main/workflows/landsat-scene-ingestion.bpmn",
]
for bpmn in landsat_bpmn_files:
    download = requests.get(bpmn)
    # Do not upload an error page as if it were a workflow definition.
    download.raise_for_status()
    bpmn_file = {os.path.basename(bpmn): download.text}
    response = flowable_session.post(f"{flowable_base_url}/service/repository/deployments", files=bpmn_file)
    # Only report success when Flowable actually accepted the deployment.
    response.raise_for_status()
    # Single quotes inside the f-string keep this line valid before Python 3.12.
    print(f"Sucessfully deployed workflow: {response.json()['url']}")

Deploy the Sentinel workflow

In [None]:
# Download each Sentinel BPMN definition from GitHub and upload it to Flowable
# as a new deployment. The upload is a multipart request whose form field is
# named after the BPMN file.
sentinel_bpmn_files = [
    "https://raw.githubusercontent.com/EOEPCA/registration-harvester/refs/heads/main/workflows/sentinel.bpmn",
    "https://raw.githubusercontent.com/EOEPCA/registration-harvester/refs/heads/main/workflows/sentinel-scene-ingestion.bpmn",
]
for bpmn in sentinel_bpmn_files:
    download = requests.get(bpmn)
    # Do not upload an error page as if it were a workflow definition.
    download.raise_for_status()
    bpmn_file = {os.path.basename(bpmn): download.text}
    response = flowable_session.post(f"{flowable_base_url}/service/repository/deployments", files=bpmn_file)
    # Only report success when Flowable actually accepted the deployment.
    response.raise_for_status()
    # Single quotes inside the f-string keep this line valid before Python 3.12.
    print(f"Sucessfully deployed workflow: {response.json()['url']}")

Delete workflows

In [None]:
# Send a DELETE request for every entry of `deployments` (the list captured by
# the "List current deployments" cell) and print the HTTP status of each call.
# Deployments created after that listing are not part of the list.
deployments_endpoint = f"{flowable_base_url}/service/repository/deployments"
for deployment in deployments:
    deployment_id = deployment['id']
    url = "/".join((deployments_endpoint, f"{deployment_id}"))
    response = flowable_session.delete(url)
    print(f"Delete {deployment['name']} Status: {response.status_code}")

### Workflow definitions (process definitions)
List deployed workflows (process definitions)

In [None]:
# Fetch the process definitions from the Flowable repository service, print
# one line per definition, and remember the ids of the two main workflows.
url = f"{flowable_base_url}/service/repository/process-definitions"
print(f"GET {url}")
response = flowable_session.get(url)
# Fail with a clear HTTP error instead of a confusing KeyError on "data" when
# the request is rejected.
response.raise_for_status()
processes = response.json()["data"]
if not processes:
    print("No workflow definitions")
else:
    for idx, process in enumerate(processes, 1):
        print("%-2s %-28s version: %-5s id: %s" % (idx, process['name'], process['version'], process['id']))
        # These ids are read by the "Execute ... workflow" cells. Each one is
        # only assigned when a definition with exactly this name is listed;
        # if several entries share the name, the last one listed wins.
        if process["name"] == "Landsat Workflow":
            landsat_process_id = process["id"]
        elif process["name"] == "Sentinel Registration":
            sentinel_process_id = process["id"]

### Running workflows (process instances)
List running workflows (process instances)

In [None]:
# Fetch the process instances from the Flowable runtime service and print one
# line per instance.
url = f"{flowable_base_url}/service/runtime/process-instances"
print(f"GET {url}")
response = flowable_session.get(url)
# Fail with a clear HTTP error instead of a confusing KeyError on "data" when
# the request is rejected.
response.raise_for_status()
process_instances = response.json()["data"]
if not process_instances:
    print("No running workflows")
else:
    for idx, process in enumerate(process_instances, 1):
        print("%s %-25s started at: %-25s id: %s" % (idx, process['processDefinitionName'], process['startTime'], process['id']))

### Execute Landsat workflow

The Landsat harvesting workflow consists of two BPMN processes. The main process (Landsat Registration) searches for new data at USGS. For each new scene found, the workflow executes another process (Landsat Scene Ingestion) which performs the individual steps for harvesting and registering the data.

![Landsat workflow](img/landsat-workflow-bpmn.png)

![Landsat Scene Ingestion](img/landsat-scene-ingestion.png)

Define the filter to start the workflow with

In [None]:
# The "query" variable is passed to the workflow as a JSON-encoded string; here
# it restricts the "created" field to a one-hour window.
created_window = {"gte": "2024-12-13T15:00:00.000000Z", "lt": "2024-12-13T16:00:00.000000Z"}
query = json.dumps({"created": created_window})

# Workflow variables, all sent as strings. An alternative (currently unused)
# variable would be:
#   {"name": "datetime_interval", "type": "string",
#    "value": "2024-11-12T15:00:00.000000Z/2024-11-12T16:00:00.000000Z"}
variables = [
    {"name": name, "type": "string", "value": value}
    for name, value in (
        ("collections", "landsat-c2l2-sr"),
        ("bbox", "8,40,18,60"),
        ("query", query),
    )
]

# Create the JSON body for HTTP request which triggers the workflow
body = {
    "processDefinitionId": landsat_process_id,
    "variables": variables,
}
print(json.dumps(body, indent=4))

Send the HTTP request to Flowable REST API to start the workflow

In [None]:
# Start a new process instance using the request body built in the previous cell.
response = flowable_session.post(url=f"{flowable_base_url}/service/runtime/process-instances", json=body)
print(response.status_code)
# Stop with a clear HTTP error if Flowable rejected the request; otherwise the
# lookup of "url" below would fail with a less helpful KeyError.
response.raise_for_status()
# Single quotes inside the f-string keep this line valid before Python 3.12.
print(f"Created process instance at {response.json()['url']}")

### Execute Sentinel workflow

The Sentinel harvesting workflow consists of two BPMN processes. The main process (Sentinel Registration Hourly) is executed automatically by the Flowable engine every hour and searches for new data at CDSE. For each new scene discovered, the workflow executes another process (Sentinel Scene Ingestion) which performs the individual steps for harvesting and registering the data.

![Sentinel Workflow](img/sentinel-workflow-bpmn.png)

![Sentinel Scene Ingestion](img/sentinel-scene-ingestion.png)

Define the filter to start the workflow with

In [None]:
# Time window applied to ContentDate/Start: `ge` start_time, `lt` stop_time.
start_time = "2025-01-06T00:00:00.000000Z"
stop_time = "2025-01-08T00:00:00.000000Z"

# Individual OData clauses; each one is wrapped in parentheses and the clauses
# are joined with "and" to form the final filter expression.
datetime = f"ContentDate/Start ge {start_time} and ContentDate/Start lt {stop_time}"
collection = "startswith(Name,'S2') and contains(Name,'L2A') and not contains(Name,'_N9999')"
spatial = "intersects(area=geography'SRID=4326;POLYGON((3 55, 3 47, 18 47, 18 55, 3 55))')"
online = "Online eq true"
cloudcover = "Attributes/OData.CSC.DoubleAttribute/any(att:att/Name eq 'cloudCover' and att/OData.CSC.DoubleAttribute/Value le 10)"
odata_filter = " and ".join(
    f"({clause})" for clause in (datetime, collection, spatial, online, cloudcover)
)

# Create the JSON body for HTTP request which triggers the workflow
body = {
    "processDefinitionId": sentinel_process_id,
    "variables": [
        {"name": "filter", "type": "string", "value": odata_filter},
    ],
}
print(json.dumps(body, indent=4))

Send the HTTP request to Flowable REST API to start the workflow

In [None]:
# Start a new process instance using the request body built in the previous cell.
response = flowable_session.post(url=f"{flowable_base_url}/service/runtime/process-instances", json=body)
print(response.status_code)
# Stop with a clear HTTP error if Flowable rejected the request; otherwise the
# lookup of "url" below would fail with a less helpful KeyError.
response.raise_for_status()
# Single quotes inside the f-string keep this line valid before Python 3.12.
print(f"Created process instance at {response.json()['url']}")