Import the modules 

In [26]:
from pycalrissian.context import CalrissianContext
from pycalrissian.job import CalrissianJob
from pycalrissian.execution import CalrissianExecution
import base64
import os
import yaml
from kubernetes.client.models.v1_job import V1Job


In [8]:
# Select the kubeconfig file through the KUBECONFIG environment variable.
# NOTE: the path below is specific to the authoring environment; adapt it.
os.environ.update(
    {"KUBECONFIG": "/home/mambauser/.kube/kubeconfig-t2-dev.yaml"}
)


## Create the image pull secrets.

There is one for Docker Hub and one for the GitLab container registry, because the CWL description to run refers to container images published on those two registries.

In [9]:
# Docker Hub account details -- placeholders to replace with real values.
registry = "https://index.docker.io/v1/"
username = "<username>"
password = "<password or token>"
email = "<add email here>"

# Base64 of "username:password", stored under the "auth" key below.
auth_bytes = base64.b64encode(f"{username}:{password}".encode("utf-8"))
auth = auth_bytes.decode("utf-8")

# One entry per container registry, keyed by the registry address.
secret_config = {"auths": {}}
secret_config["auths"][registry] = dict(
    username=username,
    password=password,
    email=email,
    auth=auth,
)
# The GitLab registry entry only carries an auth string (placeholder here).
secret_config["auths"]["registry.gitlab.com"] = {
    "auth": "<add a gitlab token>"
}

**Take away messages about image pull secrets**

* they're created as a dictionary with the same structure as your `~/.docker/config.json` file
* you can use the username/password pair or the auth string 

## Create the CalrissianContext object

The CalrissianContext creates a kubernetes namespace on the cluster.

Note: our Kubernetes cluster uses the `longhorn` RWX storage class; adapt it to your cluster configuration.

In [10]:
# Name of the Kubernetes namespace used for this job.
namespace_name = "job-namespace"

# Describe the execution context; the cluster resources themselves are
# created by `session.initialise()`.
session = CalrissianContext(
    namespace=namespace_name,
    image_pull_secrets=secret_config,  # registry credentials dictionary
    storage_class="longhorn",  # cluster specific, adapt to your cluster
    volume_size="10G",  # size of the 'calrissian-wdir' volume claim
)

Now trigger the `CalrissianContext` initialisation with:

In [11]:
# Create the namespace, the roles with their role bindings, and the
# persistent volume claim (each step is reported in the log output).
session.initialise()

2022-07-21 09:55:40.116 | INFO     | pycalrissian.context:initialise:42 - create namespace job-namespace
2022-07-21 09:55:40.175 | INFO     | pycalrissian.context:create_namespace:236 - creating namespace job-namespace
2022-07-21 09:55:40.526 | INFO     | pycalrissian.context:create_namespace:250 - namespace job-namespace created
2022-07-21 09:55:40.527 | INFO     | pycalrissian.context:initialise:59 - create role pod-manager-role
2022-07-21 09:55:40.782 | INFO     | pycalrissian.context:initialise:68 - create role binding for role pod-manager-role
2022-07-21 09:55:40.868 | INFO     | pycalrissian.context:initialise:59 - create role log-reader-role
2022-07-21 09:55:40.981 | INFO     | pycalrissian.context:initialise:68 - create role binding for role log-reader-role
2022-07-21 09:55:41.063 | INFO     | pycalrissian.context:initialise:73 - create persistent volume claim 'calrissian-wdir' of 10G with storage class longhorn
2022-07-21 09:55:41.151 | INFO     | pycalrissian.context:initiali

## Read the CWL document

Now load a CWL document and create a dictionary with the parameters:


In [14]:
# Parse the CWL document into a plain Python dictionary.
# The encoding is given explicitly so that decoding the file does not depend
# on the platform's default locale encoding.
with open(
    "../tests/app-s2-composites.0.1.0.cwl", "r", encoding="utf-8"
) as stream:
    cwl = yaml.safe_load(stream)

# Workflow parameters: the post- and pre-event Sentinel-2 STAC items and
# the area of interest given as a bounding box string.
params = dict(
    post_stac_item="https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2B_53HPA_20210723_0_L2A",  # noqa: E501
    pre_stac_item="https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2B_53HPA_20210703_0_L2A",  # noqa: E501
    aoi="136.659,-35.96,136.923,-35.791",
)

**Take away messages**

* The CWL description is loaded into a Python dictionary
* The parameters are a Python dictionary

So you can discover the CWL Workflow parameters with something like:

In [23]:
# Inputs declared by the first entry of the CWL `$graph`, keyed by
# parameter name (each with its type and, where present, doc/default).
cwl["$graph"][0]["inputs"]

{'pre_stac_item': {'doc': 'Pre-event Sentinel-2 item', 'type': 'string'},
 'post_stac_item': {'doc': 'Post-event Sentinel-2 item', 'type': 'string'},
 'aoi': {'doc': 'area of interest as a bounding box', 'type': 'string'},
 'bands': {'type': 'string[]', 'default': ['B8A', 'B12', 'SCL']}}

## Create the `CalrissianJob`

In [35]:
# Build the job from the CWL document, its parameters and the runtime
# context created earlier in the notebook.
job = CalrissianJob(
    # what to run
    cwl=cwl,
    cwl_entry_point="dnbr",
    params=params,
    # where to run it
    runtime_context=session,
    # resource ceilings for the pods
    max_ram="4G",
    max_cores=2,
)

2022-07-21 10:14:30.734 | INFO     | pycalrissian.job:__init__:59 - using default security context {'runAsUser': 0, 'runAsGroup': 0, 'fsGroup': 0}
2022-07-21 10:14:30.736 | INFO     | pycalrissian.job:__init__:70 - job name: job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-242767d42aa2
2022-07-21 10:14:30.737 | INFO     | pycalrissian.job:__init__:71 - create CWL configMap
2022-07-21 10:14:33.259 | INFO     | pycalrissian.job:__init__:73 - create processing parameters configMap


The `CalrissianJob` object is constructed with:

* a CWL dictionary
* a parameters dictionary
* a runtime context, a CalrissianContext object
* the CWL entry point (`cwl_entry_point`), here the `dnbr` workflow
* the maximum number of cores the pods can use
* the maximum amount of RAM the pods can use

The CalrissianJob can be serialized to a Kubernetes Job object:

In [36]:
# Check that the serialized job is a kubernetes client `V1Job` object.
isinstance(job.to_k8s_job(), V1Job)

True

Or to a Kubernetes Job manifest in YAML:

In [37]:
# Serialize the job to the file `job.yml` (confirmed by the log line).
job.to_yaml("job.yml")

2022-07-21 10:14:36.672 | INFO     | pycalrissian.job:to_yaml:134 - job job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-242767d42aa2 serialized to job.yml


At this stage, you could run `kubectl -n job-namespace apply -f job.yml` to submit the job to Kubernetes.

## Create the `CalrissianExecution` 


In [38]:
# Pair the job with the runtime context it will be executed in.
execution = CalrissianExecution(
    runtime_context=session,
    job=job,
)

Submit the job with:

In [39]:
# Submit the job to the cluster (the log reports "submitted" on success).
execution.submit()

2022-07-21 10:14:44.069 | INFO     | pycalrissian.execution:submit:27 - submit job job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-242767d42aa2
2022-07-21 10:14:45.360 | INFO     | pycalrissian.execution:submit:33 - job job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-242767d42aa2 submitted


Monitor the execution with:

In [40]:
# Report the job state repeatedly; `interval` is presumably in seconds
# (the log lines are about 20 s apart) -- TODO confirm.
execution.monitor(interval=20)

2022-07-21 10:15:10.377 | INFO     | pycalrissian.execution:monitor:147 - job job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-242767d42aa2 is active
2022-07-21 10:15:30.454 | INFO     | pycalrissian.execution:monitor:147 - job job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-242767d42aa2 is active
2022-07-21 10:15:50.922 | INFO     | pycalrissian.execution:monitor:147 - job job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-242767d42aa2 is active
2022-07-21 10:16:12.313 | INFO     | pycalrissian.execution:monitor:147 - job job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-242767d42aa2 is active
2022-07-21 10:16:32.381 | INFO     | pycalrissian.execution:monitor:147 - job job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-242767d42aa2 is active
2022-07-21 10:16:52.430 | INFO     | pycalrissian.execution:monitor:147 - job job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-242767d42aa2 is active
2022-07-21 10:17:12.493 | INFO     | pycalrissian.execution:monitor:147 - job job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-2

Get the execution log

In [41]:
# Retrieve the execution log and print it; the output contains the
# calrissian/cwltool INFO messages.
log = execution.get_log()
print(log)

[1;30mINFO[0m calrissian 0.11.0-sprint1 (cwltool 3.1.20211004060744)
[1;30mINFO[0m Resolved '/workflow-input/workflow.cwl#dnbr' to 'file:///workflow-input/..2022_07_21_10_14_47.495866009/workflow.cwl#dnbr'
[1;30mINFO[0m [workflow ] starting step node_nbr
[1;30mINFO[0m [step node_nbr] start
[1;30mINFO[0m [workflow node_nbr] starting step node_stac_2
[1;30mINFO[0m [step node_stac_2] start
[1;30mINFO[0m [step node_stac_2] start
[1;30mINFO[0m [step node_stac_2] start
[1;30mINFO[0m [step node_nbr] start
[1;30mINFO[0m [workflow node_nbr_2] starting step node_stac_3
[1;30mINFO[0m [step node_stac_3] start
[1;30mINFO[0m [step node_stac_3] start
[1;30mINFO[0m [step node_stac_3] start
[1;30mINFO[0m [workflow ] start
[1;30mINFO[0m [workflow node_nbr] start
[1;30mINFO[0m [workflow node_nbr_2] start
[1;30mINFO[0m [step node_stac_2] completed success
[1;30mINFO[0m [workflow node_nbr] starting step node_subset
[1;30mINFO[0m [step node_subset] start
[1;30mINFO[0m

Get the usage report

In [43]:
# Usage report: allowed cores and RAM, plus per-step resource figures
# listed under the 'children' key.
usage = execution.get_usage_report()
usage

{'cores_allowed': 2.0,
 'ram_mb_allowed': 4000.0,
 'children': [{'cpus': 1.0,
   'ram_megabytes': 268.435456,
   'disk_megabytes': 0.0,
   'name': 'node_stac',
   'start_time': '2022-07-21T10:15:29+00:00',
   'finish_time': '2022-07-21T10:15:30+00:00',
   'elapsed_hours': 0.0002777777777777778,
   'elapsed_seconds': 1.0,
   'ram_megabyte_hours': 0.07456540444444444,
   'cpu_hours': 0.0002777777777777778},
  {'cpus': 1.0,
   'ram_megabytes': 268.435456,
   'disk_megabytes': 0.0,
   'name': 'node_stac_2',
   'start_time': '2022-07-21T10:15:33+00:00',
   'finish_time': '2022-07-21T10:15:34+00:00',
   'elapsed_hours': 0.0002777777777777778,
   'elapsed_seconds': 1.0,
   'ram_megabyte_hours': 0.07456540444444444,
   'cpu_hours': 0.0002777777777777778},
  {'cpus': 1.0,
   'ram_megabytes': 268.435456,
   'disk_megabytes': 0.0,
   'name': 'node_stac_3',
   'start_time': '2022-07-21T10:15:41+00:00',
   'finish_time': '2022-07-21T10:15:42+00:00',
   'elapsed_hours': 0.0002777777777777778,
   'el

Get the execution output

In [44]:
# Execution output: here a 'stac' entry of class Directory with the
# listing of the files it contains.
output = execution.get_output()
output

{'stac': {'location': 'file:///calrissian/3zt4a_7o',
  'basename': '3zt4a_7o',
  'class': 'Directory',
  'listing': [{'class': 'File',
    'location': 'file:///calrissian/3zt4a_7o/catalog.json',
    'basename': 'catalog.json',
    'checksum': 'sha1$a5d1d9821e889aa125778e4f2e14a788ff1512ce',
    'size': 225,
    'path': '/calrissian/3zt4a_7o/catalog.json'},
   {'class': 'File',
    'location': 'file:///calrissian/3zt4a_7o/dnbr.tif',
    'basename': 'dnbr.tif',
    'checksum': 'sha1$87a3dfee0d055453dad525e8edd8a216121d808c',
    'size': 1402218,
    'path': '/calrissian/3zt4a_7o/dnbr.tif'},
   {'class': 'File',
    'location': 'file:///calrissian/3zt4a_7o/dnbr-item.json',
    'basename': 'dnbr-item.json',
    'checksum': 'sha1$1c0a635ad501c599ab258019d05c7b276515c565',
    'size': 818,
    'path': '/calrissian/3zt4a_7o/dnbr-item.json'}],
  'path': '/calrissian/3zt4a_7o'}}

Get a few details about the execution

In [45]:
# Start and completion timestamps of the execution.
print(execution.get_start_time())
print(execution.get_completion_time())

2022-07-21 10:14:45+00:00
2022-07-21 10:18:16+00:00


In [46]:
# Final status flags: whether the job completed and whether it succeeded.
print(f"complete {execution.is_complete()}")
print(f"succeeded {execution.is_succeeded()}")

complete True
succeeded True


Delete the Kubernetes namespace with:

In [47]:
# Delete the pods in the namespace, then the namespace itself (see the
# log); the returned status shows the namespace phase as 'Terminating'.
session.dispose()

2022-07-21 10:20:18.825 | INFO     | pycalrissian.context:dispose:98 - delete pod job-1658397684816381-833c694b-c50b-49c8-be05-24323d12e0ab-hjvsq
2022-07-21 10:20:19.040 | INFO     | pycalrissian.context:dispose:98 - delete pod job-1658398470736707-9dcb0c7c-d4ba-4e06-bc4a-242767d42aa2-ql4wr
2022-07-21 10:20:19.873 | INFO     | pycalrissian.context:dispose:101 - dispose namespace job-namespace


{'api_version': 'v1',
 'code': None,
 'details': None,
 'kind': 'Namespace',
 'message': None,
 'metadata': {'_continue': None,
              'remaining_item_count': None,
              'resource_version': '13867261366',
              'self_link': None},
 'reason': None,
 'status': "{'phase': 'Terminating'}"}