In [1]:
from pprint import pprint

import requests
import yaml

import argo_workflows
from argo_workflows.api import workflow_service_api
from argo_workflows.model.io_argoproj_workflow_v1alpha1_workflow_create_request import (
    IoArgoprojWorkflowV1alpha1WorkflowCreateRequest,
)

# Address of the Argo Workflows server this notebook talks to (localhost, port 2746).
ARGO_SERVER_HOST = "https://127.0.0.1:2746"

configuration = argo_workflows.Configuration(host=ARGO_SERVER_HOST)
# TLS certificate verification is switched off for this client.
# NOTE(review): presumably because the local server uses a self-signed certificate -
# confirm, and do not reuse this setting against a non-local host.
configuration.verify_ssl = False

In [5]:
# Build the API client and the workflow service wrapper from the shared `configuration`.
api_client = argo_workflows.ApiClient(configuration)
api_instance = workflow_service_api.WorkflowServiceApi(api_client)

# Get all workflows in the `argo` namespace and keep their names so they can be displayed
# at the end of the cell (previously the list was built and silently discarded).
workflows = api_instance.list_workflows(namespace="argo")
workflow_names = [workflow['metadata']['name'] for workflow in workflows['items']]

# Fetch the `WorkflowTemplateRef`-based workflow that the rest of this notebook inspects
# (later cells read `spec.workflow_template_ref` and node ids prefixed with this name).
# `.to_dict()` converts the SDK model instance into a plain dict, which is what the
# subsequent `first_workflow.keys()` / `first_workflow[...]` cells operate on.
first_workflow = api_instance.get_workflow(
    namespace="argo",
    name="set-a-coin-from-template-ref-t4nc6",
).to_dict()

workflow_names



# Workflow objects

We have a closer look at the data structure of the `Workflow` CRD as returned by the `argo` SDK's `WorkflowServiceApi.list_workflows` / `WorkflowServiceApi.get_workflow` methods/APIs.

Understanding these structures will help us build the required meta and status data retrievers for the frontend, and will also help during the development of the `bettmensch.ai.pipelines` SDK.

In [7]:

# Notes on the listing result:
# - `len(workflows.items)` was 2 when this notebook was written.
# - each entry of `workflows.items` is an
#   `argo_workflows.model.io_argoproj_workflow_v1alpha1_workflow.IoArgoprojWorkflowV1alpha1Workflow`
#   instance - see here for details:
#   https://github.com/argoproj/argo-workflows/blob/main/sdks/python/client/argo_workflows/model/io_argoproj_workflow_v1alpha1_workflow.py
# - such an instance can be turned into a plain dict with `.to_dict()`,
#   e.g. `workflows.items[1].to_dict()`.
top_level_sections = first_workflow.keys()
top_level_sections  # > dict_keys(['metadata', 'spec', 'status'])

dict_keys(['metadata', 'spec', 'status'])

## `.metadata`

The section holding all the identifiers at the `Workflow` level, and some progress information in the `.metadata.labels` keys
- `workflows.argoproj.io/completed` and 
- `workflows.argoproj.io/phase`, 

respectively. Apparently both of those (and some others) are actively being managed by the `workflow-controller`, implying real time behaviour.

In [8]:
# `metadata` is the Workflow manifest's `metadata` field. It contains the `workflow_metadata`
# specs inherited from the WorkflowTemplate, plus additions made by the ArgoWorkflow server.
metadata_section = first_workflow['metadata']
metadata_section.keys()  # > dict_keys(['name', 'generate_name', 'namespace', 'uid', 'resource_version', 'generation', 'creation_timestamp', 'labels', 'annotations', 'managed_fields'])

# Display every metadata field, in the order listed above, as one tuple.
tuple(
    metadata_section[field]
    for field in (
        'name',
        'generate_name',
        'namespace',
        'uid',
        'resource_version',
        'generation',
        'creation_timestamp',
        'labels',
        'annotations',
        'managed_fields',
    )
)
# Sample output, recorded for the copy-paste type workflow `set-a-coin-from-template-copy-8k59q`:
# > ('set-a-coin-from-template-copy-8k59q',
#  'set-a-coin-from-template-copy-',
#  'argo',
#  '1829023a-67ec-429d-9e4a-9536aaa4e1b8',
#  '63771',
#  4,
#  datetime.datetime(2024, 5, 1, 11, 13, 20, tzinfo=tzutc()),
#  {'label_key_1': 'label_value_1',
#   'workflows.argoproj.io/completed': 'true',
#   'workflows.argoproj.io/creator': 'system-serviceaccount-argo-argo-server',
#   'workflows.argoproj.io/phase': 'Succeeded'},
#  {'annotation_key_1': 'annotation_value_1',
#   'workflows.argoproj.io/pod-name-format': 'v2'},
#  [{'manager': 'argo',
#   'operation': 'Update',
#   'api_version': 'argoproj.io/v1alpha1',
#   'time': datetime.datetime(2024, 5, 1, 11, 13, 20, tzinfo=tzutc()),
#   'fields_type': 'FieldsV1',
#   'fields_v1': {'f:metadata': {'f:generateName': {},
#     'f:labels': {'.': {}, 'f:workflows.argoproj.io/creator': {}}},
#    'f:spec': {}}},
#  {'manager': 'workflow-controller',
#   'operation': 'Update',
#   'api_version': 'argoproj.io/v1alpha1',
#   'time': datetime.datetime(2024, 5, 1, 11, 13, 40, tzinfo=tzutc()),
#   'fields_type': 'FieldsV1',
#   'fields_v1': {'f:metadata': {'f:annotations': {'.': {},
#      'f:annotation_key_1': {},
#      'f:workflows.argoproj.io/pod-name-format': {}},
#     'f:labels': {'f:label_key_1': {},
#      'f:workflows.argoproj.io/completed': {},
#      'f:workflows.argoproj.io/phase': {}}},
#    'f:status': {}}}])

('set-a-coin-from-template-ref-t4nc6',
 'set-a-coin-from-template-ref-',
 'argo',
 'c54b4ca3-26d1-4746-aba8-87a589fe9b30',
 '63762',
 4,
 datetime.datetime(2024, 5, 1, 11, 13, 19, tzinfo=tzutc()),
 {'label_key_a': 'label_value_a',
  'workflows.argoproj.io/completed': 'true',
  'workflows.argoproj.io/creator': 'system-serviceaccount-argo-argo-server',
  'workflows.argoproj.io/phase': 'Succeeded'},
 {'annotation_key_a': 'annotation_value_a',
  'workflows.argoproj.io/pod-name-format': 'v2'},
 [{'manager': 'argo',
   'operation': 'Update',
   'api_version': 'argoproj.io/v1alpha1',
   'time': datetime.datetime(2024, 5, 1, 11, 13, 19, tzinfo=tzutc()),
   'fields_type': 'FieldsV1',
   'fields_v1': {'f:metadata': {'f:generateName': {},
     'f:labels': {'.': {}, 'f:workflows.argoproj.io/creator': {}}},
    'f:spec': {}}},
  {'manager': 'workflow-controller',
   'operation': 'Update',
   'api_version': 'argoproj.io/v1alpha1',
   'time': datetime.datetime(2024, 5, 1, 11, 13, 39, tzinfo=tzutc()),

## `.spec`

For `Workflow`s constructed using a reference `WorkflowTemplate` and a `WorkflowTemplateRef`, this section is extremely short. Since we will only instantiate `Workflow`s this way, we focus on this variant.

The `.spec.arguments` holds the names and values of all the `Workflow`'s input arguments. In our case, these will be exclusively of type `Parameter`.
The `.spec.workflow_template_ref` holds the unique `name` of the `WorkflowTemplate` used to construct this `Workflow` via the `WorkflowTemplateRef`.

In [9]:
# `spec` is the Workflow manifest's `spec` field. Its keys depend on how the Workflow was built:
# - copy-paste type workflows:
#     dict_keys(['templates', 'entrypoint', 'arguments', 'pod_metadata', 'workflow_metadata'])
# - workflows constructed using a `WorkflowTemplateRef`:
#     dict_keys(['arguments', 'workflow_template_ref'])
# We'll deal with the latter from now on.
spec_section = first_workflow['spec']
spec_section.keys()

# Specified arguments (defined by the WorkflowTemplate) with values (specified in the
# Workflow construction). For us, these will only be `Parameter` type input arguments.
spec_section['arguments']

# Reference to the underlying WorkflowTemplate; this is the value the cell displays.
spec_section['workflow_template_ref']  # e.g. > {'name': 'set-a-coin-rl4sj'}

{'name': 'set-a-coin-rl4sj'}

## `.status`

This is by far the largest part of the `Workflow`'s manifest body.

In [10]:
# `status` is the managed section of the Workflow manifest/resource. It contains information
# on ArgoWorkflow Nodes (e.g. the pods executing the components), their inputs, outputs, logs
# and relationships between each other, and more.
status_section = first_workflow['status']
status_section.keys()
# > dict_keys(['phase', 'started_at', 'finished_at', 'progress', 'nodes', 'stored_templates', 'conditions', 'resources_duration', 'stored_workflow_template_spec', 'artifact_repository_ref', 'artifact_gc_status', 'task_results_completion_status'])

dict_keys(['phase', 'started_at', 'finished_at', 'progress', 'nodes', 'stored_templates', 'conditions', 'resources_duration', 'stored_workflow_template_spec', 'artifact_repository_ref', 'artifact_gc_status', 'task_results_completion_status'])

### `.status.phase`
### `.status.started_at`
### `.status.finished_at`
### `.status.progress`
### `.status.conditions`
### `.status.resources_duration`
### `.status.task_results_completion_status`

In [11]:
# Self-explanatory workflow-level status data, displayed together as one tuple.
workflow_status = first_workflow['status']
tuple(
    workflow_status[field]
    for field in (
        'phase',
        'started_at',
        'finished_at',
        'progress',
        'conditions',
        'resources_duration',
        'task_results_completion_status',
    )
)
# > ('Succeeded',
#  datetime.datetime(2024, 5, 1, 11, 13, 19, tzinfo=tzutc()),
#  datetime.datetime(2024, 5, 1, 11, 13, 39, tzinfo=tzutc()),
#  '2/2',
#  [{'type': 'PodRunning', 'status': 'False'},
#   {'type': 'Completed', 'status': 'True'}],
#  {'cpu': 0, 'memory': 5}, # <-- workflow cumulative K8s resources across all pods/nodes. see `status.nodes.{example}.resources_duration` for a pod/node level version of this in the below section
#  {'set-a-coin-from-template-ref-t4nc6-1623702198': True,
#   'set-a-coin-from-template-ref-t4nc6-4235989955': True})

('Succeeded',
 datetime.datetime(2024, 5, 1, 11, 13, 19, tzinfo=tzutc()),
 datetime.datetime(2024, 5, 1, 11, 13, 39, tzinfo=tzutc()),
 '2/2',
 [{'type': 'PodRunning', 'status': 'False'},
  {'type': 'Completed', 'status': 'True'}],
 {'cpu': 0, 'memory': 5},
 {'set-a-coin-from-template-ref-t4nc6-1623702198': True,
  'set-a-coin-from-template-ref-t4nc6-4235989955': True})

### `status.nodes.{example}`

In [12]:
# `status.nodes` is a key:value mapping of all ArgoNodes (e.g. pods) in the workflow.
# In our example we have 3: the DAG orchestrator node (e.g. the entrypoint template) and
# the two Task nodes (e.g. the two script templates).
workflow_nodes = first_workflow['status']['nodes']
workflow_nodes.keys()
# > dict_keys(['set-a-coin-from-template-ref-t4nc6', 'set-a-coin-from-template-ref-t4nc6-1623702198', 'set-a-coin-from-template-ref-t4nc6-4235989955'])

# Pick one of the Task nodes and display its node level data, which includes
# - identifiers (id and name),
# - template reference,
# - progress,
# - pod/node level `resources_duration`,
# - resolved inputs and outputs, including the default `Artifact` type log output and the exit code,
# - the `id`s of dependent node(s) (where applicable) in the `children` field.
sample_node = workflow_nodes['set-a-coin-from-template-ref-t4nc6-4235989955']
sample_node
# > {'id': 'set-a-coin-from-template-ref-t4nc6-4235989955',
#  'name': 'set-a-coin-from-template-ref-t4nc6.Set-a-coin',
#  'type': 'Pod',
#  'display_name': 'Set-a-coin',
#  'template_name': 'set-coin',
#  'template_scope': 'local/',
#  'phase': 'Succeeded',
#  'boundary_id': 'set-a-coin-from-template-ref-t4nc6',
#  'started_at': datetime.datetime(2024, 5, 1, 11, 13, 19, tzinfo=tzutc()),
#  'finished_at': datetime.datetime(2024, 5, 1, 11, 13, 23, tzinfo=tzutc()),
#  'progress': '1/1',
#  'resources_duration': {'cpu': 0, 'memory': 2},
#  'inputs': {'parameters': [{'name': 'coin', 'value': 'new coin'}]},
#  'outputs': {'parameters': [{'name': 'coin',
#     'value': 'new coin',
#     'value_from': {'path': './coin_output.txt'}}],
#   'artifacts': [{'name': 'main-logs',
#     's3': {'key': 'set-a-coin-from-template-ref-t4nc6/set-a-coin-from-template-ref-t4nc6-set-coin-4235989955/main.log'}}],
#   'exit_code': '0'},
#  'children': ['set-a-coin-from-template-ref-t4nc6-1623702198'],
#  'host_node_name': 'kind-control-plane'}

{'id': 'set-a-coin-from-template-ref-t4nc6-4235989955',
 'name': 'set-a-coin-from-template-ref-t4nc6.Set-a-coin',
 'type': 'Pod',
 'display_name': 'Set-a-coin',
 'template_name': 'set-coin',
 'template_scope': 'local/',
 'phase': 'Succeeded',
 'boundary_id': 'set-a-coin-from-template-ref-t4nc6',
 'started_at': datetime.datetime(2024, 5, 1, 11, 13, 19, tzinfo=tzutc()),
 'finished_at': datetime.datetime(2024, 5, 1, 11, 13, 23, tzinfo=tzutc()),
 'progress': '1/1',
 'resources_duration': {'cpu': 0, 'memory': 2},
 'inputs': {'parameters': [{'name': 'coin', 'value': 'new coin'}]},
 'outputs': {'parameters': [{'name': 'coin',
    'value': 'new coin',
    'value_from': {'path': './coin_output.txt'}}],
  'artifacts': [{'name': 'main-logs',
    's3': {'key': 'set-a-coin-from-template-ref-t4nc6/set-a-coin-from-template-ref-t4nc6-set-coin-4235989955/main.log'}}],
  'exit_code': '0'},
 'children': ['set-a-coin-from-template-ref-t4nc6-1623702198'],
 'host_node_name': 'kind-control-plane'}

### `status.stored_templates` 

The `templates` that are being used here, including the entrypoint. Retrieved via the reference to the WorkflowTemplate, this is essentially the `templates` section of the underlying `WorkflowTemplate` manifest.

In [13]:
# Names of the stored templates, keyed as `namespaced/<workflow template name>/<template name>`.
stored_templates = first_workflow['status']['stored_templates']
stored_templates.keys()
# > dict_keys(['namespaced/set-a-coin-rl4sj/Coin-set', 'namespaced/set-a-coin-rl4sj/set-coin', 'namespaced/set-a-coin-rl4sj/show-coin'])

dict_keys(['namespaced/set-a-coin-rl4sj/Coin-set', 'namespaced/set-a-coin-rl4sj/set-coin', 'namespaced/set-a-coin-rl4sj/show-coin'])

In [14]:
# Look up a leaf template among the stored templates.
leaf_template_key = 'namespaced/set-a-coin-rl4sj/set-coin'
sample_stored_template = first_workflow['status']['stored_templates'][leaf_template_key]

# The definition of the template as a modular function, i.e. without the contextual
# DAG dependencies or resolved inputs/outputs.
sample_stored_template
# > {'name': 'set-coin',
#  'inputs': {'parameters': [{'name': 'coin'}]},
#  'outputs': {'parameters': [{'name': 'coin',
#     'value_from': {'path': './coin_output.txt'}}]},
#  'metadata': {},
#  'script': {'image': 'python:3.8',
#   'source': "import os\nimport sys\nsys.path.append(os.getcwd())\nimport json\ntry: coin = json.loads(r'''{{inputs.parameters.coin}}''')\nexcept: coin = r'''{{inputs.parameters.coin}}'''\n\nwith open('./coin_output.txt', 'w') as output:\n    output.write(coin)",
#   'name': '',
#   'command': ['python'],
#   'resources': {}}}

{'name': 'set-coin',
 'inputs': {'parameters': [{'name': 'coin'}]},
 'outputs': {'parameters': [{'name': 'coin',
    'value_from': {'path': './coin_output.txt'}}]},
 'metadata': {},
 'script': {'image': 'python:3.8',
  'source': "import os\nimport sys\nsys.path.append(os.getcwd())\nimport json\ntry: coin = json.loads(r'''{{inputs.parameters.coin}}''')\nexcept: coin = r'''{{inputs.parameters.coin}}'''\n\nwith open('./coin_output.txt', 'w') as output:\n    output.write(coin)",
  'name': '',
  'command': ['python'],
  'resources': {}}}

In [15]:
# Look up the entrypoint template among the stored templates.
entrypoint_template_key = 'namespaced/set-a-coin-rl4sj/Coin-set'
entrypoint_stored_template = first_workflow['status']['stored_templates'][entrypoint_template_key]

# The definition of the entrypoint template as a modular function, i.e. without the
# contextual DAG dependencies or resolved inputs/outputs.
entrypoint_stored_template
# > {'name': 'Coin-set',
#  'inputs': {},
#  'outputs': {},
#  'metadata': {},
#  'dag': {'tasks': [{'name': 'Set-a-coin',
#     'template': 'set-coin',
#     'arguments': {'parameters': [{'name': 'coin',
#        'value': '{{workflow.parameters.coin}}'}]}},
#    {'name': 'Show-a-coin',
#     'template': 'show-coin',
#     'arguments': {'parameters': [{'name': 'coin',
#        'value': '{{tasks.Set-a-coin.outputs.parameters.coin}}'}]},
#     'depends': 'Set-a-coin'}]}}

{'name': 'Coin-set',
 'inputs': {},
 'outputs': {},
 'metadata': {},
 'dag': {'tasks': [{'name': 'Set-a-coin',
    'template': 'set-coin',
    'arguments': {'parameters': [{'name': 'coin',
       'value': '{{workflow.parameters.coin}}'}]}},
   {'name': 'Show-a-coin',
    'template': 'show-coin',
    'arguments': {'parameters': [{'name': 'coin',
       'value': '{{tasks.Set-a-coin.outputs.parameters.coin}}'}]},
    'depends': 'Set-a-coin'}]}}

### `status.artifact_repository_ref`
### `status.artifact_gc_status`

In [16]:
# Configuration of the associated artifact repository. Defaults to minio.
artifact_repository_ref = first_workflow['status']['artifact_repository_ref']
artifact_repository_ref

{'config_map': 'artifact-repositories',
 'key': 'default-v1',
 'namespace': 'argo',
 'artifact_repository': {'archive_logs': True,
  's3': {'endpoint': 'minio:9000',
   'bucket': 'my-bucket',
   'insecure': True,
   'access_key_secret': {'key': 'accesskey', 'name': 'my-minio-cred'},
   'secret_key_secret': {'key': 'secretkey', 'name': 'my-minio-cred'}}}}

In [17]:
# TODO: the meaning of this field has not been clarified yet.
artifact_gc_status = first_workflow['status']['artifact_gc_status']
artifact_gc_status

{'not_specified': True}

### `status.stored_workflow_template_spec`

This is the `spec` field of the underlying `WorkflowTemplate` manifest. 

Contains the `entrypoint` and `templates` sections defining the DAG of the `WorkflowTemplate` as well as its `name`, and the `arguments`.

Since we added `pod_metadata` and `workflow_metadata` to our `WorkflowTemplate`, we will find these sections here, too.

In [18]:
# Sections of the stored `spec` of the underlying WorkflowTemplate.
stored_template_spec = first_workflow['status']['stored_workflow_template_spec']
stored_template_spec.keys()
# > dict_keys(['templates', 'entrypoint', 'arguments', 'workflow_template_ref', 'pod_metadata', 'workflow_metadata'])

dict_keys(['templates', 'entrypoint', 'arguments', 'workflow_template_ref', 'pod_metadata', 'workflow_metadata'])

In [19]:
# Same as `spec.workflow_template_ref`.
stored_template_spec = first_workflow['status']['stored_workflow_template_spec']
stored_template_spec['workflow_template_ref']
# > {'name': 'set-a-coin-rl4sj'}

{'name': 'set-a-coin-rl4sj'}

In [20]:
# Name of the entrypoint template of the DAG.
stored_template_spec = first_workflow['status']['stored_workflow_template_spec']
stored_template_spec['entrypoint']
# > 'Coin-set'

'Coin-set'

In [21]:
# Not quite the WorkflowTemplate arguments - it also contains the value that was
# resolved at runtime.
stored_template_spec = first_workflow['status']['stored_workflow_template_spec']
stored_template_spec['arguments']

{'parameters': [{'name': 'coin', 'value': 'new coin'}]}

In [22]:
# The DAG entrypoint template. Use this at the WorkflowTemplate level to resolve the DAG
# dependency structure by looking at each DAG's tasks' `depends` field to identify
# upstream nodes/tasks/pods.
stored_spec_templates = first_workflow['status']['stored_workflow_template_spec']['templates']
stored_spec_templates[0]

{'name': 'Coin-set',
 'inputs': {},
 'outputs': {},
 'metadata': {},
 'dag': {'tasks': [{'name': 'Set-a-coin',
    'template': 'set-coin',
    'arguments': {'parameters': [{'name': 'coin',
       'value': '{{workflow.parameters.coin}}'}]}},
   {'name': 'Show-a-coin',
    'template': 'show-coin',
    'arguments': {'parameters': [{'name': 'coin',
       'value': '{{tasks.Set-a-coin.outputs.parameters.coin}}'}]},
    'depends': 'Set-a-coin'}]}}

In [23]:
# Second stored template; in the recorded run this is the `set-coin` script template.
stored_spec_templates = first_workflow['status']['stored_workflow_template_spec']['templates']
stored_spec_templates[1]

{'name': 'set-coin',
 'inputs': {'parameters': [{'name': 'coin'}]},
 'outputs': {'parameters': [{'name': 'coin',
    'value_from': {'path': './coin_output.txt'}}]},
 'metadata': {},
 'script': {'image': 'python:3.8',
  'source': "import os\nimport sys\nsys.path.append(os.getcwd())\nimport json\ntry: coin = json.loads(r'''{{inputs.parameters.coin}}''')\nexcept: coin = r'''{{inputs.parameters.coin}}'''\n\nwith open('./coin_output.txt', 'w') as output:\n    output.write(coin)",
  'name': '',
  'command': ['python'],
  'resources': {}}}

In [24]:
# Third stored template; in the recorded run this is the `show-coin` script template.
stored_spec_templates = first_workflow['status']['stored_workflow_template_spec']['templates']
stored_spec_templates[2]

{'name': 'show-coin',
 'inputs': {'parameters': [{'name': 'coin'}]},
 'outputs': {},
 'metadata': {},
 'script': {'image': 'python:3.8',
  'source': "import os\nimport sys\nsys.path.append(os.getcwd())\nimport json\ntry: coin = json.loads(r'''{{inputs.parameters.coin}}''')\nexcept: coin = r'''{{inputs.parameters.coin}}'''\n\nprint(f'it was {coin}')",
  'name': '',
  'command': ['python'],
  'resources': {}}}