In [1]:
from DANE_utils import jobspec
import json

def pprint(obj):
    """Pretty-print the JSON text produced by ``str(obj)``.

    The string form of ``obj`` is parsed as JSON and re-emitted with a
    4-space indent, then written to stdout.
    """
    parsed = json.loads(str(obj))
    formatted = json.dumps(parsed, indent=4)
    print(formatted)

# Jobspec

In [2]:
# Describe a job for a single source item whose tasks are held in a
# taskSequential container, then show its JSON form.
job = jobspec.jobspec(
    source_url='http://127.0.0.1/example',
    source_id='ITM123',
    source_set='NISV',
    tasks=jobspec.taskSequential(['DOWNLOAD', 'CV', 'ASR', 'INDEX', 'DELETE']),
)

pprint(job)

{
    "source_url": "http://127.0.0.1/example",
    "source_id": "ITM123",
    "source_set": "NISV",
    "job_id": null,
    "tasks": {
        "taskSequential": [
            "DOWNLOAD",
            "CV",
            "ASR",
            "INDEX",
            "DELETE"
        ]
    },
    "metadata": {},
    "priority": 1,
    "response": {}
}


In [3]:
# Round-trip: serialise the job to JSON and rebuild a new jobspec from it.
# The printed result is identical to the original job's output.
new_job = jobspec.jobspec.from_json(job.to_json())
pprint(new_job)

{
    "source_url": "http://127.0.0.1/example",
    "source_id": "ITM123",
    "source_set": "NISV",
    "job_id": null,
    "tasks": {
        "taskSequential": [
            "DOWNLOAD",
            "CV",
            "ASR",
            "INDEX",
            "DELETE"
        ]
    },
    "metadata": {},
    "priority": 1,
    "response": {}
}


## API

In [4]:
# Create a dummy endpoint so we can 'simulate' the behaviour of the workflow,
# and attach it to the job.
dummy = jobspec.DummyEndpoint()
new_job.set_api(dummy)

# Setting the api on the job also makes it available on its tasks (see the
# printed repr below).
print(new_job.tasks[0].api) # instance, won't be serialised with job

<DANE_utils.jobspec.DummyEndpoint object at 0x7f0218175e50>


In [5]:
# Register the job and all of its tasks.
# In the output the job now has a job_id and every task name carries an
# "N:" prefix (presumably the task id assigned on registration -- confirm).
new_job.register()
pprint(new_job)

{
    "source_url": "http://127.0.0.1/example",
    "source_id": "ITM123",
    "source_set": "NISV",
    "job_id": "c30bef02-cc68-444e-85bf-9240c0d70a3c",
    "tasks": {
        "taskSequential": [
            "0:DOWNLOAD",
            "1:CV",
            "2:ASR",
            "3:INDEX",
            "4:DELETE"
        ]
    },
    "metadata": {},
    "priority": 1,
    "response": {}
}


In [6]:
# Explicitly run a specific task by indexing into the task container.
# There is no guarantee that this task can run successfully if the preceding
# tasks haven't been run.
new_job.tasks[0].run()

DummyEndpoint: Executed task DOWNLOAD for job: c30bef02-cc68-444e-85bf-9240c0d70a3c


In [7]:
# Run the next task in the container; the output shows CV, i.e. the task
# following the DOWNLOAD task that was run explicitly in the previous cell.
new_job.tasks.run()

DummyEndpoint: Executed task CV for job: c30bef02-cc68-444e-85bf-9240c0d70a3c


In [8]:
# Attach the dummy endpoint to the original job and register it.
job.set_api(dummy)
job.register()

# Keep running the job until it reports that it is done; the output lists
# the tasks in their sequential order, from DOWNLOAD through DELETE.
while not job.isDone():
    job.run()

DummyEndpoint: Executed task DOWNLOAD for job: ff9a2b8b-5362-4776-b1c6-cb9f2f35d84a
DummyEndpoint: Executed task CV for job: ff9a2b8b-5362-4776-b1c6-cb9f2f35d84a
DummyEndpoint: Executed task ASR for job: ff9a2b8b-5362-4776-b1c6-cb9f2f35d84a
DummyEndpoint: Executed task INDEX for job: ff9a2b8b-5362-4776-b1c6-cb9f2f35d84a
DummyEndpoint: Executed task DELETE for job: ff9a2b8b-5362-4776-b1c6-cb9f2f35d84a


# Nesting Task Containers

In [9]:
# Task containers can be nested: here a taskSequential container holds a
# plain task followed by two taskParallel containers.
new_tasks = jobspec.taskSequential([
    'DOWNLOAD',
    jobspec.taskParallel(['CV', 'ASR']),
    jobspec.taskParallel(['INDEX', 'DELETE']),
])
pprint(new_tasks)

{
    "taskSequential": [
        "DOWNLOAD",
        {
            "taskParallel": [
                "CV",
                "ASR"
            ]
        },
        {
            "taskParallel": [
                "INDEX",
                "DELETE"
            ]
        }
    ]
}


In [10]:
# Round-trip the nested container through JSON; the printed structure shows
# that the nesting is preserved.
newer_tasks = jobspec.taskContainer.from_json(new_tasks.to_json())
pprint(newer_tasks)

{
    "taskSequential": [
        "DOWNLOAD",
        {
            "taskParallel": [
                "CV",
                "ASR"
            ]
        },
        {
            "taskParallel": [
                "INDEX",
                "DELETE"
            ]
        }
    ]
}


In [11]:
# Build a second job around the nested task container.
newer_job = jobspec.jobspec(
    source_url='http://127.0.0.1/example2',
    source_id='ITM124',
    source_set='NISV',
    tasks=newer_tasks,
)

In [12]:
# Attach the dummy endpoint first, then register the job.
newer_job.set_api(dummy)
newer_job.register()

In [13]:
# Rebuild the registered job from its JSON form.
newest_job = jobspec.jobspec.from_json(newer_job.to_json())
newest_job.set_api(dummy) # api isn't serialised, so need to set again

# tasks[0] is the DOWNLOAD task; tasks[1] is the first taskParallel
# container, so running it executes both CV and ASR (see output).
newest_job.tasks[0].run()
newest_job.tasks[1].run()

DummyEndpoint: Executed task DOWNLOAD for job: 1c5e633e-5361-45f2-86e5-fd19e5f8428c
DummyEndpoint: Executed task CV for job: 1c5e633e-5361-45f2-86e5-fd19e5f8428c
DummyEndpoint: Executed task ASR for job: 1c5e633e-5361-45f2-86e5-fd19e5f8428c
