### Asynchronous job submission and waiting for job completion

Import the required modules and determine the number of cores and nodes available in the allocation.

In [None]:
import os
import concurrent.futures

In [None]:
from flux.job import JobspecV1, FluxExecutor

In [None]:
ncores = !flux resource list -no {ncores} --state=up
nc = int(ncores[0])

nnodes = !flux resource list -no {nnodes} --state=up
nn = int(nnodes[0])

Create a job request that will run `compute.py` across all the nodes in the allocation.

In [None]:
# Job request for compute.py: two tasks per node on each of the `nn` nodes,
# with four cores reserved for every task.
compute_jobspec = JobspecV1.from_command(
    command=[
        "./flux-workflow-examples/job-submit-wait/compute.py",
        "10",
    ],
    num_tasks=2 * nn,
    num_nodes=nn,
    cores_per_task=4,
)
# Give the job a snapshot of this process's environment and start it in the
# notebook's current working directory (the command path above is relative).
compute_jobspec.environment = dict(os.environ)
compute_jobspec.cwd = os.getcwd()

Create a _bad_ job request that _flux_ will see as failing when it is launched.

In [None]:
# /bin/false exits with a non-zero status, so this job reports a failure.
bad_jobspec = JobspecV1.from_command(command=["/bin/false"])

Launch a set of jobs, half of them running `compute.py` and half of them the _bad_ job, and wait for them to complete in any order, either successfully or with an error code.

In [None]:
# Total number of jobs to submit; the submission cell below splits them
# between compute_jobspec and bad_jobspec.
njobs = 12

In [None]:
with FluxExecutor() as executor:
    futures = []
    # The first njobs // 2 submissions use compute_jobspec ...
    for _ in range(njobs // 2):
        submitted = executor.submit(compute_jobspec)
        futures.append(submitted)
        print(f"submit: {id(submitted)} compute_jobspec")
    # ... and the remaining ones use bad_jobspec.
    for _ in range(njobs // 2, njobs):
        submitted = executor.submit(bad_jobspec)
        futures.append(submitted)
        print(f"submit: {id(submitted)} bad_jobspec")
    # Report each job as soon as it finishes, whatever the completion order.
    for fut in concurrent.futures.as_completed(futures):
        error = fut.exception()
        if error is not None:
            print(f"wait: {id(fut)} Error: job raised error {error}")
            continue
        returncode = fut.result()
        if returncode == 0:
            print(f"wait: {id(fut)} Success")
        else:
            print(f"wait: {id(fut)} Error: job returned exit code {returncode}")

### Sliding submission window

This code launches a set of jobs but uses a sliding _window_ to control how many are submitted at any point in time.

In [None]:
import collections
import concurrent.futures as cf

In [None]:
# Job request for the sliding-window example: 8 tasks over 2 nodes,
# 4 cores per task, passing "5" as the argument to compute.py.
compute_jobspec = JobspecV1.from_command(
    command=[
        "./flux-workflow-examples/job-submit-wait/compute.py",
        "5",
    ],
    num_tasks=8,
    num_nodes=2,
    cores_per_task=4,
)
# Give the job a snapshot of this process's environment and start it in the
# notebook's current working directory (the command path above is relative).
compute_jobspec.environment = dict(os.environ)
compute_jobspec.cwd = os.getcwd()

In [None]:
njobs = 16  # total number of jobs to run
window_size = 5  # upper bound on jobs submitted but not yet completed

# Jobs still waiting to be submitted; every entry is the same jobspec object.
jobspec_queue = collections.deque([compute_jobspec] * njobs)
# Futures of jobs that have been submitted but have not completed yet.
futures = []

In [None]:
with FluxExecutor() as executor:
    # Keep going until nothing is left to submit and nothing is in flight.
    while jobspec_queue or futures:
        window_has_room = len(futures) < window_size
        if jobspec_queue and window_has_room:
            # Fill the window one job per iteration.
            fut = executor.submit(jobspec_queue.popleft())
            print(f"submit: {id(fut)}")
            futures.append(fut)
            continue

        # Window is full (or the queue is drained): block until at least one
        # in-flight job finishes, then keep only the unfinished ones.
        done, not_done = cf.wait(futures, return_when=cf.FIRST_COMPLETED)
        futures = list(not_done)
        for fut in done:
            error = fut.exception()
            if error is not None:
                print(f"wait: {id(fut)} Error: job raised error {error}")
                continue
            returncode = fut.result()
            if returncode == 0:
                print(f"wait: {id(fut)} Success")
            else:
                print(f"wait: {id(fut)} Error: job returned exit code {returncode}")

### Wait for a specific job to complete

Finally, launch a set of jobs and randomly choose one of them to wait for.

In [None]:
# Job request for the wait-for-one example: 12 tasks over 3 nodes,
# 4 cores per task, passing "2" as the argument to compute.py.
compute_jobspec = JobspecV1.from_command(
    command=[
        "./flux-workflow-examples/job-submit-wait/compute.py",
        "2",
    ],
    num_tasks=12,
    num_nodes=3,
    cores_per_task=4,
)
# Give the job a snapshot of this process's environment and start it in the
# notebook's current working directory (the command path above is relative).
compute_jobspec.environment = dict(os.environ)
compute_jobspec.cwd = os.getcwd()

In [None]:
import random

# Number of jobs to submit; one of them is picked at random to wait for.
njobs = 8

In [None]:
with FluxExecutor() as executor:
    # Index (in submission order) of the job we are going to wait for.
    chosen_job = random.randrange(njobs)
    futures = []
    # Every job in this example uses the same compute_jobspec.
    for _ in range(njobs):
        futures.append(executor.submit(compute_jobspec))
        print(f"submit: {id(futures[-1])} compute_jobspec")
    chosen_future = futures[chosen_job]
    chosen_job_id = id(chosen_future)
    print(f"waiting for {chosen_job_id} to complete")
    # Block on the chosen future only. result() returns once that job has
    # finished, so there is no need to scan every future as it completes and
    # compare ids.
    # NOTE(review): leaving the `with` block presumably still waits for the
    # remaining jobs via the executor's shutdown - confirm.
    print(f"job {chosen_job_id} Completed [{chosen_future.result()}]")

Once the chosen job completes we may want to cancel the others. Check out the `job-cancelation` notebook to see how to cancel jobs.