In [1]:
# useful for development to reload modules everytime we run code
%load_ext autoreload
%autoreload 2

## Imports
If any errors in imports, then execute the following package installs and execute the imports again:

pip install -r requirements.txt -U

In [2]:
# general imports
from pprint import pprint
import io, os, sys, time, json, uuid, glob
from datetime import datetime, timedelta
from dotenv import load_dotenv

# azure batch
import azure.batch.batch_auth as batchauth
import azure.batch._batch_service_client as batch
import azure.batch.models as batchmodels

# azure storage
import azure.storage.blob as azureblob

## Loading Configuration
Before executing, make sure you create a .env file with the following format:

```
BATCH_SERVICE_URL=<batch_service_url>
BATCH_ACCOUNT_NAME=<batch_account_name>
BATCH_ACCOUNT_KEY=<batch_account_key>
BATCH_STORAGE_ACCOUNT_NAME=<storage_account_name>
BATCH_STORAGE_ACCOUNT_KEY=<storage_account_key>
```

This .env file is not added on the repo to avoid surfacing sensitive data.

In [3]:
load_dotenv()

batch_service_url = os.getenv("BATCH_SERVICE_URL")
batch_account_name = os.getenv("BATCH_ACCOUNT_NAME")
batch_account_key = os.getenv("BATCH_ACCOUNT_KEY")
storage_account_name = os.getenv("BATCH_STORAGE_ACCOUNT_NAME")
storage_account_key = os.getenv("BATCH_STORAGE_ACCOUNT_KEY")

print(batch_service_url)

https://covidiabatch.francecentral.batch.azure.com


## Create Clients

In [4]:
# create the batch client to create pools, jobs and tasks on Azure Batch
credentials = batchauth.SharedKeyCredentials(
       batch_account_name,
       batch_account_key)

batch_client = batch.BatchServiceClient(
        credentials,
        batch_url=batch_service_url)

# Create the blob client, for use in obtaining references to
# blob storage containers and uploading files to containers.
blob_client = azureblob.BlockBlobService(
    account_name = storage_account_name,
    account_key = storage_account_key)

## Create Storage Containers
This will create an application container to hold the application files that will be downloaded by the batch nodes.
Additionally it will create an input and output container to store any input files to feed to each task and to collect the output of the tasks

In [5]:
# Use the blob client to create the containers in Azure Storage if they
# don't yet exist.
app_container_name = "application"
input_container_name = "input"
output_container_name = "output"

blob_client.create_container(app_container_name, fail_on_exist=False)
blob_client.create_container(input_container_name, fail_on_exist=False)
blob_client.create_container(output_container_name, fail_on_exist=False)

print("Containers in Storage Account:")

for c in blob_client.list_containers():
    print("\t",c.name)

Containers in Storage Account:
	 application
	 input
	 output


## Helper Azure Storage Methods

In [6]:
def upload_blob_and_create_sas(block_blob_client, container_name, file_name, blob_name, hours=24):

    block_blob_client.create_container(
        container_name,
        fail_on_exist=False)

    block_blob_client.create_blob_from_path(
        container_name,
        blob_name,
        file_name)

    print("Uploaded", file_name, "to container", container_name)

    expiry = datetime.utcnow() + timedelta(hours=hours)
    sas_token = block_blob_client.generate_blob_shared_access_signature(
        container_name,
        blob_name,
        permission=azureblob.BlobPermissions.READ,
        expiry=expiry)

    sas_url = block_blob_client.make_blob_url(
        container_name,
        blob_name,
        sas_token=sas_token)

    return sas_url

def create_container_sas_token(block_blob_client, container_name, permission, hours=24):
 
    expiry = datetime.utcnow() + timedelta(hours=hours)
    sas_token = block_blob_client.generate_container_shared_access_signature(
        container_name, permission=permission, expiry=expiry)

    valid_sas_url = "https://{}.blob.core.windows.net/{}?{}".format(
        block_blob_client.account_name, container_name, sas_token
    )
    
    return valid_sas_url

## Copying app files to app container
This step will zip the files in the app_dir folder and upload the zip package to the application container created in the previous step. We will also generate a script file to install Python on the Batch nodes

In [7]:
# folder to be zipped
app_dir = "sample_application"
# zip file name
app_package_file_name = "app.tar.gz"
# python install script
setup_file_name = "installPython.sh"
# folder to store the zipped file and install script
resource_folder = "batch_resources"

In [8]:
# zip the application and copy zip file to resource folder
!ls -la $app_dir
!mkdir $resource_folder
!echo zipping application
!tar czf $resource_folder/app.tar.gz -C ./$app_dir/ .

total 20
drwxrwxr-x 3 quick quick 4096 Apr 14 21:34 .
drwxrwxr-x 6 quick quick 4096 Apr 14 22:57 ..
-rw-rw-r-- 1 quick quick 1333 Apr 14 21:37 main.py
-rw-rw-r-- 1 quick quick    6 Apr 14 17:03 requirements.txt
drwxrwxr-x 3 quick quick 4096 Apr 14 11:10 sample_utils
mkdir: cannot create directory ‘batch_resources’: File exists
zipping application


This next cell creates the Python setup script for Ubuntu nodes

In [9]:
%%writefile $resource_folder/$setup_file_name
sudo apt-get update
sudo su
apt-get install python3.6
apt-get install -y python3-pip

Overwriting batch_resources/installPython.sh


In [10]:
# get file paths for upload
app_file_path = os.path.join(resource_folder, app_package_file_name)
setup_file_path = os.path.join(resource_folder, setup_file_name)

# upload application package file to application container
appFileSas = upload_blob_and_create_sas(blob_client, app_container_name, app_file_path, app_package_file_name)
print(appFileSas)

# upload install script to application container
setupFileSas = upload_blob_and_create_sas(blob_client, app_container_name, setup_file_path, setup_file_name)
print(setupFileSas)

Uploaded batch_resources/app.tar.gz to container application
https://covidiabatchstorage.blob.core.windows.net/application/app.tar.gz?se=2020-04-15T22%3A58%3A37Z&sp=r&sv=2018-03-28&sr=b&sig=9RRpXh1rPnACsnU9YuEvuglUI%2BOBT4z/HhcO/EL5EYg%3D
Uploaded batch_resources/installPython.sh to container application
https://covidiabatchstorage.blob.core.windows.net/application/installPython.sh?se=2020-04-15T22%3A58%3A37Z&sp=r&sv=2018-03-28&sr=b&sig=ZjIXmfzReYJWNeLIaoKBL70PmywvrlEb7VXHAkjG9TY%3D


## Creating Azure Batch Pool
A pool is the central compute resource for Azure Batch. It's composed of several machines that will be assigned tasks once a job is created.
In here, we setup a pool of Ubuntu nodes and create a start task to make sure Python is installed. As machines get added to the pool, this task will imediately run before any tasks are assigned to the nodes.

In [11]:
# let's read the configuration
settings_file = "batch_settings.json"

with open(settings_file) as f:
    settings = json.load(f)

pprint(settings, indent=2)

{ 'dedicatedVmCount': 0,
  'jobIdPrefix': 'SimulationJobInfluenza',
  'lowPriorityVmCount': 5,
  'poolId': 'SimulationPoolInfluenza',
  'vmSize': 'STANDARD_A1_V2'}


### Defining a StartTask
Runs on all nodes on startup. This will reference the install script to make sure Python is installed on each node

Note:
*To enable detailed monitoring of pool nodes, we have to setup Application Insights as detailed here: https://github.com/Azure/batch-insights. This is not required but it's a good way to understand if we can run more tasks in paralel inside the same node. This requires setting up 3 environment variables on the pool start task. In this notebook we are setting the variables but we are not enabling app insights by default because it causes a significant spike in node start time (a few minutes compared to a few seconds). This won't be relevant in a real scenario where a job takes significant more time to run than the startup time of the pool, but in this demo it has a noticeable impact.*

In [23]:
# setting up app insights related environment variables in task
env_variables = list()
env_variables.append(batchmodels.EnvironmentSetting(name="APP_INSIGHTS_INSTRUMENTATION_KEY", value="d5c240e1-1a2e-4e69-b5fd-9009627f03ec"))
env_variables.append(batchmodels.EnvironmentSetting(name="APP_INSIGHTS_APP_ID", value="c32ac08b-472e-4128-b245-62c3ed03a8a5"))
env_variables.append(batchmodels.EnvironmentSetting(name="BATCH_INSIGHTS_DOWNLOAD_URL", value="https://github.com/Azure/batch-insights/releases/download/v1.3.0/batch-insights"))                                     

# notice we could use this to setup any environment variables that the task would need.

In [14]:
# create an elevated identity to run the start task - needed whenever you require sudo access
user = batchmodels.AutoUserSpecification(scope=batchmodels.AutoUserScope.pool, elevation_level=batchmodels.ElevationLevel.admin)
user_identity = batchmodels.UserIdentity(auto_user=user)   

# setup the task command - executing the shell script that install python. 
command_line = f"/bin/bash -c \"sudo sh {setup_file_name}\""

# alternative command line to setup application insights - uncomment this to enable app insights
#command_line = f"/bin/bash -c \"sudo sh {setup_file_name} && wget -O - https://raw.githubusercontent.com/Azure/batch-insights/master/scripts/run-linux.sh | bash \""

# setup the start task
startTask = batchmodels.StartTask(
        command_line=command_line,
        wait_for_success = True,
        user_identity = user_identity,
        environment_settings=env_variables,
        resource_files = [batchmodels.ResourceFile(
                         file_path = setup_file_name,
                         http_url = setupFileSas)])

print("Start task:")
print(f"CommandLine: {command_line}")
print(f"ResourceFiles:")
for f in startTask.resource_files:
    print(f"\t{f.http_url}")

Start task:
CommandLine: /bin/bash -c "sudo sh installPython.sh && wget -O - https://raw.githubusercontent.com/Azure/batch-insights/master/scripts/run-linux.sh | bash "
ResourceFiles:
	https://covidiabatchstorage.blob.core.windows.net/application/installPython.sh?se=2020-04-15T22%3A58%3A37Z&sp=r&sv=2018-03-28&sr=b&sig=ZjIXmfzReYJWNeLIaoKBL70PmywvrlEb7VXHAkjG9TY%3D


### Creating the Pool

In [15]:
# checking configuration
poolId = settings["poolId"]
vmSize = settings["vmSize"]
dedicatedNodes = settings["dedicatedVmCount"]
lowPriorityNodes = settings["lowPriorityVmCount"]

print(f"Creating pool {poolId} with:")
print("Size:",vmSize)
print("Number of dedicated nodes:",dedicatedNodes)
print("Number of low priority nodes:",lowPriorityNodes)

Creating pool SimulationPoolInfluenza with:
Size: STANDARD_A1_V2
Number of dedicated nodes: 0
Number of low priority nodes: 5


In [16]:
# setup pool
pool = batchmodels.PoolAddParameter(
    id=poolId,
    virtual_machine_configuration=batchmodels.VirtualMachineConfiguration(
        image_reference=batchmodels.ImageReference(
            publisher="Canonical",
            offer="UbuntuServer",
            sku="18.04-LTS",
            version="latest"
        ),
        node_agent_sku_id="batch.node.ubuntu 18.04"),
    vm_size=vmSize,
    target_dedicated_nodes=dedicatedNodes,
    target_low_priority_nodes=lowPriorityNodes,
    start_task=startTask)

# create pool
try:
    print("Attempting to create pool:", pool.id)
    batch_client.pool.add(pool)
    print("Created pool:", pool.id)
except batchmodels.BatchErrorException as e:
    if e.error.code != "PoolExists":
        raise
    else:
        print("Pool {!r} already exists".format(pool.id))

Attempting to create pool: SimulationPoolInfluenza
Created pool: SimulationPoolInfluenza


In [17]:
def wait_for_all_nodes_state(batch_client, pool, node_state):
    print('Waiting for all nodes in pool {} to reach one of: {!r}\n'.format(
        pool.id, node_state))
    i = 0
    targetNodes = pool.target_dedicated_nodes + pool.target_low_priority_nodes
    while True:
        # refresh pool to ensure that there is no resize error
        pool = batch_client.pool.get(pool.id)
        if pool.resize_errors is not None:
            resize_errors = "\n".join([repr(e) for e in pool.resize_errors])
            raise RuntimeError(
                'resize error encountered for pool {}:\n{}'.format(
                    pool.id, resize_errors))
        nodes = list(batch_client.compute_node.list(pool.id))
        if (len(nodes) >= targetNodes and
                all(node.state in node_state for node in nodes)):
            return nodes
        i += 1
        if i % 3 == 0:
            print('waiting for {} nodes to reach desired state...'.format(
                targetNodes))
        time.sleep(10)

# we check if all nodes are up before we continue
nodes = wait_for_all_nodes_state(batch_client, pool, [batchmodels.ComputeNodeState.idle, batchmodels.ComputeNodeState.running])

# show all nodes
for n in nodes:
    print(n.id, n.state, n.is_dedicated)

Waiting for all nodes in pool SimulationPoolInfluenza to reach one of: [<ComputeNodeState.idle: 'idle'>, <ComputeNodeState.running: 'running'>]

waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
tvmps_477d8a63ba8bd4433965aff979ec71f38d287e7bff4a515683e1b022e8230317_p ComputeNodeState.idle False
tvmps_862b92057067bd391d54175ba8e5c21ad626b6129f28f6bf47ad465cdbfab8d5_p ComputeNodeState.idle False
tvmps_b3f39852da98c4f38b637eff990f67bfe01108bdd17b21f2f969ebffe3ab4c5c_p ComputeNodeState.idle False
tvmps_b533ace5f45d7275cdd1daa9ef76ba8faf610061

## Creating a Job to run on the Pool
We will now create a job and an associated Prep task to ensure the application is downloaded, extracted to a known location and all python packages are installed via pip

In [18]:
# creating a unique job Id
job_id = settings["jobIdPrefix"] + "_" + datetime.now().strftime("%Y-%m-%d_%H-%m")

# setup the task command
command_line = f"/bin/bash -c \"cd $AZ_BATCH_NODE_SHARED_DIR && tar -xvf $AZ_BATCH_JOB_PREP_WORKING_DIR/{app_package_file_name} && sudo su && pip3 install -r requirements.txt \""

# create an elevated identity to run the start task
user = batchmodels.AutoUserSpecification(scope=batchmodels.AutoUserScope.pool, elevation_level=batchmodels.ElevationLevel.admin)
user_identity = batchmodels.UserIdentity(auto_user=user)   

# setup the start task
jobTask = batchmodels.JobPreparationTask(
        command_line = command_line,
        user_identity = user_identity,
        wait_for_success = True,
        resource_files = [batchmodels.ResourceFile(
                         file_path = app_package_file_name,
                         http_url = appFileSas)])

print("Job Preparation task:")
print(f"CommandLine: {command_line}")
print(f"ResourceFiles:")
for f in jobTask.resource_files:
    print(f"\t{f.http_url}")

Job Preparation task:
CommandLine: /bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && tar -xvf $AZ_BATCH_JOB_PREP_WORKING_DIR/app.tar.gz && sudo su && pip3 install -r requirements.txt "
ResourceFiles:
	https://covidiabatchstorage.blob.core.windows.net/application/app.tar.gz?se=2020-04-15T22%3A58%3A37Z&sp=r&sv=2018-03-28&sr=b&sig=9RRpXh1rPnACsnU9YuEvuglUI%2BOBT4z/HhcO/EL5EYg%3D


In [19]:
# setup job
job = batchmodels.JobAddParameter(
    id=job_id,
    pool_info=batchmodels.PoolInformation(pool_id=pool.id),
    job_preparation_task = jobTask)

# create job
print('Creating job [{}]...'.format(job.id))

try:
    batch_client.job.add(job)
except batchmodels.batch_error.BatchErrorException as err:
    print_batch_exception(err)
    if err.error.code != "JobExists":
        raise
    else:
        print("Job {!r} already exists".format(job_id))

Creating job [SimulationJobInfluenza_2020-04-14_23-04]...


## Adding Tasks to the Job
Now that our application is correctly configured and we made sure Python is installed in all nodes, we need to setup a task to run a work item. We can launch many tasks inside the same job and Azure Batch will assign it to any VMs in the pool.

In this example, we will create as many tasks as files in input_data (a local folder in this repo). This is a simple way of doing paralel processing of a large file when splits can be done. Another option is simple iterating over an array of parameter values and creating a task for each different value. We illustrate here the most complicated scenario which involves passing different input files to the script and uploading those files to the input container in the storage account.

These tasks also write output to storage. The main.py script writes an output file and we configure the task to upload these files to the output container we created earlier. It is done after the task ends successfully

In [20]:
# get a sas url for write access to output container. This will be used so we can persist task output files
output_container_sas = create_container_sas_token(blob_client, container_name=output_container_name, permission=azureblob.BlobPermissions.WRITE)
print(output_container_sas)

https://covidiabatchstorage.blob.core.windows.net/output?se=2020-04-15T23%3A05%3A44Z&sp=w&sv=2018-03-28&sr=c&sig=xf1RT9c40Tg%2BowabSH1Y6umYZv5Fbq%2BH8vCYlEO/zfI%3D


In [21]:
# we get a list of input files
file_list = glob.glob("input_data/*.dat")

# initialize task counter
i = 0
for f in file_list:
    # increment task counter
    i = i + 1
    
    # create a task id
    task_id = "Process-" + str(i)
    print("\nCreating task",task_id)
    
    # grab file name
    input_file = f.split("/")[-1:][0]
    output_file = input_file.replace(".dat","_output.csv")
    
    # upload file to azure storage
    input_file_sas = upload_blob_and_create_sas(blob_client, input_container_name, f, input_file)
    
    # setup task command
    taskCommand = f"/bin/bash -c \"cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/{input_file} -o $AZ_BATCH_TASK_WORKING_DIR/{output_file}\""
    print(taskCommand)
    
    # create an elevated identity to run the start task
    user = batchmodels.AutoUserSpecification(scope=batchmodels.AutoUserScope.pool, elevation_level=batchmodels.ElevationLevel.admin)
    user_identity = batchmodels.UserIdentity(auto_user=user)   
   
    # setup output files destination
    containerDest = batchmodels.OutputFileBlobContainerDestination(container_url = output_container_sas, path = task_id)
    outputFileDestination = batchmodels.OutputFileDestination(container = containerDest)
    
    # setup output files upload condition
    uploadCondition = batchmodels.OutputFileUploadCondition.task_success
    uploadOptions = batchmodels.OutputFileUploadOptions(upload_condition = uploadCondition)
    
    # output files
    output_files = [batchmodels.OutputFile(destination = outputFileDestination,
                                        upload_options = uploadOptions,
                                        file_pattern="*output.csv")]
    
    
    # create task
    task = batchmodels.TaskAddParameter(
    id = task_id,
    command_line=taskCommand,
    user_identity=user_identity,
    resource_files=[batchmodels.ResourceFile(
                        file_path=input_file,
                        http_url=input_file_sas)],
    output_files=output_files)
    
    
    batch_client.task.add(job_id=job.id, task=task)


Creating task Process-1
Uploaded input_data/data56.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data56.dat -o $AZ_BATCH_TASK_WORKING_DIR/data56_output.csv"

Creating task Process-2
Uploaded input_data/data43.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data43.dat -o $AZ_BATCH_TASK_WORKING_DIR/data43_output.csv"

Creating task Process-3
Uploaded input_data/data8.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data8.dat -o $AZ_BATCH_TASK_WORKING_DIR/data8_output.csv"

Creating task Process-4
Uploaded input_data/data35.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data35.dat -o $AZ_BATCH_TASK_WORKING_DIR/data35_output.csv"

Creating task Process-5
Uploaded input_data/data33.dat to container input
/bin/bash -c "cd $AZ

Uploaded input_data/data40.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data40.dat -o $AZ_BATCH_TASK_WORKING_DIR/data40_output.csv"

Creating task Process-40
Uploaded input_data/data18.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data18.dat -o $AZ_BATCH_TASK_WORKING_DIR/data18_output.csv"

Creating task Process-41
Uploaded input_data/data52.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data52.dat -o $AZ_BATCH_TASK_WORKING_DIR/data52_output.csv"

Creating task Process-42
Uploaded input_data/data9.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data9.dat -o $AZ_BATCH_TASK_WORKING_DIR/data9_output.csv"

Creating task Process-43
Uploaded input_data/data54.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DI

## Monitoring Tasks

In [22]:
def wait_for_tasks_to_complete(batch_client, job_id, timeout):

    time_to_timeout_at = datetime.now() + timeout

    while datetime.now() < time_to_timeout_at:
        print("Checking if all tasks are complete...")
        tasks = batch_client.task.list(job_id)

        incomplete_tasks = [task for task in tasks if
                            task.state != batchmodels.TaskState.completed]
        if not incomplete_tasks:
            return
        time.sleep(30)

    raise TimeoutError("Timed out waiting for tasks to complete")

wait_for_tasks_to_complete(batch_client, job.id, timedelta(minutes=60))
print("All Tasks Complete!")

Checking if all tasks are complete...
Checking if all tasks are complete...
Checking if all tasks are complete...
Checking if all tasks are complete...
Checking if all tasks are complete...
Checking if all tasks are complete...
All Tasks Complete!


### Helper functions to read task output directly

In [None]:
def read_stream_as_string(stream, encoding):
    output = io.BytesIO()
    try:
        for data in stream:
            output.write(data)
        if encoding is None:
            encoding = 'utf-8'
        return output.getvalue().decode(encoding)
    finally:
        output.close()
    raise RuntimeError('could not write data to stream or decode bytes')

def read_task_file_as_string(batch_client, job_id, task_id, file_name, encoding=None):
    stream = batch_client.file.get_from_task(job_id, task_id, file_name)
    return read_stream_as_string(stream, encoding)

def print_task_output(batch_client, job_id, task_ids, encoding=None):
    _STANDARD_OUT_FILE_NAME = 'stdout.txt'
    _STANDARD_ERROR_FILE_NAME = 'stderr.txt'
    
    for task_id in task_ids:
        file_text = read_task_file_as_string(
            batch_client,
            job_id,
            task_id,
            _STANDARD_OUT_FILE_NAME,
            encoding)
        print("{} content for task {}: ".format(
            _STANDARD_OUT_FILE_NAME,
            task_id))
        print(file_text)

        file_text = read_task_file_as_string(
            batch_client,
            job_id,
            task_id,
            _STANDARD_ERROR_FILE_NAME,
            encoding)
        print("{} content for task {}: ".format(
            _STANDARD_ERROR_FILE_NAME,
            task_id))
        print(file_text)

tasks = batch_client.task.list(job_id)
task_ids = [task.id for task in tasks]

# let's print the output of the first 3 tasks
print_task_output(batch_client, job_id, task_ids[:3])

## Checking Output
As we created each task with an output file option, the produced filed by each execution of our sample_application will result in a new file being created in the output container in Azure Storage. We can quickly check all the files here:

In [None]:
output_file_list = blob_client.list_blobs(container_name=output_container_name)
print("Number of files:",len(list(output_file_list)))
print("\nFirst 10:")
for f in list(output_file_list)[0:10]:
    print(f.name)

## Delete Job
No issues in removing the job because each task will write it's results to the output container in Azure Storage, however, keeping this uncommented will allow you to see the job in Batch Explorer and debug any failed tasks

In [None]:
#batch_client.job.delete(job.id)

## Delete Pool
Note: you may not necessarily want to do this because creating the pool takes some time

In [None]:
#batch_client.pool.delete(pool.id)