In [64]:
# useful for development to reload modules everytime we run code
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Imports
If any errors in imports, then execute the following package installs and execute the imports again:

pip install -r requirements.txt -U

In [65]:
# general imports
from pprint import pprint
import datetime, io, os, sys, time, json, uuid, glob
from datetime import date

# azure batch
import azure.batch.batch_auth as batchauth
import azure.batch._batch_service_client as batch
import azure.batch.models as batchmodels

# azure storage
import azure.storage.blob as azureblob

## Loading Configuration
Before executing, make sure settings.json file is correctly configured to access your resources on Azure

In [66]:
settings_file = "settings.json"

with open(settings_file) as f:
    settings = json.load(f)

pprint(settings, indent=2)

{ 'batchAccountKey': 'Z5Gevll0QrJt0KzkXMi+EwbPbHXN9FDRPgvNOiDeMttT/ubg3tPr54ETbVwxpXub8tMHlZ8IGo026B29ThmKFQ==',
  'batchAccountName': 'covidiabatch',
  'batchServiceUrl': 'https://covidiabatch.francecentral.batch.azure.com',
  'dedicatedVmCount': 0,
  'jobIdPrefix': 'SimulationJob',
  'lowPriorityVmCount': 5,
  'poolId': 'SimulationPoolInfluenza',
  'storageAccountKey': '6mZDnh2bYaunEgt+WTX7EqeBMw944ywtrZsaFyvbAeAWmr/Sg1g6ceRrOYkr3KUowjPvUBYZufNEC29B1jwdbg==',
  'storageAccountName': 'covidiabatchstorage',
  'vmSize': 'STANDARD_A1_V2'}


## Create Clients

In [67]:
# create the batch client to create pools, jobs and tasks on Azure Batch
credentials = batchauth.SharedKeyCredentials(
       settings["batchAccountName"],
       settings["batchAccountKey"])

batch_client = batch.BatchServiceClient(
        credentials,
        batch_url=settings["batchServiceUrl"])

# Create the blob client, for use in obtaining references to
# blob storage containers and uploading files to containers.

blob_client = azureblob.BlockBlobService(
    account_name = settings["storageAccountName"],
    account_key = settings["storageAccountKey"])

## Create Storage Containers
This will create an application container to hold the application files that will be downloaded by the batch nodes.
Additionally it will create an input and output container to store any input files to feed to each task and to collect the output of the tasks

In [68]:
# Use the blob client to create the containers in Azure Storage if they
# don't yet exist.
app_container_name = "application"
input_container_name = "input"
output_container_name = "output"

blob_client.create_container(app_container_name, fail_on_exist=False)
blob_client.create_container(input_container_name, fail_on_exist=False)
blob_client.create_container(output_container_name, fail_on_exist=False)

print("Containers in Storage Account:")

for c in blob_client.list_containers():
    print("\t",c.name)

Containers in Storage Account:
	 application
	 input
	 output


## Helper Azure Storage Methods

In [69]:
def upload_blob_and_create_sas(block_blob_client, container_name, file_name, blob_name, hours=24):

    block_blob_client.create_container(
        container_name,
        fail_on_exist=False)

    block_blob_client.create_blob_from_path(
        container_name,
        blob_name,
        file_name)

    print("Uploaded", file_name, "to container", container_name)

    expiry = datetime.datetime.utcnow() + datetime.timedelta(hours=hours)
    sas_token = block_blob_client.generate_blob_shared_access_signature(
        container_name,
        blob_name,
        permission=azureblob.BlobPermissions.READ,
        expiry=expiry)

    sas_url = block_blob_client.make_blob_url(
        container_name,
        blob_name,
        sas_token=sas_token)

    return sas_url

def create_container_sas_token(block_blob_client, container_name, permission, hours=24):
 
    expiry = datetime.datetime.utcnow() + datetime.timedelta(hours=hours)
    sas_token = block_blob_client.generate_container_shared_access_signature(
        container_name, permission=permission, expiry=expiry)

    valid_sas_url = "https://{}.blob.core.windows.net/{}?{}".format(
        block_blob_client.account_name, container_name, sas_token
    )
    
    return valid_sas_url

## Copying app files to app container
This step will zip the files in the app_dir folder and upload the zip package to the application container created in the previous step. We will also generate a script file to install Python on the Batch nodes

In [70]:
# folder to be zipped
app_dir = "sample_application"
# zip file name
app_package_file_name = "app.tar.gz"
# python install script
setup_file_name = "installPython.sh"
# folder to store the zipped file and install script
resource_folder = "batch_resources"

In [71]:
# zip the application and copy zip file to resource folder
!ls -la $app_dir
!mkdir $resource_folder
!echo zipping application
!tar czf $resource_folder/app.tar.gz -C ./$app_dir/ .

total 20
drwxrwxr-x 3 quick quick 4096 Apr 14 17:03 .
drwxrwxr-x 6 quick quick 4096 Apr 14 21:17 ..
-rw-rw-r-- 1 quick quick 1333 Apr 14 11:28 main.py
-rw-rw-r-- 1 quick quick    6 Apr 14 17:03 requirements.txt
drwxrwxr-x 3 quick quick 4096 Apr 14 11:10 sample_utils
mkdir: cannot create directory ‘batch_resources’: File exists
zipping application


This next cell creates the Python setup script for Ubuntu nodes

In [72]:
%%writefile $resource_folder/$setup_file_name
sudo apt-get update
sudo su
apt-get install python3.6
apt-get install -y python3-pip

Overwriting batch_resources/installPython.sh


In [73]:
# get file paths for upload
app_file_path = os.path.join(resource_folder, app_package_file_name)
setup_file_path = os.path.join(resource_folder, setup_file_name)

# upload application package file to application container
appFileSas = upload_blob_and_create_sas(blob_client, app_container_name, app_file_path, app_package_file_name)
print(appFileSas)

# upload install script to application container
setupFileSas = upload_blob_and_create_sas(blob_client, app_container_name, setup_file_path, setup_file_name)
print(setupFileSas)

Uploaded batch_resources/app.tar.gz to container application
https://covidiabatchstorage.blob.core.windows.net/application/app.tar.gz?se=2020-04-15T21%3A18%3A05Z&sp=r&sv=2018-03-28&sr=b&sig=IJwH/LqJiViNUHi9VY%2B74cf1Ybtfd2rfBiJDMzgaJt0%3D
Uploaded batch_resources/installPython.sh to container application
https://covidiabatchstorage.blob.core.windows.net/application/installPython.sh?se=2020-04-15T21%3A18%3A05Z&sp=r&sv=2018-03-28&sr=b&sig=GMCSWgAeTE6406XbIqHNdLka81PT1eymx/GpnZBDt0E%3D


## Creating Azure Batch Pool
A pool is the central compute resource for Azure Batch. It's composed of several machines that will be assigned tasks once a job is created.
In here, we setup a pool of Ubuntu nodes and create a start task to make sure Python is installed. As machines get added to the pool, this task will imediately run before any tasks are assigned to the nodes.

### Defining a StartTask
Runs on all nodes on startup. This will reference the install script to make sure Python is installed on each node

In [74]:
# create an elevated identity to run the start task - needed whenever you require sudo access
user = batchmodels.AutoUserSpecification(scope=batchmodels.AutoUserScope.pool, elevation_level=batchmodels.ElevationLevel.admin)
user_identity = batchmodels.UserIdentity(auto_user=user)   

# setup the task command - executing the shell script that install python
command_line = f"/bin/bash -c \"sudo sh {setup_file_name}\""

# setup the start task
startTask = batchmodels.StartTask(
        command_line=command_line,
        wait_for_success = True,
        user_identity = user_identity,
        resource_files = [batchmodels.ResourceFile(
                         file_path = setup_file_name,
                         http_url = setupFileSas)])

print("Start task:")
print(f"CommandLine: {command_line}")
print(f"ResourceFiles:")
for f in startTask.resource_files:
    print(f"\t{f.http_url}")

Start task:
CommandLine: /bin/bash -c "sudo sh installPython.sh"
ResourceFiles:
	https://covidiabatchstorage.blob.core.windows.net/application/installPython.sh?se=2020-04-15T21%3A18%3A05Z&sp=r&sv=2018-03-28&sr=b&sig=GMCSWgAeTE6406XbIqHNdLka81PT1eymx/GpnZBDt0E%3D


### Creating the Pool

In [75]:
# checking configuration
poolId = settings["poolId"]
vmSize = settings["vmSize"]
dedicatedNodes = settings["dedicatedVmCount"]
lowPriorityNodes = settings["lowPriorityVmCount"]

print(f"Creating pool {poolId} with:")
print("Size:",vmSize)
print("Number of dedicated nodes:",dedicatedNodes)
print("Number of low priority nodes:",lowPriorityNodes)

Creating pool SimulationPoolInfluenza with:
Size: STANDARD_A1_V2
Number of dedicated nodes: 0
Number of low priority nodes: 5


In [76]:
# setup pool
pool = batchmodels.PoolAddParameter(
    id=poolId,
    virtual_machine_configuration=batchmodels.VirtualMachineConfiguration(
        image_reference=batchmodels.ImageReference(
            publisher="Canonical",
            offer="UbuntuServer",
            sku="18.04-LTS",
            version="latest"
        ),
        node_agent_sku_id="batch.node.ubuntu 18.04"),
    vm_size=vmSize,
    target_dedicated_nodes=dedicatedNodes,
    target_low_priority_nodes=lowPriorityNodes,
    start_task=startTask)

# create pool
try:
    print("Attempting to create pool:", pool.id)
    batch_client.pool.add(pool)
    print("Created pool:", pool.id)
except batchmodels.BatchErrorException as e:
    if e.error.code != "PoolExists":
        raise
    else:
        print("Pool {!r} already exists".format(pool.id))

Attempting to create pool: SimulationPoolInfluenza
Created pool: SimulationPoolInfluenza


In [77]:
def wait_for_all_nodes_state(batch_client, pool, node_state):
    print('Waiting for all nodes in pool {} to reach one of: {!r}\n'.format(
        pool.id, node_state))
    i = 0
    targetNodes = pool.target_dedicated_nodes + pool.target_low_priority_nodes
    while True:
        # refresh pool to ensure that there is no resize error
        pool = batch_client.pool.get(pool.id)
        if pool.resize_errors is not None:
            resize_errors = "\n".join([repr(e) for e in pool.resize_errors])
            raise RuntimeError(
                'resize error encountered for pool {}:\n{}'.format(
                    pool.id, resize_errors))
        nodes = list(batch_client.compute_node.list(pool.id))
        if (len(nodes) >= targetNodes and
                all(node.state in node_state for node in nodes)):
            return nodes
        i += 1
        if i % 3 == 0:
            print('waiting for {} nodes to reach desired state...'.format(
                targetNodes))
        time.sleep(10)

# we check if all nodes are up before we continue
nodes = wait_for_all_nodes_state(batch_client, pool, [batchmodels.ComputeNodeState.idle, batchmodels.ComputeNodeState.running])

# show all nodes
for n in nodes:
    print(n.id, n.state, n.is_dedicated)

Waiting for all nodes in pool SimulationPoolInfluenza to reach one of: [<ComputeNodeState.idle: 'idle'>, <ComputeNodeState.running: 'running'>]

waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
waiting for 5 nodes to reach desired state...
tvmps_0849b14a971d7c65d2da7513d62c33501aa62f9e9cf3b3ed60f7f5e530ebbb63_p ComputeNodeState.idle False
tvmps_88046169e06a8b41eb63ef7128fb1d4f3a1ed2fe559daf1ee956d8c24478a811_p ComputeNodeState.idle False
tvmps_8a17a9d935ba6ca08b32d9535ef239e5e3908c6828ee29c921e81717622ca28d_p ComputeNodeState.idle False
tvmps_ed14c4e4f566dd46ca4eb7940ea372521166ee5be80080f2522a1131759dd612_p ComputeNodeState.id

## Creating a Job to run on the Pool
We will now create a job and an associated Prep task to ensure the application is downloaded, extracted to a known location and all python packages are installed via pip

In [78]:
# creating a unique job Id
job_id = settings["jobIdPrefix"] + "_" + str(date.today().year) + "_" + str(date.today().month) + "_" + str(date.today().day) + "_" + str(uuid.uuid1())

# setup the task command
command_line = f"/bin/bash -c \"cd $AZ_BATCH_NODE_SHARED_DIR && tar -xvf $AZ_BATCH_JOB_PREP_WORKING_DIR/{app_package_file_name} && sudo su && pip3 install -r requirements.txt \""

# create an elevated identity to run the start task
user = batchmodels.AutoUserSpecification(scope=batchmodels.AutoUserScope.pool, elevation_level=batchmodels.ElevationLevel.admin)
user_identity = batchmodels.UserIdentity(auto_user=user)   

# setup the start task
jobTask = batchmodels.JobPreparationTask(
        command_line = command_line,
        user_identity = user_identity,
        wait_for_success = True,
        resource_files = [batchmodels.ResourceFile(
                         file_path = app_package_file_name,
                         http_url = appFileSas)])

print("Job Preparation task:")
print(f"CommandLine: {command_line}")
print(f"ResourceFiles:")
for f in jobTask.resource_files:
    print(f"\t{f.http_url}")

Job Preparation task:
CommandLine: /bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && tar -xvf $AZ_BATCH_JOB_PREP_WORKING_DIR/app.tar.gz && sudo su && pip3 install -r requirements.txt "
ResourceFiles:
	https://covidiabatchstorage.blob.core.windows.net/application/app.tar.gz?se=2020-04-15T21%3A18%3A05Z&sp=r&sv=2018-03-28&sr=b&sig=IJwH/LqJiViNUHi9VY%2B74cf1Ybtfd2rfBiJDMzgaJt0%3D


In [79]:
# setup job
job = batchmodels.JobAddParameter(
    id=job_id,
    pool_info=batchmodels.PoolInformation(pool_id=pool.id),
    job_preparation_task = jobTask)

# create job
print('Creating job [{}]...'.format(job.id))

try:
    batch_client.job.add(job)
except batchmodels.batch_error.BatchErrorException as err:
    print_batch_exception(err)
    if err.error.code != "JobExists":
        raise
    else:
        print("Job {!r} already exists".format(job_id))

Creating job [SimulationJob_2020_4_14_2efaaf4e-7e96-11ea-a754-793f197936c4]...


## Adding Tasks to the Job
Now that our application is correctly configured and we made sure Python is installed in all nodes, we need to setup a task to run a work item. We can launch many tasks inside the same job and Azure Batch will assign it to any VMs in the pool.

In this example, we will create as many tasks as files in input_data (a local folder in this repo). This is a simple way of doing paralel processing of a large file when splits can be done. Another option is simple iterating over an array of parameter values and creating a task for each different value. We illustrate here the most complicated scenario which involves passing different input files to the script and uploading those files to the input container in the storage account.

These tasks also write output to storage. The main.py script writes an output file and we configure the task to upload these files to the output container we created earlier. It is done after the task ends successfully

In [80]:
# get a sas url for write access to output container. This will be used so we can persist task output files
output_container_sas = create_container_sas_token(blob_client, container_name=output_container_name, permission=azureblob.BlobPermissions.WRITE)
print(output_container_sas)

https://covidiabatchstorage.blob.core.windows.net/output?se=2020-04-15T21%3A23%3A28Z&sp=w&sv=2018-03-28&sr=c&sig=st%2Be7/FQWQ%2ByrIWIj/iy9HnMHHzfARyYuO7aGKKyIpA%3D


In [81]:
# we get a list of input files
file_list = glob.glob("input_data/*.dat")

# initialize task counter
i = 100
for f in file_list:
    # increment task counter
    i = i + 1
    
    # create a task id
    task_id = "Process-" + str(i)
    print("\nCreating task",task_id)
    
    # grab file name
    input_file = f.split("/")[-1:][0]
    output_file = input_file.replace(".dat","_output.csv")
    
    # upload file to azure storage
    input_file_sas = upload_blob_and_create_sas(blob_client, input_container_name, f, input_file)
    
    # setup task command
    taskCommand = f"/bin/bash -c \"cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/{input_file} -o $AZ_BATCH_TASK_WORKING_DIR/{output_file}\""
    print(taskCommand)
    
    # create an elevated identity to run the start task
    user = batchmodels.AutoUserSpecification(scope=batchmodels.AutoUserScope.pool, elevation_level=batchmodels.ElevationLevel.admin)
    user_identity = batchmodels.UserIdentity(auto_user=user)   
   
    # setup output files destination
    containerDest = batchmodels.OutputFileBlobContainerDestination(container_url = output_container_sas, path = task_id)
    outputFileDestination = batchmodels.OutputFileDestination(container = containerDest)
    
    # setup output files upload condition
    uploadCondition = batchmodels.OutputFileUploadCondition.task_success
    uploadOptions = batchmodels.OutputFileUploadOptions(upload_condition = uploadCondition)
    
    # output files
    output_files = [batchmodels.OutputFile(destination = outputFileDestination,
                                        upload_options = uploadOptions,
                                        file_pattern="*output.csv")]
    
    
    # create task
    task = batchmodels.TaskAddParameter(
    id = task_id,
    command_line=taskCommand,
    user_identity=user_identity,
    resource_files=[batchmodels.ResourceFile(
                        file_path=input_file,
                        http_url=input_file_sas)],
    output_files=output_files)
    
    
    batch_client.task.add(job_id=job.id, task=task)


Creating task Process-101
Uploaded input_data/data56.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data56.dat -o $AZ_BATCH_TASK_WORKING_DIR/data56_output.csv"

Creating task Process-102
Uploaded input_data/data43.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data43.dat -o $AZ_BATCH_TASK_WORKING_DIR/data43_output.csv"

Creating task Process-103
Uploaded input_data/data8.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data8.dat -o $AZ_BATCH_TASK_WORKING_DIR/data8_output.csv"

Creating task Process-104
Uploaded input_data/data35.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data35.dat -o $AZ_BATCH_TASK_WORKING_DIR/data35_output.csv"

Creating task Process-105
Uploaded input_data/data33.dat to container input
/bin/bash 


Creating task Process-138
Uploaded input_data/data11.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data11.dat -o $AZ_BATCH_TASK_WORKING_DIR/data11_output.csv"

Creating task Process-139
Uploaded input_data/data40.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data40.dat -o $AZ_BATCH_TASK_WORKING_DIR/data40_output.csv"

Creating task Process-140
Uploaded input_data/data18.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data18.dat -o $AZ_BATCH_TASK_WORKING_DIR/data18_output.csv"

Creating task Process-141
Uploaded input_data/data52.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data52.dat -o $AZ_BATCH_TASK_WORKING_DIR/data52_output.csv"

Creating task Process-142
Uploaded input_data/data9.dat to container input
/bin/bas

## Monitoring Tasks

In [82]:
def wait_for_tasks_to_complete(batch_client, job_id, timeout):
    """Waits for all the tasks in a particular job to complete.

    :param batch_client: The batch client to use.
    :type batch_client: `batchserviceclient.BatchServiceClient`
    :param str job_id: The id of the job to monitor.
    :param timeout: The maximum amount of time to wait.
    :type timeout: `datetime.timedelta`
    """
    time_to_timeout_at = datetime.datetime.now() + timeout

    while datetime.datetime.now() < time_to_timeout_at:
        print("Checking if all tasks are complete...")
        tasks = batch_client.task.list(job_id)

        incomplete_tasks = [task for task in tasks if
                            task.state != batchmodels.TaskState.completed]
        if not incomplete_tasks:
            return
        time.sleep(30)

    raise TimeoutError("Timed out waiting for tasks to complete")

wait_for_tasks_to_complete(batch_client, job.id, datetime.timedelta(minutes=60))
print("All Tasks Complete!")

Checking if all tasks are complete...
Checking if all tasks are complete...
Checking if all tasks are complete...
Checking if all tasks are complete...
Checking if all tasks are complete...
Checking if all tasks are complete...
All Tasks Complete!


### Helper functions to read task output directly

In [83]:
def read_stream_as_string(stream, encoding):
    output = io.BytesIO()
    try:
        for data in stream:
            output.write(data)
        if encoding is None:
            encoding = 'utf-8'
        return output.getvalue().decode(encoding)
    finally:
        output.close()
    raise RuntimeError('could not write data to stream or decode bytes')

def read_task_file_as_string(batch_client, job_id, task_id, file_name, encoding=None):
    stream = batch_client.file.get_from_task(job_id, task_id, file_name)
    return _read_stream_as_string(stream, encoding)

def print_task_output(batch_client, job_id, task_ids, encoding=None):
    _STANDARD_OUT_FILE_NAME = 'stdout.txt'
    _STANDARD_ERROR_FILE_NAME = 'stderr.txt'
    
    for task_id in task_ids:
        file_text = read_task_file_as_string(
            batch_client,
            job_id,
            task_id,
            _STANDARD_OUT_FILE_NAME,
            encoding)
        print("{} content for task {}: ".format(
            _STANDARD_OUT_FILE_NAME,
            task_id))
        print(file_text)

        file_text = read_task_file_as_string(
            batch_client,
            job_id,
            task_id,
            _STANDARD_ERROR_FILE_NAME,
            encoding)
        print("{} content for task {}: ".format(
            _STANDARD_ERROR_FILE_NAME,
            task_id))
        print(file_text)

tasks = batch_client.task.list(job_id)

# let's print the output of the first 3 tasks
print_task_output(batch_client, job_id, task_ids[:3])

stdout.txt content for task Process-101: 
Application version: 1.2
Started at: 2020-04-14 21:23:58.448409

Received data:

      0   1         2   3   4   ...         13         14  15     16    17
0   5700   2  Saturday  17  54  ... -73.782097  40.745861   1   6.08  19.0
1   5701   2  Saturday  17  56  ... -73.847115  40.865940   1   2.76  12.0
2   5702   2  Saturday  17  56  ... -73.865463  40.725395   1   1.67   8.0
3   5703   2  Saturday  17  57  ... -73.999985  40.682884   1  13.03  38.0
4   5704   2  Saturday  17  58  ... -73.809593  40.721497   1   2.35   8.5
..   ...  ..       ...  ..  ..  ...        ...        ...  ..    ...   ...
95  5795   2  Saturday  19  32  ... -73.950279  40.786373   5   4.25  13.5
96  5796   1  Saturday  19  32  ... -73.862411  40.730431   1   2.00  11.0
97  5797   2  Saturday  19  33  ... -73.884659  40.737160   5   3.18  13.5
98  5798   2  Saturday  19  34  ... -73.935631  40.845116   1   5.79  16.0
99  5799   2  Saturday  19  34  ... -73.922737  40.7

## Checking Output
As we created each task with an output file option, the produced filed by each execution of our sample_application will result in a new file being created in the output container in Azure Storage. We can quickly check all the files here:

In [84]:
output_file_list = blob_client.list_blobs(container_name=output_container_name)
print("Number of files:",len(list(output_file_list)))
print("\nFirst 10:")
for f in list(output_file_list)[0:10]:
    print(f.name)

Number of files: 60

First 10:
Process-101/data56_output.csv
Process-102/data43_output.csv
Process-103/data8_output.csv
Process-104/data35_output.csv
Process-105/data33_output.csv
Process-106/data23_output.csv
Process-107/data25_output.csv
Process-108/data34_output.csv
Process-109/data57_output.csv
Process-110/data6_output.csv


## Delete Job
No issues in removing the job because each task will write it's results to the output container in Azure Storage, however, keeping this uncommented will allow you to see the job in Batch Explorer and debug any failed tasks

In [87]:
batch_client.job.delete(job.id)

## Delete Pool
Note: you may not necessarily want to do this because creating the pool takes some time

In [88]:
batch_client.pool.delete(pool.id)