In [1]:
# useful for development to reload modules everytime we run code
%load_ext autoreload
%autoreload 2

## Imports
If any errors in imports, then execute the following package installs and execute the imports again:

pip install -r requirements.txt -U

In [2]:
# general imports
from pprint import pprint
import datetime, io, os, sys, time, json, uuid, glob
from datetime import date

# azure batch
import azure.batch.batch_auth as batchauth
import azure.batch._batch_service_client as batch
import azure.batch.models as batchmodels

# azure storage
import azure.storage.blob as azureblob

## Loading Configuration
Before executing, make sure settings.json file is correctly configured to access your resources on Azure

In [3]:
settings_file = "settings.json"

with open(settings_file) as f:
    settings = json.load(f)

pprint(settings, indent=2)

{ 'batchAccountKey': 'Z5Gevll0QrJt0KzkXMi+EwbPbHXN9FDRPgvNOiDeMttT/ubg3tPr54ETbVwxpXub8tMHlZ8IGo026B29ThmKFQ==',
  'batchAccountName': 'covidiabatch',
  'batchServiceUrl': 'https://covidiabatch.francecentral.batch.azure.com',
  'dedicatedVmCount': 0,
  'jobIdPrefix': 'SimulationJob',
  'lowPriorityVmCount': 5,
  'poolId': 'SimulationPoolInfluenza',
  'storageAccountKey': '6mZDnh2bYaunEgt+WTX7EqeBMw944ywtrZsaFyvbAeAWmr/Sg1g6ceRrOYkr3KUowjPvUBYZufNEC29B1jwdbg==',
  'storageAccountName': 'covidiabatchstorage',
  'vmSize': 'STANDARD_A1_V2'}


## Create Clients

In [4]:
# create the batch client to create pools, jobs and tasks on Azure Batch
credentials = batchauth.SharedKeyCredentials(
       settings["batchAccountName"],
       settings["batchAccountKey"])

batch_client = batch.BatchServiceClient(
        credentials,
        batch_url=settings["batchServiceUrl"])

# Create the blob client, for use in obtaining references to
# blob storage containers and uploading files to containers.

blob_client = azureblob.BlockBlobService(
    account_name = settings["storageAccountName"],
    account_key = settings["storageAccountKey"])

## Create Storage Containers
This will create an application container to hold the application files that will be downloaded by the batch nodes.
Additionally it will create an input and output container to store any input files to feed to each task and to collect the output of the tasks

In [5]:
# Use the blob client to create the containers in Azure Storage if they
# don't yet exist.
app_container_name = "application"
input_container_name = "input"
output_container_name = "output"

blob_client.create_container(app_container_name, fail_on_exist=False)
blob_client.create_container(input_container_name, fail_on_exist=False)
blob_client.create_container(output_container_name, fail_on_exist=False)

print("Containers in Storage Account:")

for c in blob_client.list_containers():
    print("\t",c.name)

Containers in Storage Account:
	 application
	 input
	 output


## Helper Azure Storage Methods

In [6]:
def upload_blob_and_create_sas(block_blob_client, container_name, file_name, blob_name, hours=24):

    block_blob_client.create_container(
        container_name,
        fail_on_exist=False)

    block_blob_client.create_blob_from_path(
        container_name,
        blob_name,
        file_name)

    print("Uploaded", file_name, "to container", container_name)

    expiry = datetime.datetime.utcnow() + datetime.timedelta(hours=hours)
    sas_token = block_blob_client.generate_blob_shared_access_signature(
        container_name,
        blob_name,
        permission=azureblob.BlobPermissions.READ,
        expiry=expiry)

    sas_url = block_blob_client.make_blob_url(
        container_name,
        blob_name,
        sas_token=sas_token)

    return sas_url

def create_container_sas_token(block_blob_client, container_name, permission, hours=24):
 
    expiry = datetime.datetime.utcnow() + datetime.timedelta(hours=hours)
    sas_token = block_blob_client.generate_container_shared_access_signature(
        container_name, permission=permission, expiry=expiry)

    valid_sas_url = "https://{}.blob.core.windows.net/{}?{}".format(
        block_blob_client.account_name, container_name, sas_token
    )
    
    return valid_sas_url

## Copying app files to app container
This step will zip the files in the app_dir folder and upload the zip package to the application container created in the previous step. We will also generate a script file to install Python on the Batch nodes

In [7]:
# folder to be zipped
app_dir = "sample_application"
# zip file name
app_package_file_name = "app.tar.gz"
# python install script
setup_file_name = "installPython.sh"
# folder to store the zipped file and install script
resource_folder = "batch_resources"

In [8]:
# zip the application and copy zip file to resource folder
!ls -la $app_dir
!mkdir $resource_folder
!echo zipping application
!tar czf $resource_folder/app.tar.gz -C ./$app_dir/ .

total 20
drwxrwxr-x 3 quick quick 4096 Apr 14 17:03 .
drwxrwxr-x 6 quick quick 4096 Apr 14 17:07 ..
-rw-rw-r-- 1 quick quick 1333 Apr 14 11:28 main.py
-rw-rw-r-- 1 quick quick    6 Apr 14 17:03 requirements.txt
drwxrwxr-x 3 quick quick 4096 Apr 14 11:10 sample_utils
mkdir: cannot create directory ‘batch_resources’: File exists
zipping application


This next cell creates the Python setup script for Ubuntu nodes

In [9]:
%%writefile $resource_folder/$setup_file_name
sudo apt-get update
sudo su
apt-get install python3.6
apt-get install -y python3-pip

Overwriting batch_resources/installPython.sh


In [10]:
# get file paths for upload
app_file_path = os.path.join(resource_folder, app_package_file_name)
setup_file_path = os.path.join(resource_folder, setup_file_name)

# upload application package file to application container
appFileSas = upload_blob_and_create_sas(blob_client, app_container_name, app_file_path, app_package_file_name)
print(appFileSas)

# upload install script to application container
setupFileSas = upload_blob_and_create_sas(blob_client, app_container_name, setup_file_path, setup_file_name)
print(setupFileSas)

Uploaded batch_resources/app.tar.gz to container application
https://covidiabatchstorage.blob.core.windows.net/application/app.tar.gz?se=2020-04-15T17%3A09%3A11Z&sp=r&sv=2018-03-28&sr=b&sig=BtZ6zgC9z8Mq94tO/6JYr9Eji5%2B8w3Suy53F5h%2BJslU%3D
Uploaded batch_resources/installPython.sh to container application
https://covidiabatchstorage.blob.core.windows.net/application/installPython.sh?se=2020-04-15T17%3A09%3A11Z&sp=r&sv=2018-03-28&sr=b&sig=1VMF2%2BcLxoZlpyHllmRlnB0tkbOyPYeTlp14%2BlYBs%2BE%3D


## Creating Azure Batch Pool
A pool is the central compute resource for Azure Batch. It's composed of several machines that will be assigned tasks once a job is created.
In here, we setup a pool of Ubuntu nodes and create a start task to make sure Python is installed. As machines get added to the pool, this task will imediately run before any tasks are assigned to the nodes.

### Defining a StartTask
Runs on all nodes on startup. This will reference the install script to make sure Python is installed on each node

In [11]:
# create an elevated identity to run the start task - needed whenever you require sudo access
user = batchmodels.AutoUserSpecification(scope=batchmodels.AutoUserScope.pool, elevation_level=batchmodels.ElevationLevel.admin)
user_identity = batchmodels.UserIdentity(auto_user=user)   

# setup the task command - executing the shell script that install python
command_line = f"/bin/bash -c \"sudo sh {setup_file_name}\""

# setup the start task
startTask = batchmodels.StartTask(
        command_line=command_line,
        wait_for_success = True,
        user_identity = user_identity,
        resource_files = [batchmodels.ResourceFile(
                         file_path = setup_file_name,
                         http_url = setupFileSas)])

print("Start task:")
print(f"CommandLine: {command_line}")
print(f"ResourceFiles:")
for f in startTask.resource_files:
    print(f"\t{f.http_url}")

Start task:
CommandLine: /bin/bash -c "sudo sh installPython.sh"
ResourceFiles:
	https://covidiabatchstorage.blob.core.windows.net/application/installPython.sh?se=2020-04-15T17%3A09%3A11Z&sp=r&sv=2018-03-28&sr=b&sig=1VMF2%2BcLxoZlpyHllmRlnB0tkbOyPYeTlp14%2BlYBs%2BE%3D


### Creating the Pool

In [12]:
# checking configuration
poolId = settings["poolId"]
vmSize = settings["vmSize"]
dedicatedNodes = settings["dedicatedVmCount"]
lowPriorityNodes = settings["lowPriorityVmCount"]

print(f"Creating pool {poolId} with:")
print("Size:",vmSize)
print("Number of dedicated nodes:",dedicatedNodes)
print("Number of low priority nodes:",lowPriorityNodes)

Creating pool SimulationPoolInfluenza with:
Size: STANDARD_A1_V2
Number of dedicated nodes: 0
Number of low priority nodes: 5


In [13]:
# setup pool
pool = batchmodels.PoolAddParameter(
    id=poolId,
    virtual_machine_configuration=batchmodels.VirtualMachineConfiguration(
        image_reference=batchmodels.ImageReference(
            publisher="Canonical",
            offer="UbuntuServer",
            sku="18.04-LTS",
            version="latest"
        ),
        node_agent_sku_id="batch.node.ubuntu 18.04"),
    vm_size=vmSize,
    target_dedicated_nodes=dedicatedNodes,
    target_low_priority_nodes=lowPriorityNodes,
    start_task=startTask)

# create pool
try:
    print("Attempting to create pool:", pool.id)
    batch_client.pool.add(pool)
    print("Created pool:", pool.id)
except batchmodels.BatchErrorException as e:
    if e.error.code != "PoolExists":
        raise
    else:
        print("Pool {!r} already exists".format(pool.id))

Attempting to create pool: SimulationPoolInfluenza
Created pool: SimulationPoolInfluenza


In [31]:
def wait_for_all_nodes_state(batch_client, pool, node_state):
    print('Waiting for all nodes in pool {} to reach one of: {!r}\n'.format(
        pool.id, node_state))
    i = 0
    targetNodes = pool.target_dedicated_nodes + pool.target_low_priority_nodes
    while True:
        # refresh pool to ensure that there is no resize error
        pool = batch_client.pool.get(pool.id)
        if pool.resize_errors is not None:
            resize_errors = "\n".join([repr(e) for e in pool.resize_errors])
            raise RuntimeError(
                'resize error encountered for pool {}:\n{}'.format(
                    pool.id, resize_errors))
        nodes = list(batch_client.compute_node.list(pool.id))
        if (len(nodes) >= targetNodes and
                all(node.state in node_state for node in nodes)):
            return nodes
        i += 1
        if i % 3 == 0:
            print('waiting for {} nodes to reach desired state...'.format(
                targetNodes))
        time.sleep(10)

# we check if all nodes are up before we continue
nodes = wait_for_all_nodes_state(batch_client, pool, [batchmodels.ComputeNodeState.idle, batchmodels.ComputeNodeState.running])

# show all nodes
for n in nodes:
    print(n.id, n.state, n.is_dedicated)

Waiting for all nodes in pool SimulationPoolInfluenza to reach one of: [<ComputeNodeState.idle: 'idle'>, <ComputeNodeState.running: 'running'>]

tvmps_4926f00379a160016454c298f5cf7a62b2653a89027f941ea5237ee045aca95e_p ComputeNodeState.idle False
tvmps_8fc321166b6f74bb308c40ce1af349f89ca575facf81d2331cbdf88e2b700a44_p ComputeNodeState.idle False
tvmps_c04d0ea8de7ec7d885e8d025a1db757da98f9e30b00a48a8e6527523f29e9c47_p ComputeNodeState.idle False
tvmps_c0507d319eb8e15b6b4eb0b03e974bc3e33b21181c4da1d80d8f6cdc627a1d54_p ComputeNodeState.idle False
tvmps_ea14cff61b9319f5fd38989d2227221fc60116c533837e47c82d49ff3ab01499_p ComputeNodeState.idle False


## Creating a Job to run on the Pool
We will now create a job and an associated Prep task to ensure the application is downloaded, extracted to a known location and all python packages are installed via pip

In [15]:
# creating a unique job Id
job_id = settings["jobIdPrefix"] + "_" + str(date.today().year) + "_" + str(date.today().month) + "_" + str(date.today().day) + "_" + str(uuid.uuid1())

# setup the task command
command_line = f"/bin/bash -c \"cd $AZ_BATCH_NODE_SHARED_DIR && tar -xvf $AZ_BATCH_JOB_PREP_WORKING_DIR/{app_package_file_name} && sudo su && pip3 install -r requirements.txt \""

# create an elevated identity to run the start task
user = batchmodels.AutoUserSpecification(scope=batchmodels.AutoUserScope.pool, elevation_level=batchmodels.ElevationLevel.admin)
user_identity = batchmodels.UserIdentity(auto_user=user)   

# setup the start task
jobTask = batchmodels.JobPreparationTask(
        command_line = command_line,
        user_identity = user_identity,
        wait_for_success = True,
        resource_files = [batchmodels.ResourceFile(
                         file_path = app_package_file_name,
                         http_url = appFileSas)])

print("Job Preparation task:")
print(f"CommandLine: {command_line}")
print(f"ResourceFiles:")
for f in jobTask.resource_files:
    print(f"\t{f.http_url}")

Job Preparation task:
CommandLine: /bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && tar -xvf $AZ_BATCH_JOB_PREP_WORKING_DIR/app.tar.gz && sudo su && pip3 install -r requirements.txt "
ResourceFiles:
	https://covidiabatchstorage.blob.core.windows.net/application/app.tar.gz?se=2020-04-15T17%3A09%3A11Z&sp=r&sv=2018-03-28&sr=b&sig=BtZ6zgC9z8Mq94tO/6JYr9Eji5%2B8w3Suy53F5h%2BJslU%3D


In [16]:
# setup job
job = batchmodels.JobAddParameter(
    id=job_id,
    pool_info=batchmodels.PoolInformation(pool_id=pool.id),
    job_preparation_task = jobTask)

# create job
print('Creating job [{}]...'.format(job.id))

try:
    batch_client.job.add(job)
except batchmodels.batch_error.BatchErrorException as err:
    print_batch_exception(err)
    if err.error.code != "JobExists":
        raise
    else:
        print("Job {!r} already exists".format(job_id))

Creating job [SimulationJob_2020_4_14_a964935e-7e72-11ea-a754-793f197936c4]...


## Adding Tasks to the Job
Now that our application is correctly configured and we made sure Python is installed in all nodes, we need to setup a task to run a work item. We can launch many tasks inside the same job and Azure Batch will assign it to any VMs in the pool.

In this example, we will create as many tasks as files in input_data (a local folder in this repo). This is a simple way of doing paralel processing of a large file when splits can be done. Another option is simple iterating over an array of parameter values and creating a task for each different value. We illustrate here the most complicated scenario which involves passing different input files to the script and uploading those files to the input container in the storage account.

These tasks also write output to storage. The main.py script writes an output file and we configure the task to upload these files to the output container we created earlier. It is done after the task ends successfully

In [17]:
# get a sas url for write access to output container. This will be used so we can persist task output files
output_container_sas = create_container_sas_token(blob_client, container_name=output_container_name, permission=azureblob.BlobPermissions.WRITE)
print(output_container_sas)

https://covidiabatchstorage.blob.core.windows.net/output?se=2020-04-15T17%3A09%3A12Z&sp=w&sv=2018-03-28&sr=c&sig=CIcb7cX1bjYs%2BLyBhsPQQHBrtKtKZUHn2jOoSpcV8RI%3D


In [18]:
# we get a list of input files
file_list = glob.glob("input_data/*.dat")

# initialize task counter
i = 100
for f in file_list:
    # increment task counter
    i = i + 1
    
    # create a task id
    task_id = "Process-" + str(i)
    
    # grab file name
    input_file = f.split("/")[-1:][0]
    output_file = input_file.replace(".dat","_output.csv")
    
    # upload file to azure storage
    input_file_sas = upload_blob_and_create_sas(blob_client, input_container_name, f, input_file)
    
    # setup task command
    taskCommand = f"/bin/bash -c \"cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/{input_file} -o $AZ_BATCH_TASK_WORKING_DIR/{output_file}\""
    print(taskCommand)
    
    # create an elevated identity to run the start task
    user = batchmodels.AutoUserSpecification(scope=batchmodels.AutoUserScope.pool, elevation_level=batchmodels.ElevationLevel.admin)
    user_identity = batchmodels.UserIdentity(auto_user=user)   
   
    # setup output files destination
    containerDest = batchmodels.OutputFileBlobContainerDestination(container_url = output_container_sas, path = task_id)
    outputFileDestination = batchmodels.OutputFileDestination(container = containerDest)
    
    # setup output files upload condition
    uploadCondition = batchmodels.OutputFileUploadCondition.task_success
    uploadOptions = batchmodels.OutputFileUploadOptions(upload_condition = uploadCondition)
    
    # output files
    output_files = [batchmodels.OutputFile(destination = outputFileDestination,
                                        upload_options = uploadOptions,
                                        file_pattern="*output.csv")]
    
    
    # create task
    task = batchmodels.TaskAddParameter(
    id = task_id,
    command_line=taskCommand,
    user_identity=user_identity,
    resource_files=[batchmodels.ResourceFile(
                        file_path=input_file,
                        http_url=input_file_sas)],
    output_files=output_files)
    
    
    batch_client.task.add(job_id=job.id, task=task)

Uploaded input_data/data56.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data56.dat -o $AZ_BATCH_TASK_WORKING_DIR/data56_output.csv"
Uploaded input_data/data43.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data43.dat -o $AZ_BATCH_TASK_WORKING_DIR/data43_output.csv"
Uploaded input_data/data8.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data8.dat -o $AZ_BATCH_TASK_WORKING_DIR/data8_output.csv"
Uploaded input_data/data35.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data35.dat -o $AZ_BATCH_TASK_WORKING_DIR/data35_output.csv"
Uploaded input_data/data33.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data33.dat -o $AZ_BATCH_TASK_WORKING_DIR/data33_outpu

Uploaded input_data/data9.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data9.dat -o $AZ_BATCH_TASK_WORKING_DIR/data9_output.csv"
Uploaded input_data/data54.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data54.dat -o $AZ_BATCH_TASK_WORKING_DIR/data54_output.csv"
Uploaded input_data/data16.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data16.dat -o $AZ_BATCH_TASK_WORKING_DIR/data16_output.csv"
Uploaded input_data/data45.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data45.dat -o $AZ_BATCH_TASK_WORKING_DIR/data45_output.csv"
Uploaded input_data/data55.dat to container input
/bin/bash -c "cd $AZ_BATCH_NODE_SHARED_DIR && python3 main.py -i $AZ_BATCH_TASK_WORKING_DIR/data55.dat -o $AZ_BATCH_TASK_WORKING_DIR/data55_outpu

## Monitoring Tasks

In [29]:
def wait_for_tasks_to_complete(batch_client, job_id, timeout):
    """Waits for all the tasks in a particular job to complete.

    :param batch_client: The batch client to use.
    :type batch_client: `batchserviceclient.BatchServiceClient`
    :param str job_id: The id of the job to monitor.
    :param timeout: The maximum amount of time to wait.
    :type timeout: `datetime.timedelta`
    """
    time_to_timeout_at = datetime.datetime.now() + timeout

    while datetime.datetime.now() < time_to_timeout_at:
        print("Checking if all tasks are complete...")
        tasks = batch_client.task.list(job_id)

        incomplete_tasks = [task for task in tasks if
                            task.state != batchmodels.TaskState.completed]
        if not incomplete_tasks:
            return
        time.sleep(30)

    raise TimeoutError("Timed out waiting for tasks to complete")

wait_for_tasks_to_complete(batch_client, job.id, datetime.timedelta(minutes=60))
print("All Tasks Complete!")

Checking if all tasks are complete...
All Tasks Complete!


### Helper functions to read task output directly

In [30]:
def read_stream_as_string(stream, encoding):
    output = io.BytesIO()
    try:
        for data in stream:
            output.write(data)
        if encoding is None:
            encoding = 'utf-8'
        return output.getvalue().decode(encoding)
    finally:
        output.close()
    raise RuntimeError('could not write data to stream or decode bytes')

def read_task_file_as_string(batch_client, job_id, task_id, file_name, encoding=None):
    stream = batch_client.file.get_from_task(job_id, task_id, file_name)
    return _read_stream_as_string(stream, encoding)

def print_task_output(batch_client, job_id, task_ids, encoding=None):
    _STANDARD_OUT_FILE_NAME = 'stdout.txt'
    _STANDARD_ERROR_FILE_NAME = 'stderr.txt'
    
    for task_id in task_ids:
        file_text = read_task_file_as_string(
            batch_client,
            job_id,
            task_id,
            _STANDARD_OUT_FILE_NAME,
            encoding)
        print("{} content for task {}: ".format(
            _STANDARD_OUT_FILE_NAME,
            task_id))
        print(file_text)

        file_text = read_task_file_as_string(
            batch_client,
            job_id,
            task_id,
            _STANDARD_ERROR_FILE_NAME,
            encoding)
        print("{} content for task {}: ".format(
            _STANDARD_ERROR_FILE_NAME,
            task_id))
        print(file_text)

tasks = batch_client.task.list(job_id)
task_ids = [task.id for task in tasks]

print_task_output(batch_client, job_id, task_ids)

stdout.txt content for task Process-101: 
Application version: 1.2
Started at: 2020-04-14 17:14:27.952534

Received data:

      0   1         2   3   4   ...         13         14  15     16    17
0   5700   2  Saturday  17  54  ... -73.782097  40.745861   1   6.08  19.0
1   5701   2  Saturday  17  56  ... -73.847115  40.865940   1   2.76  12.0
2   5702   2  Saturday  17  56  ... -73.865463  40.725395   1   1.67   8.0
3   5703   2  Saturday  17  57  ... -73.999985  40.682884   1  13.03  38.0
4   5704   2  Saturday  17  58  ... -73.809593  40.721497   1   2.35   8.5
..   ...  ..       ...  ..  ..  ...        ...        ...  ..    ...   ...
95  5795   2  Saturday  19  32  ... -73.950279  40.786373   5   4.25  13.5
96  5796   1  Saturday  19  32  ... -73.862411  40.730431   1   2.00  11.0
97  5797   2  Saturday  19  33  ... -73.884659  40.737160   5   3.18  13.5
98  5798   2  Saturday  19  34  ... -73.935631  40.845116   1   5.79  16.0
99  5799   2  Saturday  19  34  ... -73.922737  40.7

stdout.txt content for task Process-107: 
Application version: 1.2
Started at: 2020-04-14 17:14:30.255563

Received data:

      0   1       2   3   4   ...         13         14  15    16    17
0   2600   2  Sunday  15  24  ... -73.882301  40.749359   1  3.16  14.5
1   2601   2  Sunday  15  30  ... -73.971153  40.680103   1  9.14  28.0
2   2602   2  Sunday  15  34  ... -73.963379  40.764297   5  3.14  12.5
3   2603   2  Sunday  15  36  ... -73.952637  40.718609   1  5.15  18.5
4   2604   2  Sunday  15  37  ... -73.999374  40.675072   2  1.10   5.5
..   ...  ..     ...  ..  ..  ...        ...        ...  ..   ...   ...
95  2695   2  Sunday  19  13  ... -74.003029  40.725677   6  5.35  19.0
96  2696   2  Sunday  19  17  ... -73.845474  40.724857   1  1.02   7.5
97  2697   2  Sunday  19  22  ... -73.858559  40.839245   1  1.48   8.0
98  2698   2  Sunday  19  26  ... -73.978104  40.778233   5  3.81  13.0
99  2699   2  Sunday  19  27  ... -73.954323  40.780731   1  1.91   7.5

[100 rows x 

stdout.txt content for task Process-113: 
Application version: 1.2
Started at: 2020-04-14 17:14:47.870173

Received data:

      0   1        2   3   4   ...         13         14  15     16    17
0   3100   2   Monday  21   5  ... -73.969284  40.757648   1   3.63  13.5
1   3101   2   Monday  21   8  ... -73.850822  40.738750   1   1.73   7.5
2   3102   2   Monday  21  13  ... -73.803307  40.729145   1   2.91  11.5
3   3103   2   Monday  21  14  ... -73.933502  40.758308   5   0.57   4.0
4   3104   2   Monday  21  22  ... -73.971870  40.756374   1  11.05  32.5
..   ...  ..      ...  ..  ..  ...        ...        ...  ..    ...   ...
95  3195   2  Tuesday   8  25  ... -73.908096  40.852955   1   3.78  13.0
96  3196   2  Tuesday   8  29  ... -73.794685  40.723316   1   3.41  13.5
97  3197   2  Tuesday   8  31  ... -73.952759  40.791557   2   1.53   7.0
98  3198   2  Tuesday   8  31  ... -73.970955  40.786057   2   1.46   6.0
99  3199   2  Tuesday   8  45  ... -73.972336  40.796486   1   

stdout.txt content for task Process-119: 
Application version: 1.2
Started at: 2020-04-14 17:15:02.182453

Received data:

      0   1       2   3   4   ...         13         14  15     16    17
0   2900   2  Monday  10  45  ... -73.974876  40.777737   1   2.02  10.0
1   2901   2  Monday  10  54  ... -73.845467  40.754898   1   4.17  21.0
2   2902   2  Monday  10  55  ... -73.938026  40.804588   5   1.69   8.0
3   2903   2  Monday  10  56  ... -73.931618  40.846355   1   1.63   7.0
4   2904   2  Monday  11   0  ... -73.844017  40.721207   5   1.10   6.5
..   ...  ..     ...  ..  ..  ...        ...        ...  ..    ...   ...
95  2995   2  Monday  17  25  ... -73.967468  40.762836   2   3.99  17.0
96  2996   2  Monday  17  28  ... -73.991470  40.759884   1   4.73  23.5
97  2997   2  Monday  17  32  ... -73.926971  40.859821   1   0.90   5.5
98  2998   2  Monday  17  32  ... -73.972641  40.749668   1  10.24  30.0
99  2999   2  Monday  17  34  ... -73.945992  40.782833   1   2.61  15.0



stdout.txt content for task Process-125: 
Application version: 1.2
Started at: 2020-04-14 17:15:12.759416

Received data:

      0   1         2   3   4   ...         13         14  15    16    17
0   4200   2  Thursday  16   9  ... -73.872742  40.774361   1  8.94  28.0
1   4201   2  Thursday  16  11  ... -73.933807  40.759422   1  7.49  27.0
2   4202   2  Thursday  16  11  ... -73.918137  40.759972   2  0.46   4.0
3   4203   2  Thursday  16  11  ... -73.870941  40.757168   1  2.09  10.0
4   4204   1  Thursday  16  12  ... -73.991669  40.749943   1  4.00  23.5
..   ...  ..       ...  ..  ..  ...        ...        ...  ..   ...   ...
95  4295   1  Thursday  18  23  ... -73.913597  40.774918   1  1.30   9.5
96  4296   2  Thursday  18  23  ... -73.933739  40.849476   5  0.60   5.0
97  4297   2  Thursday  18  27  ... -73.953987  40.775349   2  1.25   5.5
98  4298   2  Thursday  18  27  ... -73.891144  40.860027   1  5.15  21.0
99  4299   2  Thursday  18  28  ... -73.966049  40.800655   5  

stdout.txt content for task Process-131: 
Application version: 1.2
Started at: 2020-04-14 17:15:20.840585

Received data:

      0   1          2   3   4   ...         13         14  15     16    17
0   3900   2  Wednesday  21  10  ... -73.972130  40.745739   2  11.70  35.5
1   3901   2  Wednesday  21  12  ... -73.984024  40.664017   2   2.40  10.5
2   3902   2  Wednesday  21  13  ... -73.915512  40.772419   1   7.83  24.0
3   3903   2  Wednesday  21  13  ... -73.962463  40.672672   5   4.22  16.0
4   3904   2  Wednesday  21  17  ... -73.816780  40.717686   1   4.78  14.5
..   ...  ..        ...  ..  ..  ...        ...        ...  ..    ...   ...
95  3995   2   Thursday   1  14  ... -73.850807  40.738300   2   1.76   7.0
96  3996   2   Thursday   1  26  ... -73.881668  40.755890   1   0.33   3.5
97  3997   2   Thursday   1  29  ... -73.977654  40.754753   2  10.85  31.0
98  3998   2   Thursday   1  44  ... -73.915337  40.767586   1   7.90  23.0
99  3999   2   Thursday   1  46  ... -73.

stdout.txt content for task Process-137: 
Application version: 1.2
Started at: 2020-04-14 17:15:33.245292

Received data:

      0   1         2   3   4   ...         13         14  15     16    17
0   5400   2  Saturday   5  23  ... -73.946899  40.816059   3  12.21  37.0
1   5401   2  Saturday   5  26  ... -73.810669  40.730350   2   2.77  10.0
2   5402   2  Saturday   5  37  ... -73.912277  40.876072   5  11.57  34.0
3   5403   2  Saturday   5  44  ... -73.973534  40.754379   1   4.12  14.5
4   5404   2  Saturday   5  45  ... -73.855042  40.709217   2   1.18   5.5
..   ...  ..       ...  ..  ..  ...        ...        ...  ..    ...   ...
95  5495   2  Saturday  12  56  ... -73.976151  40.786049   1   3.52  14.0
96  5496   2  Saturday  13   1  ... -73.950378  40.809898   1   2.17  11.0
97  5497   2  Saturday  13   2  ... -73.906456  40.849216   1   0.61   6.0
98  5498   2  Saturday  13   2  ... -73.847084  40.745388   5   2.94  10.5
99  5499   2  Saturday  13  17  ... -73.990646  40.7

stdout.txt content for task Process-143: 
Application version: 1.2
Started at: 2020-04-14 17:15:42.519160

Received data:

      0   1         2   3   4   ...         13         14  15    16    17
0   5500   2  Saturday  13  20  ... -73.943405  40.789421   5  0.45   4.0
1   5501   2  Saturday  13  22  ... -73.896057  40.862362   2  3.80  10.0
2   5502   2  Saturday  13  22  ... -73.977074  40.751339   1  4.48  19.0
3   5503   2  Saturday  13  30  ... -73.915375  40.854313   5  1.17   6.5
4   5504   2  Saturday  13  30  ... -73.952255  40.827152   1  1.58   7.0
..   ...  ..       ...  ..  ..  ...        ...        ...  ..   ...   ...
95  5595   2  Saturday  16  10  ... -73.841576  40.726929   1  1.06   6.5
96  5596   2  Saturday  16  10  ... -73.927078  40.757401   1  2.95  15.0
97  5597   2  Saturday  16  11  ... -73.902634  40.756390   5  0.64   4.5
98  5598   2  Saturday  16  12  ... -73.871208  40.774162   1  2.84   9.5
99  5599   2  Saturday  16  12  ... -73.944168  40.809361   1  

stdout.txt content for task Process-149: 
Application version: 1.2
Started at: 2020-04-14 17:15:52.835073

Received data:

      0   1         2   3   4   ...         13         14  15     16    17
0   5200   2    Friday  23   6  ... -73.934181  40.741966   3   4.05  15.5
1   5201   2    Friday  23  12  ... -73.935860  40.696857   1   2.56  12.5
2   5202   2    Friday  23  16  ... -73.962532  40.609974   1   4.15  14.5
3   5203   2    Friday  23  17  ... -73.935883  40.754410   2   6.49  20.5
4   5204   2    Friday  23  17  ... -73.979660  40.689808   1  11.47  32.0
..   ...  ..       ...  ..  ..  ...        ...        ...  ..    ...   ...
95  5295   2  Saturday   1   9  ... -73.840111  40.698929   1   6.08  19.0
96  5296   2  Saturday   1   9  ... -74.004875  40.721458   1   3.52  13.5
97  5297   2  Saturday   1   9  ... -73.850250  40.701962   1   2.77  11.0
98  5298   2  Saturday   1  12  ... -73.844246  40.771584   2   2.18   8.5
99  5299   1  Saturday   1  12  ... -73.978088  40.6

stderr.txt content for task Process-154: 

stdout.txt content for task Process-155: 
Application version: 1.2
Started at: 2020-04-14 17:16:03.433921

Received data:

      0   1         2   3   4   ...         13         14  15    16    17
0   2300   2  Saturday  19  48  ... -74.005585  40.738178   5  7.64  25.0
1   2301   2  Saturday  19  51  ... -74.010887  40.713291   6  2.64  15.5
2   2302   2  Saturday  19  55  ... -73.908760  40.774792   1  1.03   7.0
3   2303   2  Saturday  19  55  ... -73.846817  40.744854   1  2.74  10.0
4   2304   2  Saturday  19  56  ... -73.815353  40.712440   2  2.11   9.5
..   ...  ..       ...  ..  ..  ...        ...        ...  ..   ...   ...
95  2395   2  Saturday  23  18  ... -73.986168  40.724876   1  6.18  24.0
96  2396   2  Saturday  23  18  ... -74.009644  40.739166   1  6.93  24.5
97  2397   2  Saturday  23  19  ... -73.851212  40.699715   1  2.42   9.0
98  2398   2  Saturday  23  23  ... -73.910118  40.761654   5  5.16  18.5
99  2399   2  Saturd

stderr.txt content for task Process-160: 



## Delete Job
No issues in removing the job because each task will write it's results to the output container in Azure Storage, however, keeping this uncommented will allow you to see the job in Batch Explorer and debug any failed tasks

In [32]:
#batch_client.job.delete(job.id)

## Delete Pool
Note: you may not necessarily want to do this because creating the pool takes some time

In [34]:
#batch_client.pool.delete(pool.id)