# Notebook S4

This requires the following installation:
pip install azure==4.0.0

In [1]:
import azure.storage.blob as azureblob
import azure.batch.batch_service_client as batch
import azure.batch.batch_auth as batchauth
import azure.batch.models as batchmodels
import datetime

## Connect Client to the blob storage account

Find the account key by going to >> Access Keys under the Storage Account.
The storage account name and access key are both given on this page. Also, see TutorialS2.

In [None]:
# Name of the storage account (listed on the account's Access Keys page)
_STORAGE_ACCOUNT_NAME = 'seismicloud2'
# Access key for the storage account
# KEEP THIS SECURE: 'your_key_here' is a placeholder -- paste the real key
# locally and do not share or commit it.
# Should be a long string of letters, numbers, and symbols
_STORAGE_ACCOUNT_KEY = 'your_key_here'

In [None]:
# Build the blob service client that ties this notebook to the storage
# account, authenticating with the account name and key defined above.
blob_client = azureblob.BlockBlobService(
    account_name=_STORAGE_ACCOUNT_NAME,
    account_key=_STORAGE_ACCOUNT_KEY,
)

## Get SAS URL token to access the container within the blob storage account

In [3]:
def get_container_sas_token(block_blob_client,
                            container_name, blob_permissions):
    """
    Generate a shared access signature (SAS) token for a blob container.

    :param block_blob_client: A blob service client.
    :type block_blob_client: `azure.storage.blob.BlockBlobService`
    :param str container_name: The name of the Azure Blob storage container.
    :param BlobPermissions blob_permissions: Permissions the token grants.
    :rtype: str
    :return: A SAS token granting the specified permissions to the container.
    """
    # No start time is passed, so the signature becomes valid immediately.
    # It expires two hours after the current UTC time.
    lifetime = datetime.timedelta(hours=2)
    expires_at = datetime.datetime.utcnow() + lifetime

    return block_blob_client.generate_container_shared_access_signature(
        container_name,
        permission=blob_permissions,
        expiry=expires_at)

def get_container_sas_url(block_blob_client,
                          container_name, blob_permissions):
    """
    Obtains a shared access signature URL for a container, e.g. to give
    tasks write access to the output container they upload results to.
    :param block_blob_client: A blob service client.
    :type block_blob_client: `azure.storage.blob.BlockBlobService`
    :param str container_name: The name of the Azure Blob storage container.
    :param BlobPermissions blob_permissions: Permissions the URL should grant.
    :rtype: str
    :return: A SAS URL granting the specified permissions to the container.
    """
    # Obtain the SAS token for the container, honouring the permissions the
    # caller asked for (previously WRITE was always used regardless of the
    # blob_permissions argument).
    sas_token = get_container_sas_token(block_blob_client,
                                        container_name, blob_permissions)

    # Construct SAS URL for the container.
    # NOTE: the account name comes from the module-level
    # _STORAGE_ACCOUNT_NAME, so it must be the account the client signs for.
    container_sas_url = "https://{}.blob.core.windows.net/{}?{}".format(
        _STORAGE_ACCOUNT_NAME, container_name, sas_token)

    return container_sas_url

In [4]:
# Blob container that the tasks will write their output into
output_container_name = "seismicloud2"

In [None]:
# Request a SAS URL that grants write permission on the output container
output_container_sas_url = get_container_sas_url(
    block_blob_client=blob_client,
    container_name=output_container_name,
    blob_permissions=azureblob.BlobPermissions.WRITE,
)

#### Great! Now we have a secure, direct tie to the storage account. Next we need to tie to the Batch account.

## Connect Client to batch account

See TutorialS3 to locate your batch account name and key.

In [None]:
# Name of the Batch account
_BATCH_ACCOUNT_NAME = 'tmdetect'
# Access key for the Batch account ('your_key_here' is a placeholder;
# keep the real key private)
_BATCH_ACCOUNT_KEY = 'your_key_here'
# Endpoint URL of the Batch account
_BATCH_ACCOUNT_URL = 'https://tmdetect.westus2.batch.azure.com'

In [None]:
# Shared-key credentials used to authenticate against the Batch account
credentials = batchauth.SharedKeyCredentials(
    _BATCH_ACCOUNT_NAME, _BATCH_ACCOUNT_KEY)

In [None]:
# Batch service client bound to the account endpoint
batch_client = batch.BatchServiceClient(
    credentials, base_url=_BATCH_ACCOUNT_URL)

### Now that we've connected to the batch account, we want to identify the specific Pool you've set up to run on. We can do this simply using its name.

In [None]:
# ID (name) of the Pool that jobs in this notebook are attached to
_POOL_ID = 'zoestest'

### Okay, now we can create a job for this pool! A "job" is simple to create, because it is basically just an organizational resource. We will then assign "tasks" to the "job", which is where the real work happens.

In [7]:
# Start by choosing an ID (name) for your job; it is passed to create_job
# and used later to look up the job's tasks
_JOB_ID = 'zoestest2'

In [None]:
def create_job(batch_service_client, job_id, pool_id):
    """
    Submit a new job, with the given ID, associated with the given pool.

    :param batch_service_client: A Batch service client.
    :type batch_service_client: `azure.batch.BatchServiceClient`
    :param str job_id: The ID for the job.
    :param str pool_id: The ID for the pool.
    """
    print('Creating job [{}]...'.format(job_id))

    # Describe the job first, then hand the description to the service.
    pool_info = batchmodels.PoolInformation(pool_id=pool_id)
    job_spec = batchmodels.JobAddParameter(id=job_id, pool_info=pool_info)
    batch_service_client.job.add(job_spec)

In [None]:
# Create the job that will hold the tasks; passing the pool ID is what
# connects the job to the Pool.
create_job(
    batch_service_client=batch_client,
    job_id=_JOB_ID,
    pool_id=_POOL_ID,
)

## Create tasks within job
When you create a task within a Job, which is tied to a specific Pool, the job will assign the task to whichever node in the Pool is available. 
In our workflow, we create the same number of tasks as nodes in the pool. This way, there is no "queuing" of tasks and the nodes do not need to communicate with each other.

This does mean that there will likely be one node that finishes its task before the others and will be sitting idle while the others are still running.

We indicate which node number each task is running on with the command we send to the task.
Each task creates the same job list, but then only executes the jobs that have its rank.

#### Specify the Docker image you want the task to run within

In [9]:
# Docker image that each task's container is started from
image_name = "ghcr.io/denolle-lab/seismicloud:latest"

#### Specify which path to run the Docker image from. 
Importantly, this needs to match with the pathnames in your config files, so that the running scripts can access both the python scripts and the directory where the storage container is mounted to the node (remember-- we did that in the start-up task when we created the Pool.)

We also specify that the commands in the task will run with root permission.

In [None]:
# Extra options for the container run, one entry per option:
#   -u root                       run the task's commands as root
#   -v /tmp/data:/tmp/data/data   volume mapping between node and container
container_run_options = ' '.join((
    '-u root',
    '-v /tmp/data:/tmp/data/data',
))

### Now we can specify what commands each task will run.
This will be a series of two bash commands, similar to how we run locally, with slightly different pathnames and input arguments. 
This is the same set of commands as shown in Figure 1. First, create the job list, and then use MPI to run the detection script.

In [14]:
# Step 1: the command that creates the job list.
# The {config_path} and {n_nodes} placeholders are left unfilled here;
# they are substituted later, when the tasks are built.
command1 = (
    "python /tmp/batch_scripts/template_matching/create_joblist.py"
    " -c {config_path}"
    " -b {n_nodes}"
)

In [15]:
# Step 2: the command that runs the detection under MPI. Shown here for
# template matching; the script could be swapped for the EQT detection one.
#   -n  seismic network to run on
#   -y  year to run on
# Make sure both are correct for your purposes!
# The {n_cpus}, {config_path} and {idx} placeholders are filled in later.
command2 = (
    "mpirun -np {n_cpus}"
    " python /tmp/batch_scripts/template_matching/distributed_detection.py"
    " -c {config_path}"
    " -n NV"
    " -y 2017"
    " -b {idx}"
)

In [16]:
# Now we combine those two commands into one shell invocation.
# `bash -c` treats only its NEXT argument as the script to run, so the whole
# "command1 && command2" chain must be quoted; left unquoted, bash would be
# handed just the word `python` as its script. Single quotes are used so the
# template contains no double quotes. The && makes the second command run
# only after the first one succeeds.
input_command = "/bin/bash -c '{} && {}'".format(command1, command2)

In [None]:
def add_tasks(batch_service_client, job_id, image_name,container_run_options,input_command, n_nodes, n_cpus, config_path, output_container_sas_url):
    """
    Adds ``n_nodes`` container tasks (IDs ``Task0`` .. ``Task<n_nodes-1>``)
    to the specified job.
    :param batch_service_client: A Batch service client.
    :type batch_service_client: `azure.batch.BatchServiceClient`
    :param str job_id: The ID of the job to which to add the tasks.
    :param str image_name: Docker image each task runs in.
    :param str container_run_options: Extra options for the container run.
    :param str input_command: Command-line template. ``{idx}``,
     ``{n_nodes}``, ``{n_cpus}``, ``{config_path}``, ``{job_id}``,
     ``{image_name}``, ``{container_run_options}`` and
     ``{output_container_sas_url}`` are substituted with ``str.format``.
    :param int n_nodes: Number of tasks to create (one per node).
    :param int n_cpus: Value substituted for ``{n_cpus}``.
    :param str config_path: Value substituted for ``{config_path}``.
    :param str output_container_sas_url: SAS URL of the output container.
     Only available as a template placeholder; not otherwise used here.
    """

    print('Adding {} tasks to job [{}]...'.format(n_nodes, job_id))

    # Every task runs as a pool-scoped auto-user with admin elevation,
    # inside a container started from the requested image.
    user_identity = batchmodels.UserIdentity(
        auto_user=batchmodels.AutoUserSpecification(
            scope=batchmodels.AutoUserScope.pool,
            elevation_level=batchmodels.ElevationLevel.admin))
    container_settings = batchmodels.TaskContainerSettings(
        container_run_options=container_run_options,
        image_name=image_name)

    tasks = []
    for idx in range(n_nodes):
        # Fill the placeholders with str.format rather than eval() of an
        # f-string: eval would execute arbitrary expressions embedded in the
        # template and fails on any command containing a double quote.
        command = input_command.format(
            idx=idx,
            n_nodes=n_nodes,
            n_cpus=n_cpus,
            config_path=config_path,
            job_id=job_id,
            image_name=image_name,
            container_run_options=container_run_options,
            output_container_sas_url=output_container_sas_url)
        tasks.append(batchmodels.TaskAddParameter(
            id='Task{}'.format(idx),
            command_line=command,
            user_identity=user_identity,
            container_settings=container_settings))

    # The service accepts at most 100 tasks per add_collection request, so
    # submit larger collections in slices.
    max_tasks_per_call = 100
    for start in range(0, len(tasks), max_tasks_per_call):
        batch_service_client.task.add_collection(
            job_id, tasks[start:start + max_tasks_per_call])

In [None]:
# Everything is defined, so we can now add N tasks to our Pool of N Nodes.

# Number of nodes (and therefore number of tasks)
n_nodes = 1
# If you need to check how many nodes are currently in the pool, use the following:
#n_nodes = batch_client.pool.get(pool_id=_POOL_ID).target_dedicated_nodes

# CPUs per node; depends on the instance type chosen when the Pool was built
n_cpus = 4

# Config file path; it must resolve from the location the docker image
# is run from
config_path = '/tmp/configs/config_tm_batch.json'

# Send the tasks
add_tasks(
    batch_service_client=batch_client,
    job_id=_JOB_ID,
    image_name=image_name,
    container_run_options=container_run_options,
    input_command=input_command,
    n_nodes=n_nodes,
    n_cpus=n_cpus,
    config_path=config_path,
    output_container_sas_url=output_container_sas_url,
)

#### If that worked successfully, you should be able to check on your nodes in the Pool in the Azure portal. If running, they will appear as green boxes. See TutorialS3 for how to check on these nodes through the Portal.

#### We can also check on the nodes from here. Some examples are shown below.

In [None]:
# Collect the current state name of every task (Task0 .. Task<n_nodes-1>),
# which shows whether each one has finished running
status = [
    batch_client.task.get(
        job_id=_JOB_ID, task_id='Task{}'.format(task_number)).state.name
    for task_number in range(n_nodes)
]

In [None]:
# Check how long the tasks took to run, if they are finished.
# Each task is fetched once so both timestamps come from the same snapshot
# (and the number of service calls is halved).
# Note: the interval runs from task creation to the task's most recent
# state transition, so it includes any time spent waiting to be scheduled.
run_times = []
for i in range(n_nodes):
    task_id = 'Task{}'.format(i)
    task = batch_client.task.get(job_id=_JOB_ID, task_id=task_id)
    start_time = task.creation_time
    end_time = task.state_transition_time
    run_times.append(end_time - start_time)
# The slowest task determines the overall run time
run_time = max(run_times)
print('Run time = ' + str(run_time))

#### Also, if you'd like to resize your Pool (add more nodes and run more tasks, OR resize it to zero when you are done running tasks), you can do that from here as well.

#### That is why it is so useful to have a Pool set up-- once it is running, you can keep adding or lowering the current compute resources to suit your needs!

In [None]:
# Double the pool size; n_nodes is updated so later cells use the new value
n_nodes *= 2
resize_request = batch.models.PoolResizeParameter(
    target_dedicated_nodes=n_nodes)
batch_client.pool.resize(
    pool_id=_POOL_ID,
    pool_resize_parameter=resize_request)
print('Sizing up to '+str(n_nodes)+' nodes')

In [None]:
# After sending the command to size up, it will take several minutes for the
# new nodes in the Pool to become usable.
# In this case, you'll want to wait to create a new job until the Pool finishes resizing
# Below shows an example of how to check on that:
import time  # not imported at the top of the notebook

state_ready = False
while not state_ready:
    time.sleep(20)
    print('Checking node state...')
    # Nodes in the Pool are ready to be used if they are "idle"
    # This means they have successfully completed their start-up task
    # (batchmodels is the alias azure.batch.models is imported under; the
    # bare name `azure` is never bound in this notebook.)
    counter = sum(
        1 for node in batch_client.compute_node.list(pool_id=_POOL_ID)
        if node.state == batchmodels.ComputeNodeState.idle)
    # >= rather than ==, so the loop also ends if more nodes than expected
    # are idle
    state_ready = counter >= n_nodes

## Once your tasks have finished, you can check on the outputs in the Blob storage container and download the resulting output files using azcopy (see TutorialS2).