# Creating Azure Resources



In [None]:
from azure.mgmt.resource import ResourceManagementClient
from azure.common.credentials import ServicePrincipalCredentials
from azure.mgmt.resource.resources.models import DeploymentMode
import json

Start by specifying your subscription and resource group information, then log in.

In [None]:
# Subscription & resource group.
# SUBSCRIPTION_NAME and RESOURCE_GROUP are empty placeholders: fill them in before
# running the cells below, which interpolate them into the `az` commands.
SUBSCRIPTION_NAME = ''
RESOURCE_GROUP = ''
# Region used when creating the resource group and the storage account.
LOCATION = 'eastus'

In [None]:
# Log in to Azure with the CLI
!az login

In [None]:
# Select the subscription named in SUBSCRIPTION_NAME.
# The name is double-quoted so that names containing spaces reach `az` as one argument.
!az account set -s "{SUBSCRIPTION_NAME}"

In [None]:
# Get susbcription info
temp = '"az account show -s \\"{}\\""'.format(SUBSCRIPTION_NAME)
subscription_id, tenant_id  =!eval {temp} | jq -r '.id, .tenantId'

In [None]:
# Create the resource group RESOURCE_GROUP in region LOCATION
!az group create -l {LOCATION} -n {RESOURCE_GROUP}

You will also need service principal credentials for authentication. The following command creates a service principal and retrieves its credentials. For more information on service principals, see the documentation [here](https://docs.microsoft.com/en-us/cli/azure/create-an-azure-service-principal-azure-cli?view=azure-cli-latest).

In [None]:
# Create and get service principal credentials
temp = !az ad sp create-for-rbac | jq -r '.appId, .password'
sp_client, sp_secret = temp[-2:] # filter role assignment warnings that are returned by 'az ad sp create-for-rbac'

The following are parameters needed to create and access the main Azure resources. These include: Azure Container Registry (ACR), Batch AI, Blob Storage, and Logic Apps parameters. 

You can use the default values below as is.

In [None]:
# ACR
# Registry names are unique across Azure, so change ACR_NAME if 'baimmacr' is taken.
# The login server and both image tags are derived from ACR_NAME so they cannot drift apart.
ACR_NAME = 'baimmacr'
ACR_SERVER = '{}.azurecr.io'.format(ACR_NAME)
BAI_DOCKER_IMG = '{}/baimmimg:v1'.format(ACR_SERVER)         # scoring image run by Batch AI
SCHED_DOCKER_IMG = '{}/baimmschedimg:v1'.format(ACR_SERVER)  # job-submission image run by the Logic App

# Batch AI
BAI_CLUSTER_NAME = 'baimmcluster'
BAI_WORKSPACE = 'baimmws'
BAI_USER = 'baimmuser'
# NOTE(review): default cluster password is hard-coded; replace it before using this outside a demo.
BAI_PASS = 'baimmpass'
BAI_VM_SIZE = 'Standard_D2'
BAI_VM_IMG = 'UbuntuLTS'
BAI_NODES_MIN = 0  # lower auto-scale bound
BAI_NODES_MAX = 3  # upper auto-scale bound

# Blob storage
# Storage account names are unique across Azure, so change BLOB_ACCOUNT if 'baimmstorage' is taken.
BFS_CONTAINER = 'bfs' # shared across Batch AI nodes under /mnt/batch/tasks/shared/LS_root/mounts/bfs
BLOB_ACCOUNT = 'baimmstorage'
MODELS_CONTAINER = 'models'
PREDS_CONTAINER = 'preds'
DATA_CONTAINER = 'data'
DATA_BLOB = 'sensor_data.csv' # name of data file to be copied to blob storage

# Logic App
LA_ACI_CON = 'aci'
LA_WORKFLOW = 'baimmscheduler'
LA_ACI_CON_JSON = 'sched/api_con_template.json'  # ARM template for the ACI API connection
LA_JSON = 'sched/logic_app_template.json'        # ARM template for the Logic App
LA_ACI_CONTAINER_NAME = 'baimmschedcontainer'
LA_ACI_CONTAINER_GROUP = 'baimmcontainergroup'

In [None]:
# Create the container registry ACR_NAME in the resource group (Basic SKU)
!az acr create --resource-group {RESOURCE_GROUP} --name {ACR_NAME} --sku Basic

In [None]:
# Create the Blob storage account BLOB_ACCOUNT in the resource group and region set above
!az storage account create -n {BLOB_ACCOUNT} -g {RESOURCE_GROUP} -l {LOCATION}

In [None]:
# Retrieve Blob storage key
blob_key = !az storage account keys list -g {RESOURCE_GROUP} -n {BLOB_ACCOUNT} | jq -r .[0].value
blob_key = blob_key[0]

In [None]:
# Create models, predictions and data containers
!az storage container create -n {MODELS_CONTAINER} --account-key {blob_key} --account-name {BLOB_ACCOUNT}
!az storage container create -n {PREDS_CONTAINER} --account-key {blob_key} --account-name {BLOB_ACCOUNT}
!az storage container create -n {DATA_CONTAINER} --account-key {blob_key} --account-name {BLOB_ACCOUNT}
!az storage container create -n {BFS_CONTAINER} --account-key {blob_key} --account-name {BLOB_ACCOUNT}

In [None]:
# Create Batch AI cluster and set auto-scaling
# 1) create the workspace that holds the cluster
!az batchai workspace create -g {RESOURCE_GROUP} -n {BAI_WORKSPACE}
# 2) create the cluster in that workspace with the VM size/image, node bounds and user
#    credentials defined above, attaching the BFS_CONTAINER blob container via --bfs-name
!az batchai cluster create -g {RESOURCE_GROUP} -n {BAI_CLUSTER_NAME} -w {BAI_WORKSPACE} -s {BAI_VM_SIZE} -i {BAI_VM_IMG} --min {BAI_NODES_MIN} --max {BAI_NODES_MAX} -u {BAI_USER} -p {BAI_PASS} --storage-account-name {BLOB_ACCOUNT} --storage-account-key {blob_key} --bfs-name {BFS_CONTAINER}
# 3) set auto-scale on the cluster between BAI_NODES_MIN and BAI_NODES_MAX nodes
!az batchai cluster auto-scale -g {RESOURCE_GROUP} -w {BAI_WORKSPACE} -n {BAI_CLUSTER_NAME} --min {BAI_NODES_MIN} --max {BAI_NODES_MAX}

The main scoring Python script requires some config parameters to access the created Azure resources. We can generate those in the following cell and save the config file in json format.

In [None]:
# Assemble the settings read by the scoring script (predict.py): the storage
# account, its key, and the containers/blob it works with.
score_config = {
    "blob_account": BLOB_ACCOUNT,
    "blob_key": blob_key,
    "models_blob_container": MODELS_CONTAINER,
    "data_blob_container": DATA_CONTAINER,
    "data_blob": DATA_BLOB,
    "predictions_blob_container": PREDS_CONTAINER,
}

# Persist the settings as indented JSON next to the scoring script.
with open('batchai/predict_config.json', 'w') as f:
    f.write(json.dumps(score_config, indent=4))


With the scoring script and its config in place, we create a docker image that Batch AI can use to execute scoring, and push that to ACR. The image is defined in a Dockerfile in the repo.

In [61]:
# Build the Batch AI Docker image from batchai/Dockerfile, tagged BAI_DOCKER_IMG.
# The build context is the current directory ('.').
!sudo docker build -f batchai/Dockerfile -t {BAI_DOCKER_IMG} .

Sending build context to Docker daemon  8.623MB
Step 1/8 : FROM continuumio/miniconda3
 ---> 1284db959d5d
Step 2/8 : EXPOSE 3000
 ---> Using cache
 ---> 3d5fae5b0095
Step 3/8 : RUN apt-get update -y
 ---> Using cache
 ---> ccf358629d64
Step 4/8 : COPY batchai/requirements.txt /
 ---> Using cache
 ---> 00d0a541d1c2
Step 5/8 : RUN pip install --upgrade pip
 ---> Using cache
 ---> 174df659b223
Step 6/8 : RUN python3 -m pip install -r requirements.txt
 ---> Using cache
 ---> 515608cba2cd
Step 7/8 : COPY batchai/predict.py /
 ---> ac1842051fbf
Step 8/8 : COPY batchai/predict_config.json /
 ---> 0f36ee3eb72e
Successfully built 0f36ee3eb72e
Successfully tagged baimmacr.azurecr.io/baimmimg:v1


In [None]:
# Validate that the image was created by listing the local Docker images
!sudo docker images

In [62]:
# Log in to ACR and push the Batch AI Docker image (BAI_DOCKER_IMG).
# Both commands run under sudo, as does the docker build above.
!sudo az acr login --name {ACR_NAME}
!sudo docker push {BAI_DOCKER_IMG}

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

The push refers to repository [baimmacr.azurecr.io/baimmimg]

[1B115dd695: Preparing 
[1Ba4452865: Preparing 
[1Baf025037: Preparing 
[1B48e56ac4: Preparing 
[1B87a12f5f: Preparing 
[1B709cca82: Preparing 
[1B4729c572: Preparing 
[1B589f916c: Preparing 
[1Bed6965f1: Preparing 
[1B38eef542: Preparing 
[2B38eef542: Layer already exists 8kB[10A[1K[K[11A[1K[K[9A[1K[K[8A[1K[K[10A[1K[K[4A[1K[K[3A[1K[K[6A[1K[K[1A[1K[Kv1: digest: sha256:77fff6411723259d2ea2c771807c7b015261514d18697d7db49b4621e3c6a86b size: 2626


The following commands copy the pre-trained models and sample data from this repo to blob storage so that Batch AI can access them during job submission.

In [None]:
# Copy models from local dir to blob container
!az storage blob upload-batch -d {MODELS_CONTAINER} -s models --account-name {BLOB_ACCOUNT}

In [None]:
# Copy dataset to blob
!az storage blob upload -c {DATA_CONTAINER} -f data/'{DATA_BLOB}' -n '{DATA_BLOB}' --account-name {BLOB_ACCOUNT}

We also generate a json config file for the Python script that creates and submits Batch AI jobs. The config file includes Batch AI, ACR, and service principal parameters.

In [None]:
# Enable ACR admin account authentication (the admin password is read in the next cell)
!az acr update -n {ACR_NAME} --admin-enabled true

In [None]:
# Get ACR's password (user is {ACR_NAME})
acr_password = !az acr credential show --name {ACR_NAME} | jq -r .passwords[0].value
acr_password = acr_password[0]

In [52]:
# Settings for the Batch AI job submission script (submit_jobs.py), grouped by concern
# and written to sched/bai_pred_config.json.

# Directory on the cluster nodes where the BFS container is mounted.
bfs_mount_path = '/mnt/batch/tasks/shared/LS_root/mounts/{}'.format(BFS_CONTAINER)

submit_jobs_config = {
    # service principal and subscription
    "sp_tenant": tenant_id,
    "sp_client": sp_client,
    "sp_secret": sp_secret,
    "resource_group_name": RESOURCE_GROUP,
    "subscription_id": subscription_id,
    # Batch AI workspace / experiment / cluster
    "work_space": BAI_WORKSPACE,
    "experiment_name": "baimm_score",
    "cluster_name": BAI_CLUSTER_NAME,
    "location": LOCATION,
    # container registry and image
    "acr_server": ACR_SERVER,
    "acr_image": BAI_DOCKER_IMG,
    "acr_user": ACR_NAME,
    "acr_password": acr_password,
    # job definition
    "command_line": "python /predict.py {0} {1} {2}",
    "std_out_err_path_prefix": bfs_mount_path,
    "config_file_path": "/predict_config.json",
    "node_count": 2,
    "device_ids": [1, 2, 3],
    "tags": [1, 2, 3, 4, 5],
    "job_name": "baimm_predict{0}_{1}",  # job name template
}

with open('sched/bai_pred_config.json', 'w') as f:
    f.write(json.dumps(submit_jobs_config, indent=4))

We will submit Batch AI jobs on a schedule defined and triggered by a Logic App. The Logic App creates a container instance from ACR and runs a Docker container that executes the job submission. That Docker image can be created and pushed to ACR using the following commands. 

In [53]:
# Build the scheduling Docker image from sched/Dockerfile, tagged SCHED_DOCKER_IMG.
# The build context is the current directory ('.').
!sudo docker build -f sched/Dockerfile -t {SCHED_DOCKER_IMG} .

Sending build context to Docker daemon  8.623MB
Step 1/9 : FROM continuumio/miniconda3
 ---> 1284db959d5d
Step 2/9 : EXPOSE 3000
 ---> Using cache
 ---> 3d5fae5b0095
Step 3/9 : RUN apt-get update -y
 ---> Using cache
 ---> ccf358629d64
Step 4/9 : COPY sched/requirements.txt /
 ---> Using cache
 ---> 7d569bba9145
Step 5/9 : RUN pip install --upgrade pip
 ---> Using cache
 ---> d9ec2292b0a2
Step 6/9 : RUN python3 -m pip install -r requirements.txt
 ---> Using cache
 ---> b0c8b7b12086
Step 7/9 : COPY sched/submit_jobs.py /
 ---> Using cache
 ---> a990caa88735
Step 8/9 : COPY sched/bai_pred_config.json /
 ---> a0ac36c54ad0
Step 9/9 : CMD python submit_jobs.py bai_pred_config.json
 ---> Running in 6278fd2d0ad9
Removing intermediate container 6278fd2d0ad9
 ---> 824c994891bb
Successfully built 824c994891bb
Successfully tagged baimmacr.azurecr.io/baimmschedimg:v1


In [54]:
# Log in to ACR and push the scheduling Docker image (SCHED_DOCKER_IMG).
# Both commands run under sudo, as does the docker build above.
!sudo az acr login --name {ACR_NAME}
!sudo docker push {SCHED_DOCKER_IMG}

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

The push refers to repository [baimmacr.azurecr.io/baimmschedimg]

[1Bf0124f0f: Preparing 
[1Be7d3f0f9: Preparing 
[1B182ab77c: Preparing 
[1Bdb5d4f81: Preparing 
[1Befad35ce: Preparing 
[1B709cca82: Preparing 
[1B4729c572: Preparing 
[1B589f916c: Preparing 
[1Bed6965f1: Preparing 
[1B38eef542: Preparing 
[2B38eef542: Layer already exists K[11A[1K[K[10A[1K[K[9A[1K[K[5A[1K[K[11A[1K[K[3A[1K[K[8A[1K[K[6A[1K[K[4A[1K[K[1A[1K[K[2A[1K[Kv1: digest: sha256:456131c3d23c6f4799d0f2796cd1919dd1d41e8d941fe00bfc698900e97e58a2 size: 2626


Finally, we create the Logic App that acts as a scheduler for this solution. The Logic App and its API connection to Azure Container Instances (ACI) are created using an Azure Resource Manager (ARM) client and the corresponding JSON templates stored in the repo.

In [None]:
# Authenticate as the service principal created earlier, then build the
# ARM client for the subscription.
credentials = ServicePrincipalCredentials(
    client_id=sp_client,
    secret=sp_secret,
    tenant=tenant_id,
)
arm_client = ResourceManagementClient(credentials, subscription_id)

In [None]:
# Create an ACI API connection by deploying its ARM template.
# The template path comes from LA_ACI_CON_JSON rather than a repeated literal.
with open(LA_ACI_CON_JSON) as f:
    aci_api_con_template = json.load(f)

# Template parameters, each wrapped as {"value": ...}.
aci_api_con_params = {"location": {"value": LOCATION},
                      "name": {"value": LA_ACI_CON},
                      "subscription_id": {"value": subscription_id}
                      }

aci_api_con_props = {
    'mode': DeploymentMode.incremental,
    'template': aci_api_con_template,
    'parameters': aci_api_con_params
}

# create_or_update starts a long-running operation and returns a poller. Wait for it
# so that a failed deployment surfaces in this cell and the connection is in place
# before the Logic App that references it is deployed.
aci_api_con_deployment = arm_client.deployments.create_or_update(RESOURCE_GROUP, LA_ACI_CON, aci_api_con_props)
aci_api_con_deployment.wait()


In [None]:
# Create the Logic App by deploying its ARM template.
# The template path comes from LA_JSON rather than a repeated literal.
with open(LA_JSON) as f:
    logic_app_template = json.load(f)

# Template parameters, each wrapped as {"value": ...}: where to deploy, which container
# image to run, the registry credentials, and the ACI API connection to use.
logic_app_params = {"location": {"value": LOCATION},
                    "resource_group": { "value": RESOURCE_GROUP },
                    "name": {"value": LA_WORKFLOW},
                    "subscription_id": {"value": subscription_id},
                    "container_name": { "value": LA_ACI_CONTAINER_NAME },
                    "container_group": { "value": LA_ACI_CONTAINER_GROUP },
                    "image_name": { "value": SCHED_DOCKER_IMG },
                    "acr_pass": { "value": acr_password },
                    "acr_user": { "value": ACR_NAME },
                    "acr_server": { "value": ACR_SERVER },
                    "aci_connection_name": { "value": LA_ACI_CON }
                    }

logic_app_props = {
    'mode': DeploymentMode.incremental,
    'template': logic_app_template,
    'parameters': logic_app_params
}

# create_or_update starts a long-running operation and returns a poller. Wait for it
# so that a failed deployment surfaces in this cell instead of being silently dropped.
logic_app_deployment = arm_client.deployments.create_or_update(RESOURCE_GROUP, LA_WORKFLOW, logic_app_props)
logic_app_deployment.wait()