# Batch deployments with a custom output

In [None]:
from azure.ai.ml import MLClient, Input
from azure.ai.ml.entities import (
    BatchEndpoint,
    BatchDeployment,
    Model,
    AmlCompute,
    Data,
    BatchRetrySettings,
    CodeConfiguration,
    Environment,
)
from azure.ai.ml.constants import AssetTypes, BatchDeploymentOutputAction
from azure.identity import DefaultAzureCredential

In [None]:
# Workspace coordinates; replace the placeholders before running the notebook.
subscription_id = "<subscription>"
resource_group = "<resource-group>"
workspace = "<workspace>"

# Client handle used for every workspace operation in the cells below.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

Let's verify if the model we want to deploy, `heart-classifier`, is registered in the model registry. If not, we will register it from a local version we have in the repository:

In [None]:
# Name the model is registered under, and the local folder it is registered
# from when it is not in the registry yet.
model_name = "heart-classifier"
model_local_path = "heart-classifier-mlflow/model"

In [None]:
# Register the model from the local folder only when no registered model
# already carries this name. The generator yields plain booleans, so the check
# does not depend on the truthiness of the SDK's model objects.
if not any(m.name == model_name for m in ml_client.models.list()):
    print(f"Model {model_name} is not registered. Creating...")
    model = ml_client.models.create_or_update(
        Model(name=model_name, path=model_local_path, type=AssetTypes.MLFLOW_MODEL)
    )

Let's get the model:

In [None]:
# Fetch the registered model version labeled "latest"; it is passed to the
# deployment below.
model = ml_client.models.get(name=model_name, label="latest")

First, let's create the endpoint that is going to host the batch deployments. Remember that each endpoint can host multiple deployments at any time; however, only one of them is the default one:

In [None]:
# Local definition of the batch endpoint; nothing is created in the workspace
# until it is submitted through `ml_client`.
endpoint = BatchEndpoint(
    name="heart-classifier-batch",
    description="A heart condition classifier for batch inference",
)

In [None]:
# `begin_create_or_update` only starts a long-running operation and returns a
# poller. Block on `.result()` so the endpoint exists before a deployment is
# created under it.
ml_client.batch_endpoints.begin_create_or_update(endpoint).result()

Batch endpoints can run on any Azure ML compute that already exists in the workspace. That means that multiple batch deployments can share the same compute infrastructure. In this example, we are going to work on an AzureML compute cluster called `cpu-cluster`. Let's verify the compute exists on the workspace or create it otherwise.

In [None]:
# Name of the compute cluster the batch deployment runs on.
compute_name = "cpu-cluster"

# Create the cluster only when no compute with this name exists yet. The
# generator yields plain booleans, so the check does not depend on the
# truthiness of the SDK's compute objects.
if not any(c.name == compute_name for c in ml_client.compute.list()):
    print(f"Compute {compute_name} is not created. Creating...")
    compute_cluster = AmlCompute(
        name=compute_name, description="amlcompute", min_instances=0, max_instances=5
    )
    # Creation is started here without blocking; the next cell polls until the
    # cluster leaves the "Creating" state.
    ml_client.begin_create_or_update(compute_cluster)

Compute may take time to be created. Let's wait for it:

In [None]:
from time import sleep

# Poll the cluster once per second, printing a dot per poll, until its
# provisioning state is no longer "Creating".
print("Waiting for compute", end="")
while True:
    if ml_client.compute.get(name=compute_name).provisioning_state != "Creating":
        break
    sleep(1)
    print(".", end="")

print(" [DONE]")

Authoring a scoring script that can write to the output folder:

In [None]:
%%writefile heart-classifier-mlflow/code/batch_driver_parquet.py

import os
import mlflow
import pandas as pd
from pathlib import Path

def init():
    """Load the MLflow model and read the output folder from the environment.

    Sets the module-level names ``model`` and ``output_path`` that ``run``
    reads. Raises ``KeyError`` if either environment variable is missing.
    """
    global model, output_path

    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder; the model files sit in its "model"
    # subfolder (change the folder name here if yours differs).
    model_root = os.environ["AZUREML_MODEL_DIR"]
    output_path = os.environ['AZUREML_BI_OUTPUT_PATH']
    model = mlflow.pyfunc.load_model(os.path.join(model_root, "model"))

def run(mini_batch):
    """Score every CSV file in ``mini_batch`` and write one parquet file each.

    Each file is read with pandas, scored with the module-level ``model``, and
    saved with an added ``prediction`` column to ``output_path`` under the
    input file's stem plus ``.parquet``.

    Args:
        mini_batch: Iterable of paths to CSV files.

    Returns:
        The ``mini_batch`` argument, unchanged.
    """
    for csv_path in mini_batch:
        frame = pd.read_csv(csv_path)
        # Predict before adding the column so the model sees only the inputs.
        frame['prediction'] = model.predict(frame)

        # Same base name as the input file, with a parquet extension.
        target_name = Path(csv_path).stem + '.parquet'
        frame.to_parquet(os.path.join(output_path, target_name))

    return mini_batch


Let's create a deployment under the given endpoint.

In [None]:
# Environment the scoring script runs in: a base container image plus the
# conda dependencies listed in the repository's conda.yml.
# NOTE(review): the image is referenced by the `latest` tag, so the resolved
# image may change over time — confirm whether a pinned tag is preferred.
environment = Environment(
    conda_file="./heart-classifier-mlflow/environment/conda.yml",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest",
)

In [None]:
# Local definition of the batch deployment: it ties together the endpoint,
# the registered model, the environment, the scoring script and the compute
# cluster defined in the cells above.
deployment = BatchDeployment(
    name="classifier-xgboost-parquet",
    description="A heart condition classifier based on XGBoost",
    endpoint_name=endpoint.name,
    model=model,
    environment=environment,
    # Folder uploaded with the deployment and the script inside it that
    # provides `init()` and `run()`.
    code_configuration=CodeConfiguration(
        code="./heart-classifier-mlflow/code/",
        scoring_script="batch_driver_parquet.py",
    ),
    compute=compute_name,
    instance_count=2,
    max_concurrency_per_instance=2,
    mini_batch_size=2,
    # The scoring script writes its own parquet files to the output folder,
    # which is presumably why only the summary is requested here.
    output_action=BatchDeploymentOutputAction.SUMMARY_ONLY,
    # NOTE(review): timeout is presumably expressed in seconds — confirm
    # against the BatchRetrySettings reference.
    retry_settings=BatchRetrySettings(max_retries=3, timeout=300),
    logging_level="info",
)

In [None]:
# `begin_create_or_update` only starts a long-running operation and returns a
# poller. Block on `.result()` so the deployment is fully provisioned before a
# job is invoked against it.
ml_client.batch_deployments.begin_create_or_update(deployment).result()

## Testing the endpoint

Once the deployment is created, it is ready to receive jobs. Let's first register a data asset so we can run the job against it. This data asset is a folder containing multiple CSV files that we want to process in parallel using the batch endpoint we just created.

In [None]:
# Local folder holding the input files, and the name the data asset is
# registered under.
data_path = "heart-classifier-mlflow/dataset/"
dataset_name = "heart-dataset-unlabeled"

# Describe the folder as a URI_FOLDER data asset.
heart_dataset_unlabeled = Data(
    path=data_path,
    type=AssetTypes.URI_FOLDER,
    description="An unlabeled dataset for heart classification",
    name=dataset_name,
)

# Register the asset in the workspace, or update it if it already exists.
ml_client.data.create_or_update(heart_dataset_unlabeled)

In [None]:
# Re-read the asset from the workspace (the version labeled "latest") so that
# the registered object, including its `id`, is available for the job input.
heart_dataset_unlabeled = ml_client.data.get(name=dataset_name, label="latest")

Let's use this data as an input for the job:

In [None]:
# Job input pointing at the registered data asset by its id.
# NOTE(review): the name `input` shadows the Python builtin for the rest of the
# notebook; it is kept because the invoke cell below reads this variable.
input = Input(type=AssetTypes.URI_FOLDER, path=heart_dataset_unlabeled.id)

In [None]:
# Submit a batch scoring job to the named deployment. The deployment is
# addressed explicitly, so this does not rely on the endpoint's default
# deployment.
job = ml_client.batch_endpoints.invoke(
    endpoint_name=endpoint.name, deployment_name=deployment.name, input=input
)

You can use the returned job object to check the status of the job:

In [None]:
# Look the job up by name; re-run this cell to refresh the displayed state.
ml_client.jobs.get(job.name)

## Exploring the results

We can download the results from the job by downloading the output with name `score`:

In [None]:
# Download the job output named `score` into the current directory.
# NOTE(review): presumably the job must have completed before its outputs can
# be downloaded — confirm, and re-check the job status above if this fails.
ml_client.jobs.download(name=job.name, download_path=".", output_name="score")

In [None]:
import glob

import pandas as pd

# Gather every parquet file found in the downloaded `score` output folder and
# stack them into a single DataFrame.
output_files = glob.glob("named-outputs/score/*.parquet")
score = pd.concat(map(pd.read_parquet, output_files))

In [None]:
# Display the combined predictions.
score