## Configuring Data Collector in Azure Machine Learning (AzureML)

### System configuration

In [None]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    Data,
    DataCollector,
    DeploymentCollection
)
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

In [None]:
# Identify the target Azure Machine Learning workspace.
# Replace each placeholder with your own value before running the cells that
# follow; these three names are passed to MLClient in the next cell.
subscription_id = "<YOUR_AZURE_SUBSCRIPTION>"
resource_group_name = "<YOUR_AZURE_ML_RESOURCE_GROUP>"
workspace_name = "<YOUR_AZURE_ML_WORKSPACE>"

In [None]:
# Build the workspace client used by every later cell.
# Authentication is delegated to DefaultAzureCredential.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group_name,
    workspace_name=workspace_name,
)
# Echo the client so the notebook displays it.
ml_client

In [None]:
# Name under which the online endpoint is addressed in all later cells.
endpoint_name = "endpoint-data-collector"

# Describe the managed online endpoint with key-based authentication.
# This only builds the entity; the create call is issued in a later cell.
endpoint = ManagedOnlineEndpoint(
    auth_mode="key",
    description="Endpoint to test data collector",
    name=endpoint_name,
)
# Echo the entity so the notebook displays it.
endpoint

### Option 1: Local model deployment

In [None]:
# Option 1: deploy straight from local artifacts.
# The model points at a local pickle file; the environment combines a base
# Docker image with a local conda specification.
model = Model(path="./model/sklearn_regression_model.pkl")

env = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file="./environment/conda.yaml",
)

### Option 2: Registered model deployment

In [None]:
# Register the contents of ./model/ in the workspace as a custom model asset
# named "scikit-model".
file_model = Model(
    name="scikit-model",
    description="SciKit model created from local file",
    type=AssetTypes.CUSTOM_MODEL,
    path="./model/",
)
ml_client.models.create_or_update(file_model)

In [None]:
from azure.ai.ml.entities import Environment

# Register an environment named "scikit-env", built from a base Docker image
# plus the local conda specification.
env_docker_conda = Environment(
    name="scikit-env",
    description="SciKit env from Docker image and Conda file",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
    conda_file="./environment/conda.yaml",
)
ml_client.environments.create_or_update(env_docker_conda)

In [None]:
# Option 2: reference the registered model and environment by name and version.
# These assignments override the local `model` / `env` objects from Option 1.
# NOTE(review): the versions are hard-coded; confirm they match the versions
# actually produced by the registration cells above in your workspace.
model = "scikit-model:1"
env = "azureml:scikit-env:2"

### Configuring managed endpoint

In [None]:
# Optional: to route a collection to an explicit data asset, uncomment the
# definitions below and pass the asset as `data=` to the matching
# DeploymentCollection.
# input_data_asset = Data(
#     name = 'model_inputs',
#     version ='1',
#     path = 'azureml://datastores/model_inputs/paths/model_inputs'
# )
# output_data_asset = Data(
#     name = 'model_outputs',
#     version = '1',
#     path = 'azureml://datastores/model_inputs/paths/model_outputs'
# )

# Enable collection for both payload streams; each key gets its own
# DeploymentCollection instance with collection switched on.
collections = {
    collection_name: DeploymentCollection(enabled="true")
    for collection_name in ("model_inputs", "model_outputs")
}

# Bundle the per-collection settings into the deployment-level collector.
data_collector = DataCollector(collections=collections)

In [None]:
# Describe the "blue" deployment: which model and environment to serve, the
# scoring script to run, the compute size, and the data collector to attach.
blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=endpoint_name,
    model=model,
    environment=env,
    code_configuration=CodeConfiguration(
        code="./onlinescoring",
        scoring_script="score_datacollector.py",
    ),
    instance_type="Standard_DS2_v2",
    instance_count=1,
    data_collector=data_collector,
)

In [None]:
# Create the endpoint and block until provisioning completes.
# begin_create_or_update only starts a long-running operation and returns a
# poller; waiting on it ensures the endpoint exists before a deployment is
# attached to it in a later cell.
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

In [None]:
# Fetch the endpoint by name so the notebook displays its current state.
ml_client.online_endpoints.get(name=endpoint_name)

In [None]:
# Create the "blue" deployment and block until it is fully provisioned.
# The call returns a poller for a long-running operation; waiting on it
# ensures the deployment exists before traffic is routed to it.
ml_client.online_deployments.begin_create_or_update(blue_deployment).result()

In [None]:
# Route 100% of the endpoint's traffic to the "blue" deployment.
# Wait for the update to finish so the test invocation that follows runs
# against the final traffic configuration.
endpoint.traffic = {"blue": 100}
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

In [None]:
# Inspect the "blue" deployment by pulling the last 50 lines of its logs.
ml_client.online_deployments.get_logs(
    name="blue",
    endpoint_name=endpoint_name,
    lines=50,
)

In [None]:
# Send the sample request file to the "blue" deployment and display the
# response returned by the endpoint.
ml_client.online_endpoints.invoke(
    request_file="./sample-request.json",
    deployment_name="blue",
    endpoint_name=endpoint_name,
)

### Delete managed endpoint (and its deployments)

In [None]:
# Delete the endpoint: the call targets the endpoint itself by name, not only
# the "blue" deployment. It starts a long-running delete operation.
ml_client.online_endpoints.begin_delete(name=endpoint_name)