In [1]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential, AzureCliCredential
from azure.ai.ml import MLClient, Input
from azure.ai.ml.constants import AssetTypes, InputOutputModes, BatchDeploymentOutputAction
from azure.ai.ml.entities import BatchEndpoint, Environment, BatchDeployment, BatchRetrySettings, CodeConfiguration
import mlflow

In [2]:
try:
    credential = DefaultAzureCredential()
    # Check if given credential can get token successfully.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work
    # This will open a browser page for
    credential = InteractiveBrowserCredential()

In [3]:
config_path = './.azureml/config.json'
ml_client = MLClient.from_config(credential=credential, config_path=config_path)


# Se configura tracking url ya que se está corriendo localmente, en azure compute es configurado automáticamente
azureml_mlflow_uri = ml_client.workspaces.get(ml_client.workspace_name).mlflow_tracking_uri
mlflow.set_tracking_uri(azureml_mlflow_uri)

Found the config file in: .\.azureml\config.json


In [4]:
import warnings

warnings.simplefilter("ignore")

In [6]:
import random
import string

allowed_chars = string.ascii_lowercase + string.digits
endpoint_suffix = "".join(random.choice(allowed_chars) for x in range(5))
endpoint_name = "andres-diabetes-batch-" + endpoint_suffix

endpoint = BatchEndpoint(
    name=endpoint_name,
    description="A batch endpoint for scoring entities of the dataset.",
    tags={"type": "classification",
          "model": "sk-learn, keras"}
)

ml_client.begin_create_or_update(endpoint).result()

<azure.ai.ml._restclient.v2022_05_01.models._models_py3.BatchEndpointData at 0x241ce3435e0>

In [7]:
#Obtener modelo

model_name = "xgboost_diabetes_classifier"
model = ml_client.models.get(name=model_name, label="latest")

#Configurar el ambiente para crear en el caso de que no esté creado
env = Environment(
    conda_file="./.condaenv/deploy_env.yml",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest",
)

#Setear el nombre del computo, este debe ser creado previamente en azure ml
compute_name = "dedicated-cpu-cluster"


deployment = BatchDeployment(
    name="diabetes-classification-dpl",
    description="A deployment using sk-learn to solve the diabetes classification dataset.",
    endpoint_name=endpoint_name,
    model=model,
    code_configuration=CodeConfiguration(
        code="./deploy/", scoring_script="batch_driver.py"
    ),
    environment=env,
    compute=compute_name,
    instance_count=1,
    max_concurrency_per_instance=1,
    mini_batch_size=1,
    output_action=BatchDeploymentOutputAction.APPEND_ROW,
    output_file_name="predictions.csv",
    retry_settings=BatchRetrySettings(max_retries=3, timeout=30),
    logging_level="info",
)


ml_client.begin_create_or_update(deployment).result()


endpoint = ml_client.batch_endpoints.get(endpoint_name)
endpoint.defaults.deployment_name = deployment.name
ml_client.batch_endpoints.begin_create_or_update(endpoint).result()

print(f"The default deployment is {endpoint.defaults.deployment_name}")

Uploading deploy (0.0 MBs): 100%|##########| 4344/4344 [00:00<00:00, 51976.52it/s]




The default deployment is diabetes-classification-dpl


In [8]:
endpoint_name

'andres-diabetes-batch-medby'

In [9]:
data = ml_client.data.get(name="input_diabetes_folder", label="latest")
input = Input(type=AssetTypes.URI_FOLDER, path=data.id)
batch_ds = ml_client.datastores.get(name="workspaceblobstore")

print("input: ", input)
print("batch id: ",batch_ds.id)
print("endpoint name: ",endpoint_name)

input:  {'type': 'uri_folder', 'path': '/subscriptions/02244025-226f-46fa-91a7-e856e479198d/resourceGroups/rg-analitica-formacion-dev/providers/Microsoft.MachineLearningServices/workspaces/aml-analitica-formacion-dev/data/input_diabetes_folder/versions/1'}
batch id:  /subscriptions/02244025-226f-46fa-91a7-e856e479198d/resourceGroups/rg-analitica-formacion-dev/providers/Microsoft.MachineLearningServices/workspaces/aml-analitica-formacion-dev/datastores/workspaceblobstore
endpoint name:  andres-diabetes-batch-medby


In [10]:
import time
TS = int(time.time())
FILE_RESULT_NAME = f'result_{TS}.json'

job = ml_client.batch_endpoints.invoke(
    endpoint_name=endpoint_name,
    input=input
)

In [13]:
scoring_job = list(ml_client.jobs.list(parent_job_name=job.name))[0]

ml_client.jobs.download(name=scoring_job.name, download_path=".", output_name="score")

Downloading artifact azureml://datastores/workspaceblobstore/paths/azureml/6e820e2c-ba34-442c-9758-a6815589239f/score/ to named-outputs\score


In [14]:
import pandas as pd
df = pd.read_csv("named-outputs/score/predictions.csv")

In [15]:
df

Unnamed: 0,{'predictions': ['diabetes','diabetes','no diabetes','no diabetes']}
