In [39]:
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
from azureml.core import Workspace, Dataset

subscription_id = '20d4fdf3-6a4b-4f0b-a842-bd7392136332'
resource_group = 'cienciadatos'
workspace_name = 'azureml'

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset = Dataset.get_by_name(workspace, name='AirlinesDelay')
df = dataset.to_pandas_dataframe()

In [19]:
import pandas as pd
import random
def data_generator(context:pd.DataFrame, num_samples:int):

    generated_data = {
        'Flight' : [random.choice(context['Flight']) for _ in range(num_samples)],
        'Time': [float(random.randint(min(df['Time']), max(df['Time']))) for _ in range(num_samples)],
        'Length': [float(random.randint(min(df['Length']), max(df['Length']))) for _ in range(num_samples)],
        'Airline': [random.choice(context['Airline']) for _ in range(num_samples)],
        'AirportFrom': [random.choice(context['AirportFrom']) for _ in range(num_samples)],
        'AirportTo': [random.choice(context['AirportTo']) for _ in range(num_samples)],
        'DayOfWeek': [random.choice(context['DayOfWeek']) for _ in range(num_samples)]
    }
    
    return pd.DataFrame(data=generated_data)


In [23]:
prueba = data_generator(context=df, num_samples=10)

In [40]:

for i in range(100):

    data = data_generator(context=df, num_samples=1)
    data.to_csv(f'dataToPredict/data_{i}.csv', index=False, header=True)
    

## upload data

In [20]:
from azureml.core import Workspace

ws = Workspace.from_config()


generated_data = data_generator(context=df, num_samples=1000)
datastore = ws.get_default_datastore()

train_data_reg = Dataset.Tabular.register_pandas_dataframe(generated_data,
                                                    target=datastore,
                                                    name='data_to_predict')

Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/1041c6ea-2a7f-4ce0-8c6d-1057acaa3b91/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


In [None]:
print("Uploading files to datastore...")
default_ds = ws.get_default_datastore()
default_ds.upload(src_dir="dataToPredict", target_path="batch-data", overwrite=True, show_progress=True)

# Register a dataset for the input data
batch_data_set = Dataset.File.from_files(path=(default_ds, 'batch-data/'), validate=False)
try:
    batch_data_set = batch_data_set.register(workspace=ws, 
                                             name='batch-data',
                                             description='batch data',
                                             create_new_version=True)
except Exception as ex:
    print(ex)

print("Done!")

## Batch deployment

In [74]:
from azure.ai.ml import MLClient, Input
from azure.ai.ml.entities import BatchEndpoint, BatchDeployment, Model, AmlCompute, Data, CodeConfiguration
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential
from azure.ai.ml.constants import BatchDeploymentOutputAction
from azure.ai.ml.entities import BatchRetrySettings

In [59]:
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential)

Found the config file in: .\config.json


In [60]:
endpoint = BatchEndpoint(
    name='my-first-endpoint',
    description='my very first endpoint'
)

In [61]:
ml_client.batch_endpoints.begin_create_or_update(endpoint).result()

<azure.ai.ml._restclient.v2022_05_01.models._models_py3.BatchEndpointData at 0x14aa7118df0>

In [29]:
enva = ml_client.environments.get(name='experiment_env', label='latest')
model = ml_client.models.get(name='airlines_model', label='latest')
compute_name = 'prueba-DS'

In [96]:

deployment = BatchDeployment(
    name="first-deployment",
    description="airline classifier",
    endpoint_name=endpoint.name,
    model=model,
    code_path='./',
    scoring_script='deploy/batch_driver.py',
    compute=compute_name,
    environment=enva,
    instance_count=1,
    max_concurrency_per_instance=1,
    mini_batch_size=5,
    output_action=BatchDeploymentOutputAction.APPEND_ROW,
    output_file_name="predictions.csv",
    retry_settings=BatchRetrySettings(max_retries=1, timeout=300),
    logging_level="info",
)

In [71]:
ml_client.batch_deployments.begin_create_or_update(deployment).result()

[32mUploading AzureDataSchool (0.48 MBs): 100%|##########| 484050/484050 [00:04<00:00, 112203.60it/s]
[39m



BatchDeployment({'deployment_type': 'Model', 'job_definition': None, 'endpoint_name': 'my-first-endpoint', 'type': None, 'name': 'first-deployment', 'description': 'airline classifier', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourceGroups/cienciadatos/providers/Microsoft.MachineLearningServices/workspaces/azureml/batchEndpoints/my-first-endpoint/deployments/first-deployment', 'Resource__source_path': None, 'base_path': 'c:\\Users\\pablo.tamayo\\Desktop\\DataSchool\\Azure\\AzureDataSchool', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x0000014AA2E2A7C0>, 'model': '/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourceGroups/cienciadatos/providers/Microsoft.MachineLearningServices/workspaces/azureml/models/airlines_model/versions/4', 'code_configuration': {'code': '/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourceGroups/cienciadatos/providers/Microsoft.MachineL

In [32]:
endpoint = ml_client.batch_endpoints.get(endpoint.name)
endpoint.defaults.deployment_name = deployment.name
ml_client.batch_endpoints.begin_create_or_update(endpoint)

<azure.core.polling._poller.LROPoller at 0x14aa71ceee0>

In [95]:
data_to_predict = ml_client.data.get(name='data_to_predict', label='latest')

In [100]:
data_to_predict.id

'/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourceGroups/cienciadatos/providers/Microsoft.MachineLearningServices/workspaces/azureml/data/data_to_predict/versions/2'

In [97]:
input_data = Input(type=AssetTypes.MLTABLE, path=data_to_predict.id)

In [101]:
job = ml_client.batch_endpoints.invoke(
   endpoint_name=endpoint.name,
   deployment_name = deployment.name,
   input={
      'prueba': 2
   },
)

ValidationException: Unsupported input type please use a dictionary of either a path on the datastore, public URI, a registered data asset, or a local folder path.

## Online deployment

In [75]:
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment, Model, Environment, CodeConfiguration
from azure.identity import DefaultAzureCredential

credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential)

Found the config file in: .\config.json


In [77]:
# Define an endpoint name
endpoint_name = "my-endpoint"

# Example way to define a random name
import datetime

endpoint_name = "endpt-" + datetime.datetime.now().strftime("%m%d%H%M%f")

# create an online endpoint
endpoint_online = ManagedOnlineEndpoint(
    name = endpoint_name, 
    description="this is a sample endpoint",
    auth_mode="key"
)

In [78]:
enva = ml_client.environments.get(name='experiment_env', label='latest')
model = ml_client.models.get(name='airlines_model', label='latest')

In [93]:
blue_deployment = ManagedOnlineDeployment(
    name="online-deploy",
    endpoint_name=endpoint_online.name,
    model=model,
    environment=enva,
    code_configuration=CodeConfiguration(
        code="./", scoring_script="deploy/online_driver.py"
    ),
    instance_type="Standard_DS3_v2",
    instance_count=1,
)

In [90]:
ml_client.begin_create_or_update(endpoint_online).result()

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://endpt-04041349371390.eastus.inference.ml.azure.com/score', 'openapi_uri': 'https://endpt-04041349371390.eastus.inference.ml.azure.com/swagger.json', 'name': 'endpt-04041349371390', 'description': 'this is a sample endpoint', 'tags': {}, 'properties': {'azureml.onlineendpointid': '/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourcegroups/cienciadatos/providers/microsoft.machinelearningservices/workspaces/azureml/onlineendpoints/endpt-04041349371390', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/oe:bf392800-56a0-4a74-95d7-79199bba48e0:e95ae1af-34dc-47a9-86e9-4271afad6e16?api-version=2022-02-01-preview'}, 'print_as_yaml': True, 'id': '/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourceGroups/cienciadatos/providers/

In [94]:
ml_client.online_deployments.begin_create_or_update(blue_deployment).result()

Check: endpoint endpt-04041349371390 exists
data_collector is not a known attribute of class <class 'azure.ai.ml._restclient.v2022_02_01_preview.models._models_py3.ManagedOnlineDeployment'> and will be ignored


HttpResponseError: (BadRequest) The request is invalid.
Code: BadRequest
Message: The request is invalid.
Exception Details:	(InferencingClientCreateDeploymentFailed) InferencingClient HttpRequest error, error detail: {"errors":{"VmSize":["Not enough quota available for Standard_DS3_v2 in SubscriptionId 20d4fdf3-6a4b-4f0b-a842-bd7392136332. Current usage/limit: 2/6. Additional needed: 8 Please see troubleshooting guide, available here: https://aka.ms/oe-tsg#error-outofquota"]},"type":"https://tools.ietf.org/html/rfc7231#section-6.5.1","title":"One or more validation errors occurred.","status":400,"traceId":"00-aba4eaaf69f578d147aad6e66d91bcbe-1149f3039850126f-01"}
	Code: InferencingClientCreateDeploymentFailed
	Message: InferencingClient HttpRequest error, error detail: {"errors":{"VmSize":["Not enough quota available for Standard_DS3_v2 in SubscriptionId 20d4fdf3-6a4b-4f0b-a842-bd7392136332. Current usage/limit: 2/6. Additional needed: 8 Please see troubleshooting guide, available here: https://aka.ms/oe-tsg#error-outofquota"]},"type":"https://tools.ietf.org/html/rfc7231#section-6.5.1","title":"One or more validation errors occurred.","status":400,"traceId":"00-aba4eaaf69f578d147aad6e66d91bcbe-1149f3039850126f-01"}
Additional Information:Type: ComponentName
Info: {
    "value": "managementfrontend"
}Type: Correlation
Info: {
    "value": {
        "operation": "aba4eaaf69f578d147aad6e66d91bcbe",
        "request": "25a4e1dbfdeadcb6"
    }
}Type: Environment
Info: {
    "value": "eastus"
}Type: Location
Info: {
    "value": "eastus"
}Type: Time
Info: {
    "value": "2023-04-04T20:04:22.478423+00:00"
}