In [39]:
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
from azureml.core import Workspace, Dataset

subscription_id = '20d4fdf3-6a4b-4f0b-a842-bd7392136332'
resource_group = 'cienciadatos'
workspace_name = 'azureml'

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset = Dataset.get_by_name(workspace, name='AirlinesDelay')
df = dataset.to_pandas_dataframe()

In [19]:
import pandas as pd
import random
def data_generator(context:pd.DataFrame, num_samples:int):

    generated_data = {
        'Flight' : [random.choice(context['Flight']) for _ in range(num_samples)],
        'Time': [float(random.randint(min(df['Time']), max(df['Time']))) for _ in range(num_samples)],
        'Length': [float(random.randint(min(df['Length']), max(df['Length']))) for _ in range(num_samples)],
        'Airline': [random.choice(context['Airline']) for _ in range(num_samples)],
        'AirportFrom': [random.choice(context['AirportFrom']) for _ in range(num_samples)],
        'AirportTo': [random.choice(context['AirportTo']) for _ in range(num_samples)],
        'DayOfWeek': [random.choice(context['DayOfWeek']) for _ in range(num_samples)]
    }
    
    return pd.DataFrame(data=generated_data)


In [23]:
prueba = data_generator(context=df, num_samples=10)

In [40]:

for i in range(100):

    data = data_generator(context=df, num_samples=1)
    data.to_csv(f'dataToPredict/data_{i}.csv', index=False, header=True)
    

## upload data

In [20]:
from azureml.core import Workspace

ws = Workspace.from_config()


generated_data = data_generator(context=df, num_samples=1000)
datastore = ws.get_default_datastore()

train_data_reg = Dataset.Tabular.register_pandas_dataframe(generated_data,
                                                    target=datastore,
                                                    name='data_to_predict')

Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/1041c6ea-2a7f-4ce0-8c6d-1057acaa3b91/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


In [None]:
print("Uploading files to datastore...")
default_ds = ws.get_default_datastore()
default_ds.upload(src_dir="dataToPredict", target_path="batch-data", overwrite=True, show_progress=True)

# Register a dataset for the input data
batch_data_set = Dataset.File.from_files(path=(default_ds, 'batch-data/'), validate=False)
try:
    batch_data_set = batch_data_set.register(workspace=ws, 
                                             name='batch-data',
                                             description='batch data',
                                             create_new_version=True)
except Exception as ex:
    print(ex)

print("Done!")

## Deply models

In [58]:
from azure.ai.ml import MLClient, Input
from azure.ai.ml.entities import BatchEndpoint, BatchDeployment, Model, AmlCompute, Data
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential
from azure.ai.ml.constants import BatchDeploymentOutputAction
from azure.ai.ml.entities import BatchRetrySettings

In [59]:
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential)

Found the config file in: .\config.json


In [60]:
endpoint = BatchEndpoint(
    name='my-first-endpoint',
    description='my very first endpoint'
)

In [61]:
ml_client.batch_endpoints.begin_create_or_update(endpoint).result()

<azure.ai.ml._restclient.v2022_05_01.models._models_py3.BatchEndpointData at 0x14aa7118df0>

In [29]:
enva = ml_client.environments.get(name='experiment_env', label='latest')
model = ml_client.models.get(name='airlines_model', label='latest')
compute_name = 'prueba-DS'

In [66]:

deployment = BatchDeployment(
    name="first-deployment",
    description="airline classifier",
    endpoint_name=endpoint.name,
    model=model,
    code_path='./',
    scoring_script='deploy/batch_driver.py',
    compute=compute_name,
    environment=enva,
    instance_count=1,
    max_concurrency_per_instance=1,
    mini_batch_size=5,
    output_action=BatchDeploymentOutputAction.APPEND_ROW,
    output_file_name="predictions.csv",
    retry_settings=BatchRetrySettings(max_retries=1, timeout=300),
    logging_level="info",
)

In [67]:
ml_client.batch_deployments.begin_create_or_update(deployment).result()

[32mUploading AzureDataSchool (0.48 MBs): 100%|##########| 483402/483402 [00:04<00:00, 108809.58it/s]
[39m



BatchDeployment({'deployment_type': 'Model', 'job_definition': None, 'endpoint_name': 'my-first-endpoint', 'type': None, 'name': 'first-deployment', 'description': 'airline classifier', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourceGroups/cienciadatos/providers/Microsoft.MachineLearningServices/workspaces/azureml/batchEndpoints/my-first-endpoint/deployments/first-deployment', 'Resource__source_path': None, 'base_path': 'c:\\Users\\pablo.tamayo\\Desktop\\DataSchool\\Azure\\AzureDataSchool', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x0000014AA30C14F0>, 'model': '/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourceGroups/cienciadatos/providers/Microsoft.MachineLearningServices/workspaces/azureml/models/airlines_model/versions/4', 'code_configuration': {'code': '/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourceGroups/cienciadatos/providers/Microsoft.MachineL

In [32]:
endpoint = ml_client.batch_endpoints.get(endpoint.name)
endpoint.defaults.deployment_name = deployment.name
ml_client.batch_endpoints.begin_create_or_update(endpoint)

<azure.core.polling._poller.LROPoller at 0x14aa71ceee0>

In [55]:
data_to_predict = ml_client.data.get(name='batch-data', label='latest')

In [68]:
input_data = Input(type=AssetTypes.URI_FOLDER, path=data_to_predict.id)

In [69]:
job = ml_client.batch_endpoints.invoke(
   endpoint_name=endpoint.name,
   deployment_name = deployment.name,
   input=input_data,
)

## De forma antigua

In [45]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core import Experiment, ScriptRunConfig, Environment
# Create a Python environment for the experiment (from a .yml file)
batch_env = Environment.from_conda_specification("experiment_env",  './env/environment.yml')

In [46]:
batch_data_set = Dataset.File.from_files(path=(default_ds, 'batch-data/'), validate=False)

In [52]:
from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep
from azureml.data import OutputFileDatasetConfig
from azureml.core.compute import ComputeTarget, ComputeInstance
from azureml.core.compute_target import ComputeTargetException

# nombre del cluster
compute_name = "prueba-DS"
ws = Workspace.from_config()
# verificación de exixtencia del cluster
aml_compute = ComputeTarget(workspace=ws, name=compute_name)

output_dir = OutputFileDatasetConfig(name='inferences')

parallel_run_config = ParallelRunConfig(
    source_directory="./",
    entry_script="deploy/batch_driver.py",
    mini_batch_size="5",
    error_threshold=10,
    output_action="append_row",
    environment=batch_env,
    compute_target=aml_compute,
    node_count=1)

parallelrun_step = ParallelRunStep(
    name='batch-score',
    parallel_run_config=parallel_run_config,
    inputs=[batch_data_set.as_named_input('batch')],
    output=output_dir,
    arguments=[],
    allow_reuse=True
)

print('Steps defined')

Steps defined


In [53]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

pipeline = Pipeline(workspace=ws, steps=[parallelrun_step])
pipeline_run = Experiment(ws, 'inference-batch').submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)

Created step batch-score [f8ce155a][bb21fd3f-a9a3-43ec-9c0f-1d882b36e361], (This step will run and generate new outputs)
Submitted PipelineRun a6608ca2-df42-4aef-88f5-e94cd77317d3
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/a6608ca2-df42-4aef-88f5-e94cd77317d3?wsid=/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourcegroups/cienciadatos/workspaces/azureml&tid=c7db5234-eb19-42fd-8840-a85829ea4628
PipelineRunId: a6608ca2-df42-4aef-88f5-e94cd77317d3
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/a6608ca2-df42-4aef-88f5-e94cd77317d3?wsid=/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourcegroups/cienciadatos/workspaces/azureml&tid=c7db5234-eb19-42fd-8840-a85829ea4628
PipelineRun Status: Running


StepRunId: 372f8937-10f6-4714-951f-01713fe20319
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/372f8937-10f6-4714-951f-01713fe20319?wsid=/subscriptions/20d4fdf3-6a4b-4f0b-a842-bd7392136332/resourcegroups/cienciadatos/workspace

'Finished'