In [1]:
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

# Authenticate with the default Azure credential chain.
credential = DefaultAzureCredential()

# Coordinates of the Azure ML workspace this notebook works against.
SUBSCRIPTION = "3d030016-c1e4-4d3a-b18f-a967ca328031"
RESOURCE_GROUP = "upmpractica"
WS_NAME = "upmpracticamlws"

# Get a handle to the workspace.
workspace_coordinates = {
    "subscription_id": SUBSCRIPTION,
    "resource_group_name": RESOURCE_GROUP,
    "workspace_name": WS_NAME,
}
ml_client = MLClient(credential=credential, **workspace_coordinates)

In [2]:
# Verify that the handle works correctly.
# If you get an error here, modify your SUBSCRIPTION, RESOURCE_GROUP, and WS_NAME in the previous cell.
ws = ml_client.workspaces.get(WS_NAME)
print(f"{ws.location} : {ws.resource_group}")

uksouth : upmpractica


In [3]:
import os

# Folder that receives the training script written by the next cell;
# creating it is a no-op when it already exists.
train_src_dir = "./src"
os.makedirs(name=train_src_dir, exist_ok=True)

In [4]:
%%writefile {train_src_dir}/main.py
import pandas as pd
import numpy as np
import argparse
import os

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import *
from sklearn.impute import SimpleImputer

import mlflow
import mlflow.sklearn


def _build_pipeline(n_estimators, numeric_features, categorical_features):
    """Assemble the (unfitted) preprocessing + RandomForest pipeline.

    Args:
        n_estimators: number of trees in the random forest.
        numeric_features: column names that are median-imputed and scaled.
        categorical_features: column names that are imputed with the constant
            'missing' and one-hot encoded.

    Returns:
        A scikit-learn ``Pipeline`` with a 'preprocessor' and a 'classifier' step.
    """
    # Numeric columns: fill gaps with the median, then standardize.
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())])

    # Categorical columns: fill gaps with a sentinel, then one-hot encode.
    # Categories unseen during fit are ignored at prediction time.
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))])

    # Combine both branches; only the listed columns are routed to a transformer.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)])

    # 'sqrt' is what 'auto' meant for classifiers; 'auto' was deprecated in
    # scikit-learn 1.1 and removed in 1.3, so the explicit value keeps the
    # script working across versions without changing the model.
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=None,
        max_features='sqrt',
        random_state=123,
    )

    return Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', model)])


def main():
    """Train, evaluate and register a RandomForest arrival-delay classifier.

    Command-line arguments:
        --data: path to the input CSV file (required).
        --test_train_ratio: fraction of rows held out for testing (default 0.3).
        --n_estimators: number of trees in the forest (default 50).
        --registered_model_name: name under which the model is registered in
            MLflow; also the parent folder of the locally saved copy (required).

    The script logs dataset size metrics, buckets ``ArrDelay`` into three
    classes, fits the pipeline on the training split, prints the test
    accuracy, then logs/registers the model and saves it to disk.
    """
    # input and output arguments
    parser = argparse.ArgumentParser()
    # --data and --registered_model_name are required: without them the script
    # would only fail later (reading the CSV / building the output path).
    parser.add_argument("--data", type=str, required=True, help="path to input data")
    parser.add_argument("--test_train_ratio", type=float, required=False, default=0.3)
    parser.add_argument("--n_estimators", required=False, default=50, type=int)
    parser.add_argument("--registered_model_name", type=str, required=True, help="model name")
    args = parser.parse_args()

    # Start logging. The context manager ends the MLflow run on every exit
    # path, so a failure during training does not leave the run open.
    with mlflow.start_run():

        # enable autologging
        mlflow.sklearn.autolog()

        # Load the data
        print(" ".join(f"{k}={v}" for k, v in vars(args).items()))

        print("input data:", args.data)

        cleanFlightsDF = pd.read_csv(args.data)

        mlflow.log_metric("num_samples", cleanFlightsDF.shape[0])
        mlflow.log_metric("num_features", cleanFlightsDF.shape[1] - 1)

        # Delay categories: up to 15, between 15 and 60, above 60.
        # labels=False makes pd.cut return the integer bin index (0, 1, 2).
        splitsDelays = [-float("inf"), 15, 60, float("inf")]
        cleanFlightsDF['ArrDelayBucketed'] = pd.cut(cleanFlightsDF['ArrDelay'], bins=splitsDelays, labels=False)

        # Split the data into training and test sets
        trainDF, testDF = train_test_split(cleanFlightsDF, test_size=args.test_train_ratio, random_state=123)

        # Numeric and categorical feature columns
        numeric_features = ['DepDelay', 'DayOfWeek', 'DepTime']
        categorical_features = ['Origin', 'Dest', 'TailNum']

        pipeline = _build_pipeline(args.n_estimators, numeric_features, categorical_features)

        # Train the RandomForest model. The full frame is passed as X; the
        # column transformer picks out the feature columns listed above.
        pipeline.fit(trainDF, trainDF['ArrDelayBucketed'])

        # Predict on the test set and report accuracy
        predictions_test = pipeline.predict(testDF)
        test_accuracy = accuracy_score(testDF['ArrDelayBucketed'], predictions_test)
        print("Test Accuracy:", test_accuracy)

        # Registering the model to the workspace
        print("Registering the model via MLFlow")
        mlflow.sklearn.log_model(
            sk_model=pipeline,
            registered_model_name=args.registered_model_name,
            artifact_path=args.registered_model_name,
        )

        # Saving the model to a file
        mlflow.sklearn.save_model(
            sk_model=pipeline,
            path=os.path.join(args.registered_model_name, "trained_model"),
        )

# Run the training entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()

Overwriting ./src/main.py


In [5]:
from azure.ai.ml import MLClient, command, Input
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.identity import DefaultAzureCredential

# Workspace handle built from the local config file.
ml_client = MLClient.from_config(credential=DefaultAzureCredential())

# Data asset consumed by the training job, and the name the model is registered under.
data_asset = ml_client.data.get("cleanFlightsDF_train_test", version="2")
registered_model_name = "Trained_flights_model"

# The data asset is mounted read-only into the job as a single file.
training_data_input = Input(
    type=AssetTypes.URI_FILE,
    mode=InputOutputModes.RO_MOUNT,
    path=data_asset.id,
)

job_inputs = {
    "data": training_data_input,
    "test_train_ratio": 0.3,
    "n_estimators": 50,
    "registered_model_name": registered_model_name,
}

# Command line run inside the job; each ${{inputs.*}} placeholder refers to
# the matching key of job_inputs.
training_command = (
    "python main.py"
    " --data ${{inputs.data}}"
    " --test_train_ratio ${{inputs.test_train_ratio}}"
    " --n_estimators ${{inputs.n_estimators}}"
    " --registered_model_name ${{inputs.registered_model_name}}"
)

job = command(
    inputs=job_inputs,
    code="./src/",  # location of source code
    command=training_command,
    environment="azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
    compute="upmpractica-computeds",
    display_name="flights_default_prediction",
)

Found the config file in: /config.json


In [6]:
# Submit the training job defined above; the returned job object is the cell's displayed output.
# NOTE(review): nothing in this cell waits for the job to finish (the captured
# status is "Starting") — confirm it has completed before relying on the
# registered model in later cells.
ml_client.create_or_update(job)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Experiment,Name,Type,Status,Details Page
alejandro,tender_map_dvcf6dv5cm,command,Starting,Link to Azure Machine Learning studio


In [7]:
import uuid

# Unique endpoint name: fixed prefix plus the first 8 characters of a random UUID.
endpoint_suffix = str(uuid.uuid4())[:8]
online_endpoint_name = f"fligths-endpoint-{endpoint_suffix}"

In [8]:
# Expect the endpoint creation to take a few minutes
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
)

# Create an online endpoint with key-based authentication.
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="this is an online endpoint",
    auth_mode="key",
    tags={
        # Name of the data asset the training job reads. The previous value,
        # "credit_defaults", did not match this notebook's flights data.
        "training_dataset": "cleanFlightsDF_train_test",
        "model_type": "sklearn.RandomForestClassifier",
    },
)

# begin_create_or_update returns a poller; .result() blocks until provisioning ends.
endpoint = ml_client.online_endpoints.begin_create_or_update(endpoint).result()

print(f"Endpoint {endpoint.name} provisioning state: {endpoint.provisioning_state}")

Endpoint fligths-endpoint-37fddcb3 provisioning state: Succeeded


In [9]:
# Fetch the endpoint again by name and report its provisioning state.
endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)
retrieval_message = f'Endpoint "{endpoint.name}" with provisioning state "{endpoint.provisioning_state}" is retrieved'
print(retrieval_message)

Endpoint "fligths-endpoint-37fddcb3" with provisioning state "Succeeded" is retrieved


In [10]:
# Let's pick the latest version of the model
registered_model_name = "Trained_flights_model"

# Versions are compared as integers so that, e.g., 10 ranks above 9.
registered_versions = [int(m.version) for m in ml_client.models.list(name=registered_model_name)]
if not registered_versions:
    # max() on an empty list raises an uninformative ValueError; fail with an
    # actionable message instead (same exception type).
    raise ValueError(
        f'No registered versions of model "{registered_model_name}" were found; '
        "make sure the training job has completed and registered the model."
    )

latest_model_version = max(registered_versions)
print(f'Latest model is version "{latest_model_version}" ')

Latest model is version "10" 


In [11]:
# Model to deploy: the latest version of our registered model.
model = ml_client.models.get(name=registered_model_name, version=latest_model_version)

# Create an online deployment named "blue" on the endpoint.
# Expect this deployment to take approximately 6 to 8 minutes.
# If you run into an out of quota error, change the instance_type to a comparable VM that is available.
# Learn more on https://azure.microsoft.com/pricing/details/machine-learning/.
deployment_settings = {
    "name": "blue",
    "endpoint_name": online_endpoint_name,
    "model": model,
    "instance_type": "Standard_DS3_v2",
    "instance_count": 1,
}
blue_deployment = ManagedOnlineDeployment(**deployment_settings)

# Start the long-running operation, then block until it has finished.
deployment_poller = ml_client.begin_create_or_update(blue_deployment)
blue_deployment = deployment_poller.result()

Check: endpoint fligths-endpoint-37fddcb3 exists


............................................................................................................

In [12]:
# Folder for deployment artifacts such as the sample request file;
# creating it is a no-op when it already exists.
deploy_dir = "./deploy"
os.makedirs(name=deploy_dir, exist_ok=True)

In [18]:
import pandas as pd
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Workspace handle built from the local config file, then the validation data asset.
ml_client = MLClient.from_config(credential=DefaultAzureCredential())
data_asset = ml_client.data.get("cleanFlightsDF_validation", version="1")

# Read the validation CSV and break it into plain Python lists
# (column names, row index, row values) for building the request payload.
validation = pd.read_csv(data_asset.path)
columns, index, data = (
    validation.columns.tolist(),
    validation.index.tolist(),
    validation.values.tolist(),
)

Found the config file in: /config.json


In [19]:
import json

# Destination of the sample scoring request inside the deploy folder.
json_file_path = f'{deploy_dir}/sample-request.json'

# Payload: column names, row index and row values nested under "input_data".
split_payload = dict(columns=columns, index=index, data=data)
request_body = dict(input_data=split_payload)

# Serialize the payload and write it to disk in one go.
with open(json_file_path, 'w') as json_file:
    json_file.write(json.dumps(request_body))

In [20]:
# Test the blue deployment with the sample request.
# The request file is referenced through json_file_path (the path the sample
# request was written to) instead of repeating the literal path, so the two
# cannot drift apart.
ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    request_file=json_file_path,
    deployment_name="blue",
)

'[0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [21]:
# Request deletion of the endpoint created earlier. The call returns a poller
# object (shown as the cell output) rather than waiting for the deletion to finish.
ml_client.online_endpoints.begin_delete(name=online_endpoint_name)

<azure.core.polling._poller.LROPoller at 0x7fc0ac097460>

.....