In [98]:
import configparser

from azure.ai.ml import Input, load_component, MLClient
from azure.ai.ml.entities import AmlCompute
from azure.ai.ml.dsl import pipeline
from azure.identity import DefaultAzureCredential

In [99]:
import os
from azure.identity import DefaultAzureCredential
from azure.identity import AzureCliCredential
from azure.ai.ml import automl, Input, MLClient, command

from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Data
from azure.ai.ml.automl import (
    classification,
    ClassificationPrimaryMetrics,
    ClassificationModels,
)

In [100]:
import configparser
# Load the Azure workspace coordinates from a local config file so that no
# identifiers are hardcoded in the notebook.
config = configparser.ConfigParser()
# read_file() on an explicitly opened file raises FileNotFoundError when the
# file is missing; read() would silently skip it and the lookups below would
# then fail with a much less helpful NoSectionError.
with open('Azure_credentials.cfg') as cfg_file:
    config.read_file(cfg_file)

# All values live in the [Azure] section of the config file.
subscription_id = config.get('Azure', 'subscription_id')
resource_group = config.get('Azure', 'resource_group')
workspace_name = config.get('Azure', 'workspace')
datastore_name = config.get('Azure', 'datastore_name')
DataStoreURI = config.get('Azure', 'DataStoreURI')
# Do not print these values: cell outputs are saved together with the notebook.

In [101]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Authenticate with DefaultAzureCredential.
credential = DefaultAzureCredential()

# Build a client handle for the workspace described by the config values.
workspace_scope = {
    "subscription_id": subscription_id,
    "resource_group_name": resource_group,
    "workspace_name": workspace_name,
}
ml_client = MLClient(credential=credential, **workspace_scope)

# Placeholder; rebound later when the compute target is fetched or created.
cpu_cluster = None

Cargamos el dataset al URI-storage

In [102]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Local path of the dataset, relative to this notebook (the Data folder sits
# next to the notebook). Update it to match where you downloaded the data.
my_path = "./Data/water_potability_ds.csv"
# Version label of the data asset.
v1 = "initial"

my_data = Data(
    name="water_potability_ds",
    version=v1,
    description="water_potability_ds",
    path=my_path,
    type=AssetTypes.URI_FILE,
)

# Create the data asset only if this name/version is not already registered.
# `data_asset` is bound on both paths so later cells can rely on it.
try:
    data_asset = ml_client.data.get(name=my_data.name, version=my_data.version)
    print(
        f"Data asset already exists. Name: {my_data.name}, version: {my_data.version}"
    )
except Exception:
    # `except Exception` rather than a bare `except`, so KeyboardInterrupt and
    # SystemExit still propagate.
    # NOTE(review): narrowing this to the SDK's "not found" error would avoid
    # masking authentication or network failures — confirm the exact type.
    data_asset = ml_client.data.create_or_update(my_data)
    print(f"Data asset created. Name: {my_data.name}, version: {my_data.version}")

Data asset already exists. Name: water_potability_ds, version: initial


In [103]:
from azure.ai.ml.entities import AmlCompute
# Name of the compute cluster used as the pipeline's default compute.
cpu_compute_target = "cpu-cluster"

try:
    # Reuse the cluster when one with this name already exists.
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
    print(
        f"You already have a cluster named {cpu_compute_target}, we'll reuse it as is."
    )

except Exception:
    # The lookup failed, so define a new cluster that can scale down to zero nodes.
    print("Creating a new cpu compute target...")
    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        type="amlcompute",
        size="STANDARD_DS3_V2",
        min_instances=0,
        max_instances=4,
        idle_time_before_scale_down=180,
        tier="Dedicated",
    )
    print(
        f"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}"
    )
    # begin_create_or_update() returns a long-running-operation poller; wait on
    # .result() so that cpu_cluster holds the provisioned compute (not the
    # poller) and later cells do not run before the cluster exists.
    cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()

You already have a cluster named cpu-cluster, we'll reuse it as is.


In [104]:
# Pipeline step names. Each one is expected at
# ./components/<name>_component/<name>.yml
component_names = [
    'description',
    'clean',
    'correlation',
    'split',
    'logistic_regression_train',
    'decission_trees_train',
    'score',
    'evaluate',
]

# Load every component definition from its YAML file and register it in the
# workspace, keeping the registered components keyed by step name.
components = {}
for name in component_names:
    yaml_path = f"./components/{name}_component/{name}.yml"
    component = load_component(source=yaml_path)
    components[name] = ml_client.create_or_update(component)


Uploading src (0.0 MBs):   0%|          | 0/1388 [00:00<?, ?it/s]Uploading src (0.0 MBs): 100%|██████████| 1388/1388 [00:00<00:00, 13231.90it/s]Uploading src (0.0 MBs): 100%|██████████| 1388/1388 [00:00<00:00, 12826.99it/s]




In [105]:
# Pipeline definition: correlation plot + clean -> split -> train -> score -> evaluate.
#
# Parameter:
#   pipeline_input_data: the raw dataset input, passed to both the correlation
#       and the clean components.
# Returns a dict that exposes three node outputs as pipeline outputs:
#   'pairplot', 'model' and 'report'.
#
# NOTE(review): the azure.ai.ml DSL appears to derive node names from the local
# variable names below and the pipeline description from the function docstring;
# confirm before renaming locals or adding a docstring, since either may change
# the submitted job.
@pipeline(
    default_compute=cpu_compute_target
)
def water_potability_logistic_regression(pipeline_input_data):
    # Correlation analysis runs on the raw input, independently of the cleaning branch.
    correlation_node = components['correlation'](
        data=pipeline_input_data
    )
    # Cleaning also starts from the raw input.
    clean_node = components['clean'](
        data=pipeline_input_data
    )
    # Split the cleaned data; the node exposes train_output and test_output.
    split_node = components['split'](
        split_data=clean_node.outputs.clean_data_output
    )
    # Train on the train split with 'Potability' passed as the objective.
    logistic_regression_train_node = components['logistic_regression_train'](
        train_data=split_node.outputs.train_output,
        objective='Potability'
    )
    # Score the trained model on the test split.
    score_node = components['score'](
        model=logistic_regression_train_node.outputs.model_output,
        test_data=split_node.outputs.test_output,
    )
    # Evaluate using the test split and the predictions from the score node.
    evaluate_node = components['evaluate'](
        test_data=split_node.outputs.test_output,
        predict_data=score_node.outputs.predict_output,
        objective='Potability',
        target_name='Potable',
    )
    # Pipeline-level outputs, keyed by the name they are published under.
    return {
        'pairplot': correlation_node.outputs.pairplot_image_output,
        'model': logistic_regression_train_node.outputs.model_output,
        'report': evaluate_node.outputs.report_output
    }

In [95]:
# Look up the registered data asset and wrap its path as the pipeline input.
# NOTE(review): the asset is looked up under `datastore_name` from the config
# file, while it was registered as "water_potability_ds" — confirm they match.
data_asset = ml_client.data.get(
    name=datastore_name,
    version=v1,
)
water_potability = Input(
    type="uri_file",
    path=data_asset.path,
)
# NOTE(review): this rebinds `pipeline`, shadowing the decorator imported from
# azure.ai.ml.dsl; re-running the pipeline-definition cell afterwards would no
# longer find the decorator. The name is kept because the submission cell uses it.
pipeline = water_potability_logistic_regression(
    pipeline_input_data=water_potability,
)

In [96]:
# Submit the pipeline under the 'pipeline-exp' experiment.
submit_options = {"experiment_name": 'pipeline-exp'}
pipeline_job = ml_client.jobs.create_or_update(pipeline, **submit_options)

# Stream the submitted job's logs into the cell output.
submitted_job_name = pipeline_job.name
ml_client.jobs.stream(submitted_job_name)

RunId: blue_squash_vqklcqmk9w
Web View: https://ml.azure.com/runs/blue_squash_vqklcqmk9w?wsid=/subscriptions/7445df36-7a0b-4835-915a-2215e66fff00/resourcegroups/RESS-TRABAJO2/workspaces/WS-TRABAJO2

Streaming logs/azureml/executionlogs.txt

[2023-11-08 15:00:52Z] Submitting 2 runs, first five are: 11929846:69ab0ecd-1e6a-43e7-b7be-de56b639f4fc,fb47725b:0695d85a-ca2f-4a2b-bd7a-77de6fca89cd
[2023-11-08 15:00:55Z] Completing processing run id 0695d85a-ca2f-4a2b-bd7a-77de6fca89cd.
[2023-11-08 15:00:55Z] Completing processing run id 69ab0ecd-1e6a-43e7-b7be-de56b639f4fc.
[2023-11-08 15:00:56Z] Submitting 1 runs, first five are: ac9f3efe:a2bc1e44-cdec-4c4e-93dc-a8907291f8b1
[2023-11-08 15:01:00Z] Completing processing run id a2bc1e44-cdec-4c4e-93dc-a8907291f8b1.
[2023-11-08 15:01:00Z] Submitting 1 runs, first five are: 947279c6:7687b8e0-3e7a-49a8-ad05-15d9d1d362fe
[2023-11-08 15:01:04Z] Completing processing run id 7687b8e0-3e7a-49a8-ad05-15d9d1d362fe.
[2023-11-08 15:01:04Z] Submitting 1 runs,

In [97]:
# Download the job's outputs and artifacts (all=True) into ./pipeline_output.
# NOTE(review): jobs.download() appears to return None, so `output` likely
# carries no information — confirm before relying on it.
output = ml_client.jobs.download(
    name=pipeline_job.name,
    download_path='./pipeline_output',
    all=True,
)

Downloading artifact azureml://subscriptions/7445df36-7a0b-4835-915a-2215e66fff00/resourcegroups/RESS-TRABAJO2/workspaces/WS-TRABAJO2/datastores/workspaceblobstore/paths/azureml/9693a7fb-9498-4324-89bb-461ac19bd679/model_output to pipeline_output/named-outputs/model
Downloading artifact azureml://subscriptions/7445df36-7a0b-4835-915a-2215e66fff00/resourcegroups/RESS-TRABAJO2/workspaces/WS-TRABAJO2/datastores/workspaceblobstore/paths/azureml/5f0d3a2c-c56e-459d-ac94-19c8d90948d4/report_output to pipeline_output/named-outputs/report
Downloading artifact azureml://subscriptions/7445df36-7a0b-4835-915a-2215e66fff00/resourcegroups/RESS-TRABAJO2/workspaces/WS-TRABAJO2/datastores/workspaceblobstore/paths/azureml/e78673ab-c936-4fd6-84e5-80e9f6e03e4a/pairplot_image_output/ to pipeline_output/named-outputs/pairplot
Downloading artifact azureml://datastores/workspaceartifactstore/paths/ExperimentRun/dcid.blue_squash_vqklcqmk9w/ to pipeline_output/artifacts
Bad pipe message: %s [b']i\x98\xa7\xf9aL\