In [2]:
from azure.ai.ml import load_component
from azure.ai.ml.dsl import pipeline
from azure.ai.ml import MLClient, Input
from azure.identity import DefaultAzureCredential, EnvironmentCredential
from azure.ai.ml.entities import AmlCompute
from dotenv import load_dotenv
import os
import pandas as pd

In [4]:
def get_comput_target(ml_client, name="cpu-cluster", family='Standard_DS2_v2'):
    cpu_compute_target = name
    
    try:
        # let's see if the compute target already exists
        cpu_cluster = ml_client.compute.get(cpu_compute_target)
    except Exception:
        cpu_cluster = AmlCompute(
            name=cpu_compute_target,
            type="amlcompute",
            size=family,
            min_instances=0,
            max_instances=4,
            idle_time_before_scale_down=180,
            tier="Dedicated",
        )
    
        cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()

In [5]:
def get_azure_credential(subscription_id, resource_group, ):
    credential = DefaultAzureCredential()
    ml_client = MLClient.from_config(credential)
    ml_client =  MLClient(credential, subscription_id, resource_group, workspace)
    return ml_client

ml_client = get_azure_credential("subscription_id", "resource_group", "workspace")
data_store_uri_file = "azureml://subscriptions/46169265-43c5-42f4-b171-b27bdd8e5afa/resourcegroups/rchoque/workspaces/rc_ml_test/datastores/workspaceblobstore/paths/UI/2023-11-04_184502_UTC/water_potability.csv"



Found the config file in: /config.json


In [6]:
compute_target = get_comput_target(ml_client, name="cpu-cluster")

In [7]:
clean_component = load_component(source="./components/clean-component/clean.yml")
split_component = load_component(source="./components/split-component/split.yml")

logistic_regression_component = load_component(source="./components/logistic-regression-component/logistic_regression.yml")
logistic_score_component = load_component(source="./components/logistic-score-component/logistic_score.yml")
logistic_eval_component = load_component(source="./components/logistic-eval-component/logistic_eval.yml")





In [8]:
# define a pipeline containing 3 nodes: Prepare data node, train node, and score node
@pipeline(
    default_compute='cpu-cluster',
)

def water_potability_decision_tree_dummy(pipeline_input_data):
    
    clean_node = clean_component(training_data=pipeline_input_data)

    split_node = split_component(clean_data=clean_node.outputs.model_output)


    logistic_regression = logistic_regression_component(training_data=split_node.outputs.training_data)
    
    #train_node.compute = gpu_compute_target

    score_node = logistic_score_component(
        test_data=split_node.outputs.testing_data,
        model_input=logistic_regression.outputs.model_output
    )

    eval_node = logistic_eval_component(
        scoring_result=score_node.outputs.score_output,
        test_data = split_node.outputs.testing_data
    )

    return {
        "pipeline_eval_output": eval_node.outputs.eval_output
    }


In [9]:


# create a pipeline
water_potability_ds =  Input(type="uri_file", path=data_store_uri_file)

pipeline_job = water_potability_decision_tree_dummy(pipeline_input_data=water_potability_ds)


In [10]:
pipeline_job = ml_client.jobs.create_or_update(
    pipeline_job, experiment_name="pipeline_water_potability_dummy"
)
pipeline_job

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


In [21]:
# wait until the job completes
ml_client.jobs.stream(pipeline_job.name)


RunId: salmon_cumin_x219mxk1k3
Web View: https://ml.azure.com/runs/salmon_cumin_x219mxk1k3?wsid=/subscriptions/46169265-43c5-42f4-b171-b27bdd8e5afa/resourcegroups/rchoque/workspaces/rc_ml_test


In [32]:
# Download all the outputs of the job
output = ml_client.jobs.download(name=pipeline_job.name, download_path='./pipeline_output', all=True)

Downloading artifact azureml://subscriptions/b24420ec-4741-4157-b7a7-dce7f5d571f4/resourcegroups/DefaultResourceGroup-eastus2/workspaces/ucb-ws/datastores/workspaceblobstore/paths/azureml/56d1fd7e-6be9-4c74-8ea3-b4a6cb73736c/eval_output/ to pipeline_output/named-outputs/pipeline_eval_output
Downloading artifact azureml://datastores/workspaceartifactstore/paths/ExperimentRun/dcid.helpful_tongue_mk8s16t1l6/ to pipeline_output/artifacts
