In [100]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml.dsl import pipeline
from azure.ai.ml.entities import Job
from azure.ai.ml import load_component
from azure.ai.ml import Input, Output
from azure.ai.ml.entities import PipelineJob
from azure.ai.ml.constants import AssetTypes

In [3]:
try:
    credential = DefaultAzureCredential()
    # Check if given credential can get token successfully.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work
    credential = InteractiveBrowserCredential()

In [4]:
# Get a handle to workspace
ml_client = MLClient.from_config(credential=credential)

Found the config file in: .\config.json


In [215]:
prepare_data = load_component(source="split_info.yaml")

In [216]:
logistic_regression = load_component(source="logistic_regression.yaml")

In [217]:
path_input = "azureml://subscriptions/3de29edd-5970-4662-b0fb-929561c50c41/resourcegroups/riskanalyzers-rg/workspaces/riskanalyzers-mlw/datastores/workspaceblobstore/paths/UI/2024-05-25_232652_UTC/UCI_Credit_Card.csv"

In [218]:
@pipeline()
def training_pipeline(input_data: Input):
    # Paso 1: Preparar los datos utilizando el componente de división de datos
    prepare_data_step = prepare_data(input_data=input_data)
    
    # Paso 2: Entrenar el modelo utilizando el componente de regresión logística
    train_model_step = logistic_regression(train_data=prepare_data_step.outputs.train_data,
                                           test_data=prepare_data_step.outputs.test_data)
    
    return {
        "output_train_pre_data": prepare_data_step.outputs.train_data,
        "output_test_pre_data": prepare_data_step.outputs.test_data,
        "output_preds": train_model_step.outputs.predictions
    }

In [219]:
# Crear y enviar la canalización
pipeline_job = training_pipeline(input_data=Input(type=AssetTypes.URI_FILE, path=path_input))

In [220]:
# set pipeline level compute
pipeline_job.settings.default_compute = "juan-risk"
# set pipeline level datastore
pipeline_job.settings.default_datastore = "workspaceblobstore"

In [221]:
print(pipeline_job)

display_name: training_pipeline
type: pipeline
inputs:
  input_data:
    type: uri_file
    path: azureml://subscriptions/3de29edd-5970-4662-b0fb-929561c50c41/resourcegroups/riskanalyzers-rg/workspaces/riskanalyzers-mlw/datastores/workspaceblobstore/paths/UI/2024-05-25_232652_UTC/UCI_Credit_Card.csv
outputs:
  output_train_pre_data:
    type: uri_file
  output_test_pre_data:
    type: uri_file
  output_preds:
    type: uri_file
jobs:
  prepare_data_step:
    type: command
    inputs:
      input_data:
        path: ${{parent.inputs.input_data}}
    outputs:
      train_data: ${{parent.outputs.output_train_pre_data}}
      test_data: ${{parent.outputs.output_test_pre_data}}
    component:
      $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
      name: split_data
      version: '2'
      display_name: Split data
      type: command
      inputs:
        input_data:
          type: uri_file
      outputs:
        train_data:
          type: uri_file
   

In [222]:
# submit job to workspace
pipeline_job = ml_client.jobs.create_or_update(
    pipeline_job, experiment_name="training_pipeline"
)

Uploading pipeline (2.9 MBs): 100%|##########| 2896962/2896962 [00:05<00:00, 566657.26it/s]


