### Library imports

In [68]:
from azure.identity import DefaultAzureCredential           # Simplified way to obtain credentials
from azure.identity import InteractiveBrowserCredential     # When authentication is done through a web browser

from azure.ai.ml import MLClient            # Interacting with Azure ML services (datasets, models, ...)
from azure.ai.ml.dsl import pipeline        # Define machine learning pipelines
from azure.ai.ml import load_component      # Load Azure ML components

### Configure credentials

In [69]:
# Obtain a credential for Azure: try the default credential chain first and
# fall back to an interactive browser login when it cannot produce a token.
try:
    credential = DefaultAzureCredential()
    # Requesting a token up front checks that the credential actually works
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Report why the default credential failed instead of discarding the error,
    # then fall back to InteractiveBrowserCredential
    print(
        f"DefaultAzureCredential failed ({type(ex).__name__}: {ex}); "
        "falling back to InteractiveBrowserCredential."
    )
    credential = InteractiveBrowserCredential()

### Get a handle to the workspace

In [70]:
# Name of a compute target that is already attached to the workspace
cpu_compute_target = "default-compute-poc"

# Workspace client, built from the local config file with the credential obtained earlier
ml_client = MLClient.from_config(credential=credential)

# Retrieving and printing the compute target demonstrates that the connection works
print(ml_client.compute.get(cpu_compute_target))

Found the config file in: /config.json


enable_node_public_ip: true
id: /subscriptions/27a6aae6-ce60-4ae4-a06e-cfe9c1e824d4/resourceGroups/RG-ADA-MLOPS-POC/providers/Microsoft.MachineLearningServices/workspaces/azu-ml-ada-mlops-poc/computes/default-compute-poc
identity:
  principal_id: 1b981572-d59d-4e6e-a5e2-116ca8b1d40e
  tenant_id: d5b4529b-7457-49d9-931b-7c7f9b9eed54
  type: system_assigned
idle_time_before_shutdown: PT60M
idle_time_before_shutdown_minutes: 60
last_operation:
  operation_name: Start
  operation_status: Succeeded
  operation_time: '2023-11-22T08:11:35.087Z'
  operation_trigger: User
location: westeurope
name: default-compute-poc
network_settings:
  private_ip_address: 10.0.0.4
  public_ip_address: 20.8.226.151
os_image_metadata:
  current_image_version: 23.10.09
  is_latest_os_image_version: false
  latest_image_version: 23.10.24
provisioning_state: Succeeded
services:
- display_name: Jupyter
  endpoint_uri: https://default-compute-poc.westeurope.instances.azureml.ms/tree/
- display_name: Jupyter Lab
  en

### Prepare input data

In [71]:
from azure.ai.ml import Input                       # Specify inputs to ML jobs
from azure.ai.ml.constants import AssetTypes        # Provide standardized identifiers for assets

# Pipeline input: a single file (URI_FILE) referenced through the
# "azureml:data_available:2" path (presumably <name>:<version> — confirm)
available_data = Input(path="azureml:data_available:2", type=AssetTypes.URI_FILE)

### Load components

In [72]:
# Reload modules automatically
%load_ext autoreload
%autoreload 2

# Import each pipeline step from its own package (same order as before)
from feature_selection.feature_selection_component import feature_selection_component
from feature_engineering.feature_engineering_component import feature_engineering_component
from outlier_treatment.outlier_treatment_component import outlier_treatment_component
from split_data.split_data_component import split_data_component
from imputation.imputation_component import imputation_component
from normalization.normalization_component import normalization_component
from encoding.encoding_component import encoding_component
from training.training_component import training_component
from scoring.scoring_component import scoring_component

# [name, component] pairs, collected so the components can be updated or created later
components = [
    ["feature_selection", feature_selection_component],
    ["feature_engineering", feature_engineering_component],
    ["outlier_treatment", outlier_treatment_component],
    ["split_data", split_data_component],
    ["imputation", imputation_component],
    ["normalization", normalization_component],
    ["encoding", encoding_component],
    ["training", training_component],
    ["scoring", scoring_component],
]

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Build pipeline

In [74]:
# Pipeline definition: chains the preprocessing, training and scoring components.
# Steps run on `cpu_compute_target` unless a step specifies otherwise (default_compute).
# NOTE(review): documented with `#` comments only — the Azure ML DSL may pick up the
# function docstring, annotations and local variable names as pipeline metadata;
# confirm before adding a docstring or renaming the node variables.
@pipeline(
    default_compute=cpu_compute_target,
)

def marketing_campaign_prediction(pipeline_input_data):
    # pipeline_input_data: the raw input handed to the first step (feature selection).
    # The function has no return statement, so it declares no pipeline-level outputs.
    
    # Feature selection: first step, consumes the pipeline input directly
    feature_selection_node = feature_selection_component(input_data=pipeline_input_data)

    # Feature engineering: consumes the output of feature selection
    feature_engineering_node = feature_engineering_component(input_data=feature_selection_node.outputs.output_data)
    
    # Outlier treatment: consumes the output of feature engineering
    outlier_treatment_node = outlier_treatment_component(input_data=feature_engineering_node.outputs.output_data)
    
    # Split data: produces X_train_data, X_test_data, y_train_data and y_test_data
    split_data_node = split_data_component(input_data=outlier_treatment_node.outputs.output_data)
    
    # Imputation: from here on only the X train/test sets flow through the
    # preprocessing steps; the y sets are taken straight from the split further down
    imputation_node = imputation_component(X_train_input=split_data_node.outputs.X_train_data,
                                           X_test_input=split_data_node.outputs.X_test_data)
    # Normalization: applied to the imputed X train/test sets
    normalization_node = normalization_component(X_train_input=imputation_node.outputs.X_train_output,
                                                 X_test_input=imputation_node.outputs.X_test_output)
    # Encoding: applied to the normalized X train/test sets
    encoding_node = encoding_component(X_train_input=normalization_node.outputs.X_train_output,
                                       X_test_input=normalization_node.outputs.X_test_output)
    # Training: encoded X_train plus y_train from the split step
    training_node = training_component(X_train_input=encoding_node.outputs.X_train_output,
                                       y_train_input=split_data_node.outputs.y_train_data)
    # Scoring: encoded X_test, y_test from the split step and the trained model.
    # `scoring_node` is not referenced again inside this function.
    scoring_node = scoring_component(X_test_input=encoding_node.outputs.X_test_output,
                                     y_test_input=split_data_node.outputs.y_test_data,
                                     model_input=training_node.outputs.model_output)

# Instantiate the pipeline, binding the prepared input data to its only parameter
pipeline_job = marketing_campaign_prediction(
    pipeline_input_data=available_data,
)

### Submit pipeline job

In [75]:
# Submit the pipeline under the given experiment; `pipeline_job` is rebound to
# the object returned by the service
pipeline_job = ml_client.jobs.create_or_update(
    pipeline_job,
    experiment_name="marketing_pipeline_test_3",
)

# Display the returned job as the cell output
pipeline_job

[32mUploading scoring (0.0 MBs): 100%|██████████| 1881/1881 [00:00<00:00, 11779.59it/s]
[39m



Experiment,Name,Type,Status,Details Page
marketing_pipeline_test_3,mango_night_fl4h3tjrqc,pipeline,Preparing,Link to Azure Machine Learning studio


### Register components

In [80]:
# Register each component unless version "1" of it can already be retrieved
for component in components:
    # Each entry is a [name, component] pair
    component_name, component_func = component
    try:
        # Try to get the component back from the workspace
        retrieved_component = ml_client.components.get(name=component_name, version="1")
    except Exception as ex:
        # `except Exception` instead of a bare `except:` lets KeyboardInterrupt and
        # SystemExit propagate; the failure reason is printed rather than hidden.
        # NOTE(review): any lookup failure is still treated as "not registered".
        print(f"Component '{component_name}' could not be retrieved ({type(ex).__name__}); registering it.")
        retrieved_component = ml_client.components.create_or_update(component_func)

In [81]:
# Print every component returned by the workspace's component listing
for registered_component in ml_client.components.list():
    print(registered_component)

creation_context:
  created_at: '2023-11-22T11:14:22.832285+00:00'
  created_by: Alejandro Donaire
  created_by_type: User
  last_modified_at: '2023-11-22T11:14:22.906819+00:00'
  last_modified_by: Alejandro Donaire
  last_modified_by_type: User
description: ''
id: azureml:/subscriptions/27a6aae6-ce60-4ae4-a06e-cfe9c1e824d4/resourceGroups/rg-ada-mlops-poc/providers/Microsoft.MachineLearningServices/workspaces/azu-ml-ada-mlops-poc/components/scoring
name: scoring

creation_context:
  created_at: '2023-11-22T11:14:21.153766+00:00'
  created_by: Alejandro Donaire
  created_by_type: User
  last_modified_at: '2023-11-22T11:14:21.233687+00:00'
  last_modified_by: Alejandro Donaire
  last_modified_by_type: User
description: ''
id: azureml:/subscriptions/27a6aae6-ce60-4ae4-a06e-cfe9c1e824d4/resourceGroups/rg-ada-mlops-poc/providers/Microsoft.MachineLearningServices/workspaces/azu-ml-ada-mlops-poc/components/training
name: training

creation_context:
  created_at: '2023-11-22T11:14:19.080083+00