# 安裝套件與確認 SDK 版本

In [1]:
import os
import azureml.core
from azureml.core import (
    Workspace,
    Experiment,
    Dataset,
    Datastore,
    ComputeTarget,
    Environment,
    ScriptRunConfig
)
from azureml.data import OutputFileDatasetConfig
from azureml.core.compute import AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.pipeline.core import PublishedPipeline
from azureml.pipeline.core import PipelineData

# check core SDK version number
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.48.0


# 配置工作環境

In [None]:
workspace = Workspace.from_config()
print(f"workspace: {workspace}")

exp = Experiment(workspace=workspace, name="iris-fashion")

# 連接已有的 Blob

In [3]:
from azureml.core import Datastore
blob_datastore_name = 'iris'
blob_account_name= ''
blob_account_key = ''
blob_container_name = ''
blob_datastore = Datastore.register_azure_blob_container(workspace=workspace, 
                                                         datastore_name=blob_datastore_name, 
                                                         container_name=blob_container_name, 
                                                         account_name=blob_account_name,
                                                         account_key=blob_account_key)

# 建立運算叢集

In [4]:
cluster_name = "cpu-cluster-4core"

found = False

cts = workspace.compute_targets
if cluster_name in cts and cts[cluster_name].type == "AmlCompute":
    found = True
    print("Found existing compute target.")
    compute_target = cts[cluster_name]
if not found:
    print("Creating a new compute target...")
    compute_config = AmlCompute.provisioning_configuration(
        vm_size= "STANDARD_DS3_V2",
        max_nodes=1,
    )
    compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=10
    )

Creating a new compute target...
InProgress..
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


# 設定執行環境的 config

In [5]:
run_config = RunConfiguration()
run_config.environment.docker.enabled = True
conda = CondaDependencies()
conda.add_pip_package('azureml-sdk[automl]')
conda.add_pip_package('opencv-python-headless')
conda.add_pip_package('tensorflow')
run_config.environment.python.conda_dependencies = conda

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


# 建立 Python 腳本步驟

In [38]:
from azureml.pipeline.core import PipelineParameter
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data.data_reference import DataReference
from azureml.core import Dataset

datastore = workspace.get_default_datastore()
training_step_processed_data = PipelineData('training_step_processed_data', datastore=datastore)
evaluate_step_processed_data = PipelineData('evaluate_step_processed_data', datastore=datastore)

# 建立 OutputFileDatasetConfig，以傳遞資料
prepared_data = OutputFileDatasetConfig('prepared_data')

# Get Iris Datastore
source_datastore = Datastore.get(workspace, 'iris')

datasets = Dataset.get_all(workspace)
if not 'IrisTrainingFileData' in datasets:
    # Register training Dataset
    iris_training_datastore_path = [DataPath(source_datastore, 'training/*')]
    iris_training_ds = Dataset.File.from_files(path=iris_training_datastore_path)
    iris_training_ds.register(workspace, "IrisTrainingFileData", create_new_version=True)

if not 'IrisTestFileData' in datasets:
    # Register training Dataset
    iris_test_datastore_path = [DataPath(source_datastore, 'test/*')]
    iris_test_ds = Dataset.File.from_files(path=iris_test_datastore_path)
    iris_test_ds.register(workspace, "IrisTestFileData", create_new_version=True)

fileIrisTrainingData = Dataset.get_by_name(workspace, 'IrisTrainingFileData')
fileIrisTestData = Dataset.get_by_name(workspace, 'IrisTestFileData')

script_folder = "./pipeline-python"
dataset_training_path = "/tmp/dataset/iris/training"
dataset_test_path = "/tmp/dataset/iris/test"

training = PythonScriptStep(
    name="Training Step",
    script_name="training.py",
    arguments=[
        "--blob_datastore_name", blob_datastore_name,
        "--blob_account_name", blob_account_name,
        "--blob_account_key", blob_account_key,
        "--blob_container_name", blob_container_name,
        "--output_path", training_step_processed_data,
        "--dataset_training_path", dataset_training_path,
        "--dataset_test_path", dataset_test_path,
        "--output_folder", prepared_data,
    ],
    inputs=[fileIrisTrainingData.as_named_input("Iris_Training_Files_mount").as_mount(dataset_training_path),
        fileIrisTestData.as_named_input("Iris_Test_Files_mount").as_mount(dataset_test_path)],
    outputs=[training_step_processed_data],
    source_directory=script_folder,
    compute_target=compute_target,
    runconfig=run_config,
    allow_reuse=False,
)

evaluate = PythonScriptStep(
    name="Evaluate Step",
    script_name="evaluate.py",
    arguments=[
        "--output_path", evaluate_step_processed_data,
        "--output_folder", prepared_data.as_input(),
    ],
    inputs=[fileIrisTestData.as_named_input("Iris_Test_Files_mount").as_mount(dataset_test_path)],
    outputs=[evaluate_step_processed_data],
    source_directory=script_folder,
    compute_target=compute_target,
    runconfig=run_config,
    allow_reuse=False,
)

pipeline = Pipeline(workspace, steps=[training, evaluate])


print("Pipeline is built")
pipeline.validate()

Pipeline is built
Step Training Step is ready to be created [9d22a305]Step Evaluate Step is ready to be created [34809451]



[]

# 部署成 pipeline 與儲存 pipeline ID

In [116]:
published_pipeline = pipeline.publish(name="iris-blob-trigger-pipeline", description="iris-blob-trigger-pipeline", continue_on_step_failure=True)
open('pipeline.id', 'w').write(published_pipeline.id)

Created step go step1 [ad5e7d43][4294c800-3945-4749-82d5-75bdfb5523bb], (This step will run and generate new outputs)


36

# 提交實驗

In [39]:
run = exp.submit(pipeline, regenerate_outputs=True)

Created step Training Step [9d22a305][2555685e-521e-479d-839d-76c37c4dd3b9], (This step will run and generate new outputs)Created step Evaluate Step [34809451][f7702132-7807-48e8-868e-8178f3dd02d3], (This step will run and generate new outputs)

Submitted PipelineRun 38abaf63-b5cd-4fdf-a4fa-445d432e2648
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/38abaf63-b5cd-4fdf-a4fa-445d432e2648?wsid=/subscriptions/095c17da-056e-43f2-8d52-a30d1bdb6423/resourcegroups/adt-3d/workspaces/adt-3d&tid=c7f98dc5-2792-4fd7-bb88-7cb2506df48b


# List Models

In [40]:
from azureml.core import Model
for model in Model.list(workspace):
    print(f"model name: {model.name}, version: {model.version}")

model name: tf-iris-decision-tree, version: 26
