# 安裝套件與確認 SDK 版本

In [1]:
import os
import azureml.core
from azureml.core import (
    Workspace,
    Experiment,
    Dataset,
    Datastore,
    ComputeTarget,
    Environment,
    ScriptRunConfig
)
from azureml.data import OutputFileDatasetConfig
from azureml.core.compute import AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.pipeline.core import PublishedPipeline
from azureml.pipeline.core import PipelineData

# check core SDK version number
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.48.0


# 配置工作環境

In [2]:
workspace = Workspace.from_config()
print(f"workspace: {workspace}")

exp = Experiment(workspace=workspace, name="test-fashion")

workspace: Workspace.create(name='adt-3d', subscription_id='095c17da-056e-43f2-8d52-a30d1bdb6423', resource_group='adt-3d')


# 連接已有的 Blob

In [3]:
from azureml.core import Datastore
blob_datastore_name = 'filmcheck'
blob_account_name= ''
blob_account_key = ''
blob_container_name = ''
blob_datastore = Datastore.register_azure_blob_container(workspace=workspace, 
                                                         datastore_name=blob_datastore_name, 
                                                         container_name=blob_container_name, 
                                                         account_name=blob_account_name,
                                                         account_key=blob_account_key)

# 建立運算叢集

In [9]:
cluster_name = "cpu-cluster-4core"

found = False

cts = workspace.compute_targets
if cluster_name in cts and cts[cluster_name].type == "AmlCompute":
    found = True
    print("Found existing compute target.")
    compute_target = cts[cluster_name]
if not found:
    print("Creating a new compute target...")
    compute_config = AmlCompute.provisioning_configuration(
        vm_size= "STANDARD_DS3_V2",
        max_nodes=1,
    )
    compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=10
    )

Creating a new compute target...
InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


# 設定執行環境的 config

In [10]:
run_config = RunConfiguration()
run_config.environment.docker.enabled = True
conda = CondaDependencies()
conda.add_pip_package('azureml-sdk[automl]')
conda.add_pip_package('opencv-python-headless')
run_config.environment.python.conda_dependencies = conda

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


# 建立 Python 腳本步驟

In [32]:
datastore = workspace.get_default_datastore()
step1_processed_data = PipelineData('step1_processed_data', datastore=datastore)
step2_processed_data = PipelineData('step2_processed_data', datastore=datastore)

# 建立 OutputFileDatasetConfig，以傳遞資料
prepared_data = OutputFileDatasetConfig('prepared_data')

script_folder = "./pipeline-python"

step1 = PythonScriptStep(
    name="go step1",
    script_name="create_png.py",
    arguments=[
        "--output_path", step1_processed_data,
        "--blob_datastore_name", blob_datastore_name,
        "--blob_account_name", blob_account_name,
        "--blob_account_key", blob_account_key,
        "--blob_container_name", blob_container_name,
        '--output_folder', prepared_data,
    ],
    outputs=[step1_processed_data],
    source_directory=script_folder,
    compute_target=compute_target,
    runconfig=run_config,
    allow_reuse=False,
)

step2 = PythonScriptStep(
    name="go step2",
    script_name="create_txt.py",
    arguments=[
        "--output_path", step2_processed_data,
        "--blob_datastore_name", blob_datastore_name,
        "--blob_account_name", blob_account_name,
        "--blob_account_key", blob_account_key,
        "--blob_container_name", blob_container_name,
        '--datadir', prepared_data.as_input(),
    ],
    outputs=[step2_processed_data],
    source_directory=script_folder,
    compute_target=compute_target,
    runconfig=run_config,
    allow_reuse=False,
)

pipeline = Pipeline(workspace, steps=[step1, step2])
print("Pipeline is built")
pipeline.validate()

Pipeline is built
Step go step1 is ready to be created [2b3c675e]
Step go step2 is ready to be created [e7801665]


[]

# 部署成 pipeline 與儲存 pipeline ID

In [33]:
published_pipeline = pipeline.publish(name="blob-trigger-pipeline", description="blob-trigger-pipeline", continue_on_step_failure=True)
open('pipeline.id', 'w').write(published_pipeline.id)

Created step go step1 [2b3c675e][9b9fb24e-f85a-4315-8172-a1f1c88dc1fc], (This step will run and generate new outputs)
Created step go step2 [e7801665][af40195d-6b13-4dae-b1d0-eefa519a7c20], (This step will run and generate new outputs)


36

# 提交實驗

In [34]:
run = exp.submit(pipeline, regenerate_outputs=True)

Created step go step1 [2b3c675e][1d98ed9c-193a-491e-b134-de87c6473769], (This step will run and generate new outputs)
Created step go step2 [e7801665][818f0bba-318b-408f-91bb-1fb3404945f5], (This step will run and generate new outputs)
Submitted PipelineRun 174f62b5-92b2-4128-8207-ca94805e1f87
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/174f62b5-92b2-4128-8207-ca94805e1f87?wsid=/subscriptions/095c17da-056e-43f2-8d52-a30d1bdb6423/resourcegroups/adt-3d/workspaces/adt-3d&tid=c7f98dc5-2792-4fd7-bb88-7cb2506df48b
