# Experimentの実行

In [2]:
from azureml.core import Dataset, Datastore, Experiment, Workspace

# Connect to the workspace described by the local config, then look up the
# registered datastore and dataset by name.
ws = Workspace.from_config(path='./config')
az_store = Datastore.get(workspace=ws, datastore_name='azure_sdk_blob01')
az_dataset = Dataset.get_by_name(workspace=ws, name='Loan Applications Using SDK')

In [3]:
# Create the experiment / access it by name in the workspace
experiment = Experiment(name='Loan-SDK-Exp01', workspace=ws)

In [4]:
# Start the experiment: open the run that the following cells log metrics to
new_run = experiment.start_logging()

In [5]:
# Load the dataset into pandas and count the null values of every column.
df = az_dataset.to_pandas_dataframe()
total_observations = len(df)
nulldf = df.isnull().sum()

# Record the metrics on the run; they can be checked in the Metrics tab.
new_run.log('Total Observations', total_observations)

# Iterate over (label, count) pairs instead of indexing nulldf by label:
# label-based indexing returns a Series rather than a scalar when column
# labels repeat. Cast to built-in types so the metric name is always a str
# and the value a plain int instead of a numpy integer.
for column, null_count in nulldf.items():
    new_run.log(str(column), int(null_count))

In [6]:
# End the run
new_run.complete()

# ScriptをAzureMLで実行する
* Anacondaで新しいAzureの環境が作成されて、ローカルで計算されるはずだった
* Environmentの準備がうまくいかなかった

In [15]:
# NOTE: this cell is intentionally disabled (everything below is commented out).
# Per the notes above, the environment preparation for this script run did not
# succeed, so the submission is kept only for reference.
# from azureml.core import Workspace, Experiment, ScriptRunConfig

# ws = Workspace.from_config('./config')
# new_experiment = Experiment(workspace=ws,
#                            name='Loan_Script00')

# script_config = ScriptRunConfig(source_directory='./script',
#                                script='180 - Script To Run.py')
# new_run = new_experiment.submit(config=script_config)

# new_run.wait_for_completion()

In [16]:
# NOTE: this cell is intentionally disabled (everything below is commented out).
# It submits the training script with a custom environment; per the notes
# above, the environment preparation did not succeed.
# from azureml.core import Workspace, Experiment, ScriptRunConfig

# ws = Workspace.from_config('./config')
# new_experiment = Experiment(workspace=ws,
#                            name='Training_Script')

# # scikit-learn is required this time
# from azureml.core import Environment
# from azureml.core.environment import CondaDependencies
# myenv = Environment(name="MyEnvironment")

# # Create the dependencies
# myenv_dep = CondaDependencies.create(conda_packages=['scikit-learn'])
# myenv.python.conda_dependencies = myenv_dep
# myenv.register(ws)

# script_config = ScriptRunConfig(source_directory='./script',
#                                script='200 - Training Script.py',
#                                environment=myenv)
# new_run = new_experiment.submit(config=script_config)

# new_run.wait_for_completion()

# Compute Clusterの準備

In [17]:
from azureml.core import Workspace
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

ws = Workspace.from_config(path="./config")

cluster_name = "my-cluster-001"

# Provisioning settings used only when the cluster has to be created.
compute_config = AmlCompute.provisioning_configuration(
    vm_size='STANDARD_D11_V2',
    max_nodes=2)

# Reuse the cluster when it already exists so that re-running this cell does
# not issue a second create request for the same name.
try:
    cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # No compute target with this name yet: create the cluster.
    cluster = AmlCompute.create(ws, cluster_name, compute_config)

# Block until provisioning has finished so later cells never see a
# half-provisioned cluster.
cluster.wait_for_completion(show_output=True)

# Pipelineの実行
* これまでの集大成
* データセットはdefaults.csvをアップロードして使用

In [33]:
from azureml.core import Workspace, Experiment, ScriptRunConfig

ws = Workspace.from_config('./config')

# NOTE: no experiment is created here. The former
# Experiment(name='Training_Script') object was never used: the pipeline is
# submitted to the experiment that is created right before submission.

# This run needs scikit-learn (and pandas) in the environment
from azureml.core import Environment
from azureml.core.environment import CondaDependencies
myenv = Environment(name="MyEnvironment")

# Create the dependencies and attach them to the environment
myenv_dep = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas'])
myenv.python.conda_dependencies = myenv_dep

# Register the environment in the workspace
myenv.register(ws)


# Create the compute cluster, or reuse it when it already exists
cluster_name = "pipeline-cluster"
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# Provisioning settings used only when the cluster has to be created.
compute_config = AmlCompute.provisioning_configuration(
    vm_size='STANDARD_D11_V2',
    max_nodes=2)

# Look the cluster up first so that re-running this cell does not issue a
# second create request for the same name.
try:
    compute_cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # No compute target with this name yet: create the cluster.
    compute_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

# Wait until provisioning has finished before the cluster is used as a target
compute_cluster.wait_for_completion()

# Build the run configuration used by the pipeline steps: it carries the
# environment and points at the compute cluster.
from azureml.core.runconfig import RunConfiguration
run_config = RunConfiguration()
run_config.environment = myenv
run_config.target = compute_cluster

# Pipeline steps
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import PipelineData

input_ds = ws.datasets.get('default')
if input_ds is None:
    # .get() yields None for a name that is not registered; stop here with a
    # clear message instead of an AttributeError on as_named_input() below.
    raise ValueError("Dataset 'default' is not registered in the workspace")

# Intermediate location that hands data from the first step to the second
dataFolder = PipelineData('datafolder', datastore=ws.get_default_datastore())

# Step 1: preprocessing - reads the raw dataset and writes into dataFolder
dataPrep_step = PythonScriptStep(
    name='01 Data Preparation',
    source_directory='./script',
    script_name='220 - Dataprep Pipeline.py',
    inputs=[input_ds.as_named_input('raw_data')],
    outputs=[dataFolder],
    runconfig=run_config,
    arguments=['--datafolder', dataFolder])

# Step 2: training - takes dataFolder as its input
train_step = PythonScriptStep(
    name='02 Train the Model',
    source_directory='./script',
    script_name='220 - Training Pipeline.py',
    inputs=[dataFolder],
    runconfig=run_config,
    arguments=['--datafolder', dataFolder])

# Assemble the pipeline from the two steps
from azureml.pipeline.core import Pipeline
steps = [dataPrep_step, train_step]
new_pipeline = Pipeline(steps=steps, workspace=ws)

# Create the experiment and submit the pipeline to it
from azureml.core import Experiment
new_experiment = Experiment(name='PipelineExp01', workspace=ws)
new_pipeline_run = new_experiment.submit(new_pipeline)

# Wait for the pipeline run to finish while streaming its output; the
# returned status is displayed as the cell result.
new_pipeline_run.wait_for_completion(show_output=True)

Provisioning operation finished, operation "Succeeded"
Created step 01 Data Preparation [c1f2f7d6][37cf7e9d-6e31-410b-ab55-959e8754177c], (This step will run and generate new outputs)
Created step 02 Train the Model [c50ae812][2555a09d-5161-4553-9373-dd539884cd12], (This step will run and generate new outputs)
Submitted PipelineRun 5556b211-80ba-4013-9125-218269f28f3e
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/5556b211-80ba-4013-9125-218269f28f3e?wsid=/subscriptions/3467f739-a57b-4612-9de8-72a6616c01b3/resourcegroups/AzuremlSDKRG00/workspaces/Azureml-SDK-WS01&tid=bcd8db96-8bb9-4f0d-af35-e471bf92c072
PipelineRunId: 5556b211-80ba-4013-9125-218269f28f3e
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/5556b211-80ba-4013-9125-218269f28f3e?wsid=/subscriptions/3467f739-a57b-4612-9de8-72a6616c01b3/resourcegroups/AzuremlSDKRG00/workspaces/Azureml-SDK-WS01&tid=bcd8db96-8bb9-4f0d-af35-e471bf92c072
PipelineRun Status: NotStarted
PipelineRun Status: Running







StepRunId: 27c1fd08-8659-440c-b688-eaf71d020d08
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/27c1fd08-8659-440c-b688-eaf71d020d08?wsid=/subscriptions/3467f739-a57b-4612-9de8-72a6616c01b3/resourcegroups/AzuremlSDKRG00/workspaces/Azureml-SDK-WS01&tid=bcd8db96-8bb9-4f0d-af35-e471bf92c072
StepRun( 02 Train the Model ) Status: Running

StepRun(02 Train the Model) Execution Summary
StepRun( 02 Train the Model ) Status: Finished
{'runId': '27c1fd08-8659-440c-b688-eaf71d020d08', 'target': 'pipeline-cluster', 'status': 'Completed', 'startTimeUtc': '2022-10-03T15:22:40.619827Z', 'endTimeUtc': '2022-10-03T15:22:57.696622Z', 'services': {}, 'properties': {'ContentSnapshotId': 'fda7af53-9d66-4b8a-8318-19e68effd568', 'StepType': 'PythonScriptStep', 'ComputeTargetType': 'AmlCompute', 'azureml.moduleid': '2555a09d-5161-4553-9373-dd539884cd12', 'azureml.moduleName': '02 Train the Model', 'azureml.runsource': 'azureml.StepRun', 'azureml.nodeid': 'c50ae812', 'azureml.pipelinerunid'



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': '5556b211-80ba-4013-9125-218269f28f3e', 'status': 'Completed', 'startTimeUtc': '2022-10-03T15:22:02.523914Z', 'endTimeUtc': '2022-10-03T15:22:58.581396Z', 'services': {}, 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{}', 'azureml.continue_on_step_failure': 'False', 'azureml.continue_on_failed_optional_input': 'True', 'azureml.pipelineComponent': 'pipelinerun'}, 'inputDatasets': [], 'outputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://azuremlsstorage74437b7d7.blob.core.windows.net/azureml/ExperimentRun/dcid.5556b211-80ba-4013-9125-218269f28f3e/logs/azureml/executionlogs.txt?sv=2019-07-07&sr=b&sig=uQ1vJmH%2FMbmfWAgyAytL1EHCZHTV6ZI3jz1TjQN4ors%3D&skoid=285719f7-ab93-4fe7-a7ba-e7b47a2d53bd&sktid=bcd8db96-8bb9-4f0d-af35-e471bf92c072&skt=2022-10-03T14%3A31%3A57Z&ske=2022-10-04T22%3A41%3A57Z&sks=b&skv=2019-07-07&st=2022-10-0

'Finished'