In [16]:
import logging

from matplotlib import pyplot as plt
import pandas as pd
import os

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.train.automl import AutoMLConfig

# Environment sanity check: print the installed SDK version, the workspace
# loaded from the local config file, and that workspace's default datastore.
SEPARATOR = "*" * 60

print("# [SDK Check]")
print("SDK version:", azureml.core.VERSION)
print(SEPARATOR)

print("# [WS Check]")
ws = Workspace.from_config()
# Label -> value pairs, printed in insertion order.
ws_summary = {
    'WS Name': ws.name,
    'RG': ws.resource_group,
    'Region': ws.location,
    'Subscription': ws.subscription_id,
}
for label, value in ws_summary.items():
    print(f'{label}: {value}')
print(SEPARATOR)

print("# [Get DS]")
# Get the default datastore
ds = ws.get_default_datastore()
ds_summary = {
    'DS Name': ds.name,
    'DS Type': ds.datastore_type,
    'Storage Account Name': ds.account_name,
    'BLOB Contaner Name': ds.container_name,
}
for label, value in ds_summary.items():
    print(f'{label}: {value}')
print(SEPARATOR)

# [SDK Check]
SDK version: 1.39.0
************************************************************
# [WS Check]
WS Name: publicws
RG: myrg
Region: japaneast
Subscription: 10bc7ab7-8f4c-47dc-b052-be0b53fe3c24
************************************************************
# [Get DS]
DS Name: workspaceblobstore
DS Type: AzureBlob
Storage Account Name: publicws8148534499
BLOB Contaner Name: azureml-blobstore-7eb7bbf7-3c36-411a-87f8-2d5015489bfd
************************************************************


In [17]:
# Compute environment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

compute_name = "cpu-clusters"

# Provision the cluster only when the workspace does not already have a
# compute target registered under this name.
cluster_missing = compute_name not in ws.compute_targets
if cluster_missing:
    provisioning = AmlCompute.provisioning_configuration(
        vm_size="Standard_F4S_V2",
        max_nodes=4,
        idle_seconds_before_scaledown=300,
    )
    # Block until provisioning has finished, streaming progress to the cell output.
    ComputeTarget.create(ws, compute_name, provisioning).wait_for_completion(show_output=True)

# In both cases, (re)fetch the target by name so `compute_target` is always
# obtained the same way.
compute_target = ComputeTarget(workspace=ws, name=compute_name)

In [18]:
# Dataset
from azureml.exceptions import UserErrorException

# Look up the registered training dataset by name.
# If the lookup fails, print the error and re-raise it. Swallowing the
# exception would leave `dataset` undefined on a fresh kernel (or silently
# stale from an earlier run), so the real cause would only surface later as
# an unrelated error when the AutoML config is built.
try:
    dataset = Dataset.get_by_name(ws, name='titanic_train')
except UserErrorException as e:
    print(f'error: {e}')
    raise

# Name of the target column the classifier should predict.
label_column_name = 'Survived'

In [19]:
# Experiment: handle under which the AutoML run is submitted.
experiment_name = 'classification_titanic'
experiment = Experiment(workspace=ws, name=experiment_name)

In [20]:
# AutoML configuration
automl_settings = dict(
    n_cross_validations=3,
    primary_metric='AUC_weighted',
    enable_early_stopping=True,
    # This is a limit for testing purposes; increase it to match the cluster size.
    max_concurrent_iterations=2,
    # This is a time limit for testing purposes; remove it for real use cases,
    # as it drastically limits the ability to find the best possible model.
    experiment_timeout_hours=0.25,
    verbosity=logging.INFO,
)

# Combine the task definition, data and compute with the tuning settings above.
automl_config = AutoMLConfig(
    task='classification',
    training_data=dataset,
    label_column_name=label_column_name,
    compute_target=compute_target,
    debug_log='automl_errors.log',
    **automl_settings,
)

In [21]:
# Run: submit the AutoML configuration to the experiment.
# show_output=False keeps the run's progress log out of this cell's output.
remote_run = experiment.submit(config=automl_config, show_output=False)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
classification_titanic,AutoML_8e23b335-737b-4923-9b00-38e1a3cc2a26,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


In [28]:
# Monitor the run until it completes.
# NOTE(review): this cell only renders the RunDetails widget for `remote_run`;
# nothing here explicitly blocks on completion — confirm whether a blocking
# wait is intended before later cells rely on run results.
from azureml.widgets import RunDetails

run_details_widget = RunDetails(remote_run)
run_details_widget.show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [29]:
# Fetch the curated environment used for AutoML training on this compute
# target and save its definition under ./env.
from azureml.core import Experiment
from azureml.train.automl.constants import Scenarios
# NOTE(review): `_restclient` is an underscore-prefixed (private) SDK module;
# presumably it can change between SDK versions — verify when upgrading.
from azureml._restclient.jasmine_client import JasmineClient

training_experiment = Experiment(workspace=ws, name='classification_titanic')

# The client is scoped to the training experiment's workspace and identity.
service_ctx = training_experiment.workspace.service_context
jasmine_client = JasmineClient(
    service_context=service_ctx,
    experiment_name=training_experiment.name,
    experiment_id=training_experiment.id,
)

# CPU-only, non-DNN AutoML scenario, matched to the cluster's VM size.
target_sku = compute_target.vm_size
training_env = jasmine_client.get_curated_environment(
    scenario=Scenarios.AUTOML,
    enable_dnn=False,
    enable_gpu=False,
    compute=compute_target,
    compute_sku=target_sku,
)

# overwrite=False: an existing ./env directory is not replaced.
training_env.save_to_directory('./env', overwrite=False)