In [1]:
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

## AML Workspace 연결 (config 파일에서 로드)

In [2]:
# Obtain the workspace handle from a saved workspace configuration
# (Workspace.from_config); every later cell talks to Azure ML through `ws`.
ws = Workspace.from_config()

## CSV 데이터를 Tabular 데이터셋으로 변환

In [95]:
# Location of the Iris CSV in blob storage.
data = "https://kfdatalake01.blob.core.windows.net/data/Iris.csv"

# Wrap the delimited file as a TabularDataset; this is the object later
# handed to AutoML as training data.
dataset = Dataset.Tabular.from_delimited_files(path=data)

In [38]:
# Look up the workspace's default datastore once so its name can be compared below.
default_ds = ws.get_default_datastore()

# Walk every datastore registered in the workspace and flag the default one.
for ds_name in ws.datastores:
    is_default = ds_name == default_ds.name
    print(ds_name, "- Default =", is_default)

2022-12-01:01:37:33,70 INFO     [datastore_client.py:991] <azureml.core.authentication.InteractiveLoginAuthentication object at 0x7f2307407100>
2022-12-01:01:37:33,912 INFO     [datastore_client.py:991] <azureml.core.authentication.InteractiveLoginAuthentication object at 0x7f2307407100>


## Iris 샘플 데이터를 Pandas 데이터프레임으로 로드

In [40]:
# Build a pandas DataFrame holding the Iris sample data.
# `load_iris` lives in scikit-learn, not pandas: the previous
# `pandas.load_iris()` call raises AttributeError.
import pandas
from sklearn.datasets import load_iris

iris = load_iris()

# Give the feature columns their real names instead of the default integer
# labels 0..3; mixed int/str column labels are awkward downstream and may be
# rejected when the frame is serialised during dataset registration.
iris_data = pandas.DataFrame(iris.data, columns=iris.feature_names)

# Attach the label as a categorical column, mapping the integer targets to
# their species names.
iris_data["Class"] = pandas.Categorical.from_codes(iris.target, iris.target_names)



## AML 데이터스토어에 데이터셋 등록

In [41]:
# The default datastore is a blob storage container where datasets are stored
# Registered datasets are uploaded to a datastore; use the workspace default.
datastore = ws.get_default_datastore()

# Upload the DataFrame to that datastore and register it in the workspace
# under the name 'Iris'.
ds = Dataset.Tabular.register_pandas_dataframe(
    dataframe=iris_data,
    target=datastore,
    name='Iris',
    description='Sample dataset',
)

2022-12-01:02:00:33,72 INFO     [datastore_client.py:991] <azureml.core.authentication.InteractiveLoginAuthentication object at 0x7f2307407100>


Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/ca13570c-8492-4997-add9-cc828bba852b/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


In [65]:
# ds.to_csv_files(separator=',')

{
  "source": [
    "('workspaceblobstore', 'managed-dataset/ca13570c-8492-4997-add9-cc828bba852b/')"
  ],
  "definition": [
    "GetDatastoreFiles",
    "ReadParquetFile",
    "DropColumns",
    "ToCsvStreams"
  ]
}

In [64]:
#iris_ds = Dataset.get_by_name(workspace=ws, name="Iris")

In [83]:
# iris_df.columns

Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')

## 컬럼 변경

In [84]:
# NOTE(review): no visible cell assigns `iris_df` (the candidate cells are
# commented out), so the old `iris_df = iris_df.rename(...)` raised NameError on
# a fresh kernel and was not idempotent. Materialising it from `dataset` is an
# assumption about where the frame originally came from -- confirm.
iris_df = dataset.to_pandas_dataframe().rename(columns={
    'Species': "Class"
})

In [87]:
# Keep only the four measurement columns plus the label column.
iris_df_ds = iris_df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm','Class']]

# Preview as the cell's final expression: a bare `.head()` placed before the
# assignment is evaluated and thrown away, so nothing was ever displayed.
iris_df_ds.head()

In [None]:
# X_train, X_test = train_test_split(dataset, test_size=0.2, random_state=42)

## 컴퓨팅 클러스터 생성

In [90]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# choose a name for your cluster
batchai_cluster_name = 'cpucluster'
cluster_min_nodes = 0
cluster_max_nodes = 1
vm_size = "STANDARD_D2_V2"

# Reuse the cluster when the workspace already has a compute target with this
# name; otherwise provision a new one and wait for it to come up.
cts = ws.compute_targets
if batchai_cluster_name in cts:
    print('Found existing compute target...%s' % batchai_cluster_name)
    compute_target = cts[batchai_cluster_name]
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        vm_priority='lowpriority',  # optional
        min_nodes=cluster_min_nodes,
        max_nodes=cluster_max_nodes,
    )

    # create the cluster
    compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # Print the detailed cluster status: inside an `else` branch a bare
    # expression is never rendered by the notebook, so it must be printed.
    print(compute_target.status.serialize())

Found existing compute target...cpucluster


In [97]:
# Keyword settings forwarded to AutoMLConfig: time limits, early stopping,
# the metric to optimise, featurization mode and the number of CV folds.
automl_settings = dict(
    iteration_timeout_minutes=2,
    experiment_timeout_minutes=15,
    enable_early_stopping=True,
    primary_metric='AUC_weighted',
    featurization='auto',
    n_cross_validations=5,
)



In [98]:
# Describe the AutoML classification job: which data to train on, which column
# is the label, where to run it, and the tuning knobs from `automl_settings`.
Automl_config = AutoMLConfig(
    task='classification',
    training_data=dataset,
    label_column_name="Class",
    compute_target=compute_target,
    debug_log='automl_errors.log',
    **automl_settings
)

   

In [99]:
# Experiment in the workspace under which the AutoML run is submitted.
experiment = Experiment(workspace=ws, name="iris_experiment")

In [100]:
# Submit the AutoML configuration and stream progress into the cell output.
run = experiment.submit(config=Automl_config, show_output=True)

Submitting remote run.
No run_configuration provided, running on cpucluster with default configuration
Running on remote compute: cpucluster


Experiment,Id,Type,Status,Details Page,Docs Page
iris_experiment,AutoML_c2216245-2f41-4911-ba1a-eb291290f916,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

********************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

********************************************************************************************

TYPE:         High cardinality feature detection
STATUS:       PASSED
DESCRIPTI

In [101]:
# Unpack the AutoML run's output into the best run and its fitted model;
# `best_run` is used by later cells to download the scoring script and
# the conda environment file.
best_run, fitted_model = run.get_output()

In [102]:
# Show the interactive run-monitoring widget for the AutoML run.
# RunDetails is already imported in the notebook's first cell, so the
# duplicate import that used to live here is not needed.
RunDetails(run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [110]:
# Model name recorded in the best run's properties; reused when registering.
model_name = best_run.properties['model_name']

# Local path for the scoring script; the deployment cell passes this same
# path as the inference entry script.
script_file_name = 'inference/score.py'

# Fetch the scoring file from the best run's outputs into that path.
best_run.download_file('outputs/scoring_file_v_1_0_0.py', script_file_name)

In [104]:
# Register the model produced by the AutoML run in the workspace.
description = 'AutoML Iris model'
model = run.register_model(
    model_name=model_name,
    description=description,
    tags=None,
)

# Echo the id of the model that was just registered.
print("Model ID", run.model_id)

Model ID AutoMLc2216245220


In [105]:
from azureml.core.environment import Environment
from azureml.automl.core.shared import constants

# Download the conda specification stored with the best run, then build the
# inference Environment from it.
best_run.download_file(name=constants.CONDA_ENV_FILE_PATH, output_file_path='myenv.yml')
myenv = Environment.from_conda_specification(file_path="myenv.yml", name="myenv")

In [106]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

# Pair the downloaded scoring script with the environment built from the
# run's conda specification.
inference_config = InferenceConfig(entry_script=script_file_name, environment=myenv)

# Container sizing for the Azure Container Instances deployment.
aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1,
                                               memory_gb = 1,
                                               description = 'Iris classification service')

# NOTE(review): the service name looks copied from a bank-marketing sample even
# though this deploys the Iris model; kept as-is so an existing endpoint is not
# orphaned -- consider renaming.
aci_service_name = 'automl-sample-bankmarketing-all'
print(aci_service_name)

# overwrite=True lets the cell be re-run: without it, deployment fails once a
# service with this name already exists in the workspace. It replaces that
# existing service, so confirm this is acceptable for a shared endpoint.
aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig,
                           overwrite=True)
aci_service.wait_for_deployment(show_output=True)
print(aci_service.state)

automl-sample-bankmarketing-all
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-12-01 05:20:29+00:00 Creating Container Registry if not exists.
2022-12-01 05:20:29+00:00 Registering the environment.
2022-12-01 05:20:30+00:00 Building image..
2022-12-01 05:40:46+00:00 Generating deployment configuration.
2022-12-01 05:40:47+00:00 Submitting deployment to compute..
2022-12-01 05:40:55+00:00 Checking the status of deployment automl-sample-bankmarketing-all..
2022-12-01 05:45:21+00:00 Checking the status of inference endpoint automl-sample-bankmarketing-all.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy
