## AutoML Experiment using SDK V2

Make sure SDK v2 is installed by following the documentation in this [README.md](https://msdata.visualstudio.com/Vienna/_git/sdk-cli-v2?path=%2FREADME.md&_a=preview).

Also ensure that the above installation is done in a conda environment where AutoML SDK v1 is already installed. You may also need to install MLflow for some operations (e.g. via `pip install azureml-mlflow`).

In [19]:
# utility methods

# Currently, there's no SDK v2 equivalent of v1's 'show_output' or 'wait_for_completion' functionality, 
# that prints the AutoML iteration info

def show_output(client, job) -> None:
    """Stream the output of `job` through `client.jobs.stream`.

    Args:
        client: Object exposing `jobs.stream(name)` (the MLClient in this notebook).
        job: Job object whose `name` identifies the job to stream.
    """
    # This doesn't appear to stream anything at the moment
    # Use the `job` argument rather than a notebook-level global, so the helper
    # works for any job and before a job has been submitted in the session.
    client.jobs.stream(job.name)


def wait_for_completion(client, job, poll_duration: int = 30) -> None:
    """Block until the job reaches a terminal status.

    The status is fetched via `client.jobs.get(job.name)` and printed on every
    poll; between polls the function sleeps for `poll_duration` seconds.
    """
    import time
    from azure.ml._operations.run_history_constants import RunHistoryConstants

    while True:
        status = client.jobs.get(job.name).status
        print("Current job status: ", status)
        # Stop as soon as the service reports one of the terminal statuses.
        if status in RunHistoryConstants.TERMINAL_STATUSES:
            return
        time.sleep(poll_duration)


def download_outputs(client, job) -> None:
    """Download the job's outputs, then attempt the same for its first child run.

    A failure of the child-run download is reported (message and traceback)
    but not re-raised.
    """
    import traceback

    def _fetch(run_name, target_dir):
        client.jobs.download(run_name, download_path=target_dir)

    # Parent run level: this does not download any logs (and no models either,
    # since models are not stored at the parent run level).
    _fetch(job.name, "./outputs")

    # Child run level: currently this throws an exception saying it's not
    # supported for the job type.
    try:
        child_run_name = "{}_0".format(job.name)
        _fetch(child_run_name, "./outputs/")
    except Exception as err:
        print(err)
        traceback.print_exc()
        

def print_studio_url(job, open_in_new_tab: bool = False) -> None:
    """Print the job's Studio endpoint and optionally open it with `webbrowser`.

    The URL is read from `job.interaction_endpoints['Studio'].endpoint`.
    """
    # TODO: Any easier way to get the URL?
    studio_url = job.interaction_endpoints['Studio'].endpoint
    print("Studio URL: ", studio_url)

    if not open_in_new_tab:
        return

    import webbrowser
    webbrowser.open(studio_url)

In [2]:
# Global imports
from azure.ml import MLClient
from azure.core.exceptions import ResourceExistsError

from azure.ml.entities.workspace.workspace import Workspace
from azure.ml.entities.compute.compute import Compute
from azure.ml.entities.assets import Data

In [3]:
# Azure coordinates used by the cells below to build the clients.
subscription_id = '381b38e9-9840-4719-a5a0-61d9585e1e91'
resource_group_name = 'gasi_rg_neu'
workspace_name = 'gasi_ws_neu'

# Experiment name passed to MLflow (`mlflow.set_experiment`) further down.
experiment_name = "3-automl-remote-compute-run"

In [4]:
# Create an MLClient.
# The resource group must already exist at this point.
# The default workspace is not passed here; it is assigned on the client in a
# later cell (see the commented-out keyword argument at the end of the line).

client = MLClient(subscription_id, resource_group_name) # default_workspace_name=workspace)

In [5]:
# Set the default workspace for the client, creating one if it doesn't exist.

workspace = Workspace(name=workspace_name)

try:
    client.workspaces.create(workspace)
except ResourceExistsError as re:
    # An already-existing workspace is fine: report it and carry on.
    print(re)
    
# From here on, the client uses this workspace by default.
client.default_workspace_name = workspace_name

Workspace with name gasi_ws_neu already exists.


In [6]:
# Set or create compute

# `cpu_cluster_name` is reused later as the compute target of the AutoML job.
cpu_cluster_name = "cpucluster"
compute = Compute("amlcompute",
                  name=cpu_cluster_name, size="STANDARD_D2_V2",
                  min_instances=0, max_instances=3,
                  idle_time_before_scale_down=120)

# Load directly from YAML file
# compute = Compute.load("./compute.yaml")

try:
    # TODO: This currently results in an exception in Azure ML, please create compute manually.
    client.compute.create(compute)
except ResourceExistsError as re:
    # The cluster already exists: report it and carry on.
    print(re)
except Exception as e:
    # Best effort: any other failure is printed with its traceback instead of
    # aborting the notebook.
    import traceback
    
    print("Could not create compute.", str(e))
    traceback.print_exc()

Could not create compute. Cannot deserialize duration object., ISO8601Error: Unable to parse duration string ''


Traceback (most recent call last):
  File "/home/schrodinger/anaconda3/envs/devmar/lib/python3.7/site-packages/msrest/serialization.py", line 1872, in deserialize_duration
    duration = isodate.parse_duration(attr)
  File "/home/schrodinger/anaconda3/envs/devmar/lib/python3.7/site-packages/isodate/isoduration.py", line 104, in parse_duration
    raise ISO8601Error("Unable to parse duration string %r" % datestring)
isodate.isoerror.ISO8601Error: Unable to parse duration string ''

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-6-5ddad0ec168a>", line 14, in <module>
    client.compute.create(compute)
  File "/home/schrodinger/automl/sdk-cli-v2/src/azure-ml/azure/ml/_operations/compute_operations.py", line 98, in create
    polling=not no_wait,
  File "/home/schrodinger/automl/sdk-cli-v2/src/azure-ml/azure/ml/_restclient/_2021_03_01_preview/machinelearningservices/operations/_machine_learning_compute_operatio

In [7]:
# Upload dataset

# Register the contents of ./data as version 1 of the named data asset.
dataset_name = "train_dataset_beer"
training_data = Data(name=dataset_name, version=1, local_path="./data")

# Load directly from YAML file
# training_data = Data.load("./data.yaml")

try:
    data = client.data.create_or_update(training_data)
    print("Uploaded to path  : ", data.path)
    print("Datastore location: ", data.datastore)
except Exception as e:
    # Failures (e.g. the name/version already being registered) are only
    # printed; note that `data` is not assigned in that case.
    print("Could not create dataset. ", str(e))

Could not create dataset.  (UserError) A data version with this name and version already exists. If you are trying to create a new data version, use a different name or version. If you are trying to update an existing data version, the existing asset's Path, Properties cannot be changed. Only tags and description can be updated.


In [10]:
# Initialize MLflow: point the tracking URI at Azure ML and select the active experiment

import mlflow

##### NOTE: This is SDK v1 API #####
# TODO: How do we get this from MLClient? Tracking URI can't be obtained from v2 Workspace object
from azureml.core import Workspace as WorkspaceV1
ws = WorkspaceV1(workspace_name=workspace_name, resource_group=resource_group_name, subscription_id=subscription_id)
####################################

# The tracking URI comes from the SDK v1 workspace object created above.
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

# Set the active experiment, creating one if it doesn't exist
mlflow.set_experiment(experiment_name)

# Get experiment details and print them for inspection
experiment = mlflow.get_experiment_by_name(experiment_name)
print("Experiment_id: {}".format(experiment.experiment_id))
print("Artifact Location: {}".format(experiment.artifact_location))
print("Tags: {}".format(experiment.tags))
print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))

print("\nRegistry URI:         {}".format(mlflow.get_registry_uri()))
print("\nCurrent tracking uri: {}".format(mlflow.get_tracking_uri()))

Experiment_id: e76232b5-3215-43ad-8c33-54089e072777
Artifact Location: 
Tags: {}
Lifecycle_stage: active

Registry URI:         azureml://northeurope.experiments.azureml.net/mlflow/v1.0/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourceGroups/gasi_rg_neu/providers/Microsoft.MachineLearningServices/workspaces/gasi_ws_neu?

Current tracking uri: azureml://northeurope.experiments.azureml.net/mlflow/v1.0/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourceGroups/gasi_rg_neu/providers/Microsoft.MachineLearningServices/workspaces/gasi_ws_neu?


In [17]:
from azure.ml._restclient._2020_09_01_preview.machinelearningservices.models import GeneralSettings, LimitSettings, DataSettings, TrainingDataSettings, ValidationDataSettings, TrainingSettings
from azure.ml._restclient._2020_09_01_preview.machinelearningservices.models._azure_machine_learning_workspaces_enums import TaskType, OptimizationMetric
from azure.ml._schema.compute_binding import InternalComputeConfiguration
from azure.ml.entities import AutoMLJob
from azure.ml.entities.job.automl.forecasting import ForecastingSettings
from azure.ml.entities.job.automl.featurization import FeaturizationSettings

# Build the AutoML forecasting job definition from its individual settings objects.

# Run on the cluster named by `cpu_cluster_name`.
# Note: this rebinds `compute`, which an earlier cell used for the Compute entity.
compute = InternalComputeConfiguration(target=cpu_cluster_name)

general_settings = GeneralSettings(task_type=TaskType.FORECASTING,
                                   primary_metric= OptimizationMetric.NORMALIZED_ROOT_MEAN_SQUARED_ERROR,
                                   enable_model_explainability=True)

# TODO: Seems like a bug here, max_trials=3 + max_concurrent_trials=4 seems to only trigger one child run
# NOTE(review): the TODO above mentions max_trials=3 while the code passes
# max_trials=4 -- confirm which value reproduces the issue.
limit_settings = LimitSettings(job_timeout=60,
                               max_trials=4,
                               max_concurrent_trials=4,
                               enable_early_termination=False)

# TODO: How can we reuse the 'data' object created above?
# The dataset is referenced by a hard-coded string; presumably "<name>:<version>"
# of the data asset registered in the upload cell -- TODO confirm.
training_data_settings = TrainingDataSettings(dataset_arm_id="train_dataset_beer:1",
                                              target_column_name="BeerProduction")
validation_data_settings = ValidationDataSettings(n_cross_validations=5)
data_settings = DataSettings(training_data=training_data_settings, validation_data=validation_data_settings)

featurization_settings = FeaturizationSettings(featurization_config="auto")

training_settings = TrainingSettings(enable_dnn_training=False)

forecasting_settings = ForecastingSettings(country_or_region_for_holidays="US",
                                           forecast_horizon=12,
                                           target_rolling_window_size=0,
                                           time_column_name="DATE")

### Get a unique job name for repeated trials ###
### This can be skipped, in which case a random guid is generated for the job name
# NOTE: `job_name` is computed here but currently unused, because the `name=`
# argument of AutoMLJob below is commented out.
import time
job_name = "simplebeerjob{}".format(str(int(time.time())))
################################################

# Extra properties attached to the job.
extra_automl_settings = {"save_mlflow": True}

automl_job = AutoMLJob(
#     name=job_name,
    compute=compute,
    general_settings=general_settings,
    limit_settings=limit_settings,
    data_settings=data_settings,
    forecasting_settings=forecasting_settings,
    training_settings=training_settings,
    featurization_settings=featurization_settings,
    properties=extra_automl_settings,
)

######## For loading directly from YAML ########
# from pathlib import Path
# from azure.ml.entities import Job, AutoMLJob

# job_path_yaml = Path("./automl_beer_job.yml")
# automl_job = Job.load(job_path_yaml)

# Last expression of the cell: displays the job definition in the notebook.
automl_job

AutoMLJob({'name': '598fcd86-f7e8-4379-ae7b-4e7f1a1a566e', 'id': None, 'description': None, 'tags': {}, 'properties': {'save_mlflow': True}, 'base_path': './', 'type': 'automl_job', 'creation_context': None, 'experiment_name': '3-automl-remote-compute-run', 'status': None, 'interaction_endpoints': None, 'log_files': None, 'output': None, 'general_settings': <azure.ml._restclient._2020_09_01_preview.machinelearningservices.models._models_py3.GeneralSettings object at 0x7fbef25178d0>, 'data_settings': <azure.ml._restclient._2020_09_01_preview.machinelearningservices.models._models_py3.DataSettings object at 0x7fbef25179d0>, 'limit_settings': <azure.ml._restclient._2020_09_01_preview.machinelearningservices.models._models_py3.LimitSettings object at 0x7fbef2517a50>, 'forecasting_settings': <azure.ml.entities.job.automl.forecasting.ForecastingSettings object at 0x7fbef2517b10>, 'training_settings': <azure.ml._restclient._2020_09_01_preview.machinelearningservices.models._models_py3.Trainin

In [18]:
# Submit the job; `created_job` is the job object returned by the service and is
# what the monitoring/download cell operates on.
# TODO: There appears to be a bug here (repro: try executing this cell twice)
created_job = client.jobs.create_or_update(automl_job)
# Last expression of the cell: displays the created job in the notebook.
created_job

AutoMLJob({'name': '598fcd86-f7e8-4379-ae7b-4e7f1a1a566e', 'id': '/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourceGroups/gasi_rg_neu/providers/Microsoft.MachineLearningServices/workspaces/gasi_ws_neu/jobs/598fcd86-f7e8-4379-ae7b-4e7f1a1a566e', 'description': None, 'tags': {}, 'properties': {'save_mlflow': 'True'}, 'base_path': './', 'type': 'automl_job', 'creation_context': <azure.ml._restclient._2020_09_01_preview.machinelearningservices.models._models_py3.SystemData object at 0x7fbef25a3b90>, 'experiment_name': '3-automl-remote-compute-run', 'status': 'NotStarted', 'interaction_endpoints': {'Tracking': <azure.ml._restclient._2020_09_01_preview.machinelearningservices.models._models_py3.JobEndpoint object at 0x7fbef25a3150>, 'Studio': <azure.ml._restclient._2020_09_01_preview.machinelearningservices.models._models_py3.JobEndpoint object at 0x7fbef25a3b10>}, 'log_files': None, 'output': None, 'general_settings': <azure.ml._restclient._2020_09_01_preview.machinelearningservi

In [20]:
# Print the Studio URL (the helper only opens a browser tab when called with
# open_in_new_tab=True, which is not done here)
print_studio_url(created_job)

# Wait until the job is finished
wait_for_completion(client, created_job)

# Download logs + outputs locally
download_outputs(client, created_job)


Studio URL:  https://ml.azure.com/runs/598fcd86-f7e8-4379-ae7b-4e7f1a1a566e?wsid=/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourcegroups/gasi_rg_neu/workspaces/gasi_ws_neu&tid=72f988bf-86f1-41af-91ab-2d7cd011db47
Current job status:  NotStarted
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:  Running
Current job status:

Downloading the job logs ExperimentRun/dcid.598fcd86-f7e8-4379-ae7b-4e7f1a1a566e/ at ./outputs/598fcd86-f7e8-4379-ae7b-4e7f1a1a566e


(UserError) A job was found, but it is not supported in this API version and cannot be accessed.


Traceback (most recent call last):
  File "<ipython-input-19-4fa2dbf6b888>", line 31, in download_outputs
    client.jobs.download(first_child_run, download_path="./outputs/")
  File "/home/schrodinger/automl/sdk-cli-v2/src/azure-ml/azure/ml/_operations/job_operations.py", line 246, in download
    job_details = self.get(name)
  File "/home/schrodinger/automl/sdk-cli-v2/src/azure-ml/azure/ml/_operations/job_operations.py", line 101, in get
    job_object = self._get_job(name)
  File "/home/schrodinger/automl/sdk-cli-v2/src/azure-ml/azure/ml/_operations/job_operations.py", line 285, in _get_job
    **self._kwargs,
  File "/home/schrodinger/automl/sdk-cli-v2/src/azure-ml/azure/ml/_restclient/_2020_09_01_preview/machinelearningservices/operations/_jobs_operations.py", line 196, in get
    raise HttpResponseError(response=response, model=error, error_format=ARMErrorFormat)
azure.core.exceptions.HttpResponseError: (UserError) A job was found, but it is not supported in this API version and 

## Code below currently doesn't work

In [16]:
from pprint import pprint
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

def print_model_info(models):
    """Print the name and creation timestamp of each model in `models`.

    Args:
        models: Iterable of objects exposing `name` and `creation_timestamp`
            (presumably MLflow registered models -- confirm against caller).

    For every model a "--" separator line is printed, followed by the name and
    the raw `creation_timestamp` value. Nothing is printed for an empty iterable.
    """
    for model in models:
        print("--")
        print("Name: {}".format(model.name))
        # The timestamp is printed as stored; no conversion is applied.
        print("Time Created: {}".format(model.creation_timestamp))


# Exploratory cell: inspect what the MLflow client exposes for this experiment.
mlflow_client = MlflowClient()


experiment = mlflow_client.get_experiment_by_name(experiment_name)
print(experiment)
# NOTE: the results of the next two expressions are discarded; only the last
# expression of a notebook cell is displayed.
mlflow_client.list_run_infos(experiment.experiment_id, run_view_type=ViewType.ACTIVE_ONLY)

dir(mlflow_client)
# Last live expression of the cell: the registered models shown in the output.
mlflow_client.list_registered_models()
# Draft of a best-run / best-model lookup, kept disabled.
# NOTE(review): `run_vew_type` below looks like a typo for `run_view_type`.
# best_run = client.search_runs(experiment_ids=[experiment.id], filter_string="", run_vew_type=ViewType.ACTIVE_ONLY, max_results=1, order_by=[f"metrics.{primary_metric} DESC"])[0]
# best_models = client.search_model_versions(f"name='{best_run.id}'")

# for rm in client.list_registered_models():
#     pprint(dict(rm), indent=4)

<Experiment: artifact_location='', experiment_id='e76232b5-3215-43ad-8c33-54089e072777', lifecycle_stage='active', name='beerproduction', tags={}>


[<RegisteredModel: creation_timestamp=1616452676112, description='', last_updated_timestamp=1616452676112, latest_versions=[], name='AutoMLfe4299a1632', tags={}>,
 <RegisteredModel: creation_timestamp=1616452682701, description='', last_updated_timestamp=1616452682701, latest_versions=[], name='AutoML8342bc81c2', tags={}>,
 <RegisteredModel: creation_timestamp=1616452683282, description='', last_updated_timestamp=1616452683282, latest_versions=[], name='tf-dnn-mnist', tags={}>,
 <RegisteredModel: creation_timestamp=1616452683304, description='', last_updated_timestamp=1616452683304, latest_versions=[], name='tf-dnn-mnist-warm-start', tags={}>,
 <RegisteredModel: creation_timestamp=1616452683324, description='', last_updated_timestamp=1616452683324, latest_versions=[], name='tf-dnn-mnist-resumed', tags={}>,
 <RegisteredModel: creation_timestamp=1616452694989, description='', last_updated_timestamp=1616452694989, latest_versions=[], name='AutoML988d71b280', tags={}>,
 <RegisteredModel: c

In [None]:
# TODO: What's the API to get Experiment / id via SDK v2.0?
from azureml.core.experiment import Experiment, ViewType
# Draft of how the best model of an AutoML run could be retrieved through MLflow.
# `ws` is the SDK v1 workspace object created in the MLflow initialisation cell.
experiment = Experiment(workspace=ws, name="3-automl-remote-compute-run")

# NOTE(review): this rebinds `client`, which earlier cells use for the MLClient;
# re-running those cells after this one would then use the MLflow client instead.
client = MlflowClient()
print(client.list_registered_models())
print(dir(client))

# NOTE(review): the primary metric is an error metric, so ordering DESC
# presumably returns the worst run first -- confirm whether ASC was intended.
# `best_run` is not used by the statements that follow.
best_run = client.search_runs(
    experiment_ids=[experiment.id],
    filter_string="", max_results=1, order_by=[f"metrics.{OptimizationMetric.NORMALIZED_ROOT_MEAN_SQUARED_ERROR} DESC"])[0]
# The model name is a hard-coded job name from a previous run.
best_models = client.search_model_versions(f"name='simplebeerjob1620684744'")
best_model = best_models[0]
# We may store 1 or 2 models depending on how our API proposal goes.
# If sklearn and onnx are flavors of the same model, this would only contain one;
# if they are stored separately, we'll have 2 and we'll need to specify an additional filter.

# The above requires us to name the model after the child run id; it should be achievable without that.
# Need to sync with some folks, but if getting that run's model isn't really supported, something like
# the below would be convenient:
model_filter = f"parent_run_id='simplebeerjob1620684744';sort_by_metric=\'{OptimizationMetric.NORMALIZED_ROOT_MEAN_SQUARED_ERROR}\'"
# NOTE(review): this filter syntax is part of the proposal described above;
# it is not established that `list_registered_models` accepts it.
models = client.list_registered_models(model_filter)
best_model = models[0]