# Set up and run ML Service experiment
This notebook sets up and runs an Azure Machine Learning Service experiment.

In [46]:
import azureml.core
from azureml.core import Environment, Experiment, Workspace

# Show the SDK version this notebook was authored with next to the version
# that is currently installed, so any version drift is visible up front.
print("This notebook was created using version 1.0.48 of the Azure ML SDK")
print("You are currently using version", azureml.core.VERSION, "of the Azure ML SDK")

This notebook was created using version 1.0.48 of the Azure ML SDK
You are currently using version 1.0.69 of the Azure ML SDK


In [2]:
script_folder = "./experiment"

In [83]:
import os
# Optional: populate the environment from a .env file first.
# from dotenv import load_dotenv, find_dotenv

# load_dotenv(find_dotenv(), override=True, verbose=True)

# All connection settings come from environment variables; a variable that is
# not set yields None.
_read_setting = os.environ.get

# Where the workspace lives.
subscription_id, resource_group, workspace_name = (
    _read_setting(key)
    for key in ('subscription_id', 'resource_group', 'workspace_name')
)

# Service principal credentials.
tenant_id, service_principal_id, service_principal_password = (
    _read_setting(key)
    for key in ('tenant_id', 'service_principal_id', 'service_principal_password')
)

## Workspace setup
First we try to load the workspace from a provided configuration file. If you have not already created one, you need to download it from the portal. If no configuration file is found, we connect to the workspace directly using the settings above and then write the configuration file for next time.

In [91]:
# Try to load the workspace from a configuration file.
# `ws` is initialised to None first so that, when loading fails, the next cell's
# `if ws is None` check works instead of raising NameError.
ws = None
try:
    ws = Workspace.from_config()
    print('Workspace loaded')
except Exception as e:
    # Python 3 exceptions have no `.message` attribute; print the exception
    # itself so the real reason for the failure is shown.
    print(e)

Workspace loaded


In [93]:
# Fallback: no workspace was loaded from a configuration file, so connect using
# the subscription / resource group / workspace name read from the environment,
# then write the configuration out for subsequent runs.
if ws is None:
    try:
        ws = Workspace(subscription_id=subscription_id,
                       resource_group=resource_group,
                       workspace_name=workspace_name)
        ws.write_config()
        print('Library configuration succeeded')
    except Exception as e:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit are not swallowed, and show the
        # underlying error instead of hiding it.
        print('Workspace not found, check passed parameters match')
        print(e)

Turn on diagnostics collection

In [94]:
from azureml.telemetry import set_diagnostics_collection
set_diagnostics_collection(send_diagnostics=True)

Turning diagnostics collection on. 


## Execution 
You can run the training script directly with python, or locally / remotely using the SDK
### Locally using SDK

Create / access the experiment

In [95]:
experiment = Experiment(workspace=ws, name='ml-service-test-local')

In [96]:
# Editing a run configuration property on-fly.
user_managed_env = Environment("user-managed-env")

user_managed_env.python.user_managed_dependencies = True

# You can choose a specific Python environment by pointing to a Python path 
#user_managed_env.python.interpreter_path = '/home/johndoe/miniconda3/envs/myenv/bin/python'

In [97]:
from azureml.core import ScriptRunConfig

# Describe the run: execute train.py from the experiment script folder.
src = ScriptRunConfig(source_directory=script_folder, script='train.py')
# Run it in the user-managed environment configured in the previous cell.
src.run_config.environment = user_managed_env
# Submit to the current experiment. Note that `run` is reassigned later when
# the remote run is submitted.
run = experiment.submit(src)

### Remotely using SDK
Create / access the experiment

In [98]:
experiment = Experiment(workspace=ws, name='ml-service-test')

Create / access compute

In [99]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException


# Cluster settings can be overridden through environment variables.
# choose a name for your cluster
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "mltest-cluster")
# Environment variables are always strings, so convert the node counts to int;
# otherwise a value supplied through the environment would be passed on as a str
# while the defaults are ints.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 2))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")
vm_priority = os.environ.get("AML_COMPUTE_CLUSTER_PRIORITY", "dedicated") # "lowpriority",

if compute_name in ws.compute_targets:
    # Reuse an existing compute target with this name.
    # NOTE: a target of a type other than AmlCompute is still used, just
    # without the confirmation message.
    compute_target = ws.compute_targets[compute_name]
    if compute_target and type(compute_target) is AmlCompute:
        print('found compute target. just use it. ' + compute_name)
else:
    print('creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                           min_nodes=compute_min_nodes,
                                                           max_nodes=compute_max_nodes,
                                                           vm_priority=vm_priority,
                                                           # vnet_resourcegroup_name='ddpdRGDev',
                                                           # vnet_name='azureml-vnet',
                                                           # subnet_name='azureml-subnet',
                                                          )

    compute_target = ComputeTarget.create(ws, compute_name, compute_config)

    # Block until provisioning has finished, streaming progress to the cell output.
    compute_target.wait_for_completion(show_output=True)

found compute target. just use it. mltest-cluster


Setup an estimator - we will use the SKLearn specific estimator.

In [100]:
from azureml.train.sklearn import SKLearn

# Command-line arguments handed to train.py.
# The comma after the '--output-dir' entry is required: without it the dict
# literal is a SyntaxError and the cell cannot run.
script_params = {
    '--output-dir': 'outputs',
    '--kernel': 'linear',
    '--penalty': 1.0,
}

# Estimator that runs train.py from the script folder on the compute target,
# with matplotlib added as an extra pip package.
estimator = SKLearn(source_directory=script_folder,
                    script_params=script_params,
                    compute_target=compute_target,
                    entry_script='train.py',
                    pip_packages=['matplotlib']
                   )

In [101]:
run = experiment.submit(estimator)

## Wait For Completion
If running remotely, this can take some time if the compute resources need to be scaled up.

In [105]:
from azureml.widgets import RunDetails
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 's…

In [103]:
run.wait_for_completion(show_output=False)

RunId: ml-service-test_1572004129_1de9d711
Web View: https://mlworkspace.azure.ai/portal/subscriptions/d4e5fecf-32d0-4314-a56e-ca2389ac7ac3/resourceGroups/DataPlatformMHEWRGDev/providers/Microsoft.MachineLearningServices/workspaces/mhew-ml-service-workspace/experiments/ml-service-test/runs/ml-service-test_1572004129_1de9d711

Streaming azureml-logs/20_image_build_log.txt

2019/10/25 11:48:56 Downloading source code...
2019/10/25 11:48:57 Finished downloading source code
2019/10/25 11:48:58 Creating Docker network: acb_default_network, driver: 'bridge'
2019/10/25 11:48:58 Successfully set up Docker network: acb_default_network
2019/10/25 11:48:58 Setting up Docker configuration...
2019/10/25 11:48:59 Successfully set up Docker configuration
2019/10/25 11:48:59 Logging in to registry: mhewmlservicew8204786564.azurecr.io
2019/10/25 11:49:00 Successfully logged into mhewmlservicew8204786564.azurecr.io
2019/10/25 11:49:00 Executing step ID: acb_step_0. Timeout(sec): 5400, Working directory:

  Downloading https://files.pythonhosted.org/packages/1c/cb/5b70897d8c98daebcbba9df6a5fd4a5cf80ef52d97fa7ba7c6be5e6a2d69/azureml_defaults-1.0.69-py2.py3-none-any.whl
Collecting scikit-learn==0.20.3
  Downloading https://files.pythonhosted.org/packages/5e/82/c0de5839d613b82bddd088599ac0bbfbbbcbd8ca470680658352d2c435bd/scikit_learn-0.20.3-cp36-cp36m-manylinux1_x86_64.whl (5.4MB)
Collecting scipy==1.2.1
  Downloading https://files.pythonhosted.org/packages/7f/5f/c48860704092933bf1c4c1574a8de1ffd16bf4fde8bab190d747598844b2/scipy-1.2.1-cp36-cp36m-manylinux1_x86_64.whl (24.8MB)
Collecting numpy==1.16.2
  Downloading https://files.pythonhosted.org/packages/35/d5/4f8410ac303e690144f0a0603c4b8fd3b986feb2749c435f7cdbb288f17e/numpy-1.16.2-cp36-cp36m-manylinux1_x86_64.whl (17.3MB)
Collecting joblib==0.13.2
  Downloading https://files.pythonhosted.org/packages/cd/c1/50a758e8247561e58cb87305b1e90b171b8c767b15b12a1734001f41d356/joblib-0.13.2-py2.py3-none-any.whl (278kB)
Collecting python-dateutil>=2.

  Downloading https://files.pythonhosted.org/packages/82/59/cb226752e20d83598d7fdcabd7819570b0329a61db07cfbdd21b2ef546e3/SecretStorage-3.1.1-py3-none-any.whl
Collecting cryptography!=1.9,!=2.0.*,!=2.1.*,!=2.2.*
  Downloading https://files.pythonhosted.org/packages/ca/9a/7cece52c46546e214e10811b36b2da52ce1ea7fa203203a629b8dfadad53/cryptography-2.8-cp34-abi3-manylinux2010_x86_64.whl (2.3MB)
Collecting Jinja2>=2.10
  Downloading https://files.pythonhosted.org/packages/65/e0/eb35e762802015cab1ccee04e8a277b03f1d8e53da3ec3106882ec42558b/Jinja2-2.10.3-py2.py3-none-any.whl (125kB)
Collecting Werkzeug>=0.14
  Downloading https://files.pythonhosted.org/packages/ce/42/3aeda98f96e85fd26180534d36570e4d18108d62ae36f87694b476b83d6f/Werkzeug-0.16.0-py2.py3-none-any.whl (327kB)
Collecting itsdangerous>=0.24
  Downloading https://files.pythonhosted.org/packages/76/ae/44b03b253d6fade317f32c24d100b3b35c2239807046a4c953c7b89fa49e/itsdangerous-1.1.0-py2.py3-none-any.whl
Collecting click>=5.1
  Downloading h

 ---> Running in e961e9485d56
Removing intermediate container e961e9485d56
 ---> 08199fbb8896
Step 12/14 : COPY azureml-environment-setup/spark_cache.py azureml-environment-setup/log4j.properties /azureml-environment-setup/
 ---> 8b5cb5d5b214
Step 13/14 : ENV AZUREML_ENVIRONMENT_IMAGE True
 ---> Running in 9cb530d8946f
Removing intermediate container 9cb530d8946f
 ---> e2a092244959
Step 14/14 : CMD ["bash"]
 ---> Running in 748b12605f39
Removing intermediate container 748b12605f39
 ---> be02c7a37363
Successfully built be02c7a37363
Successfully tagged mhewmlservicew8204786564.azurecr.io/azureml/azureml_55a0f51ee1f6c2e990597b480c66998a:latest
2019/10/25 11:51:32 Successfully executed container: acb_step_0
2019/10/25 11:51:32 Executing step ID: acb_step_1. Timeout(sec): 5400, Working directory: '', Network: 'acb_default_network'
2019/10/25 11:51:32 Pushing image: mhewmlservicew8204786564.azurecr.io/azureml/azureml_55a0f51ee1f6c2e990597b480c66998a:latest, attempt 1
The push refers to repos


Streaming azureml-logs/75_job_post-tvmps_ec796df868fb6081b7a344f7d73a864cea32882922f509cbba902fa667dc1778_d.txt

bash: /azureml-envs/azureml_635627c72a1b7b09dc7cca6153928737/lib/libtinfo.so.5: no version information available (required by bash)
Starting job release. Current time:2019-10-25T11:57:07.277134
Logging experiment finalizing status in history service.
Starting the daemon thread to refresh tokens in background for process with pid = 197
Job release is complete. Current time:2019-10-25T11:57:08.968803

Execution Summary
RunId: ml-service-test_1572004129_1de9d711
Web View: https://mlworkspace.azure.ai/portal/subscriptions/d4e5fecf-32d0-4314-a56e-ca2389ac7ac3/resourceGroups/DataPlatformMHEWRGDev/providers/Microsoft.MachineLearningServices/workspaces/mhew-ml-service-workspace/experiments/ml-service-test/runs/ml-service-test_1572004129_1de9d711



{'runId': 'ml-service-test_1572004129_1de9d711',
 'target': 'mltest-cluster',
 'status': 'Completed',
 'startTimeUtc': '2019-10-25T11:56:08.489795Z',
 'endTimeUtc': '2019-10-25T11:57:20.412075Z',
 'properties': {'_azureml.ComputeTargetType': 'batchai',
  'ContentSnapshotId': 'dd377c1e-6691-45e3-aa7f-752f1ff03bee',
  'azureml.git.repository_uri': 'git@github.com:FlipWebApps/azure-playground.git',
  'mlflow.source.git.repoURL': 'git@github.com:FlipWebApps/azure-playground.git',
  'azureml.git.branch': 'master',
  'mlflow.source.git.branch': 'master',
  'azureml.git.commit': 'c176d96a70917c8b014af7531a85fc75b9b1a8e7',
  'mlflow.source.git.commit': 'c176d96a70917c8b014af7531a85fc75b9b1a8e7',
  'azureml.git.dirty': 'True',
  'AzureML.DerivedImageName': 'azureml/azureml_55a0f51ee1f6c2e990597b480c66998a',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'runDefinition': {'script': 'train.py',
  'arguments

## Results

In [104]:
print(run.get_metrics())

{'Accuracy': 0.9736842105263158, 'Confusion matrix, without normalization': 'aml://artifactId/ExperimentRun/dcid.ml-service-test_1572004129_1de9d711/Confusion matrix, without normalization_1572004621.png', 'Normalized confusion matrix': 'aml://artifactId/ExperimentRun/dcid.ml-service-test_1572004129_1de9d711/Normalized confusion matrix_1572004621.png'}


In [106]:
print(run.get_file_names())

['Confusion matrix, without normalization_1572004621.png', 'Normalized confusion matrix_1572004621.png', 'azureml-logs/20_image_build_log.txt', 'azureml-logs/55_azureml-execution-tvmps_ec796df868fb6081b7a344f7d73a864cea32882922f509cbba902fa667dc1778_d.txt', 'azureml-logs/65_job_prep-tvmps_ec796df868fb6081b7a344f7d73a864cea32882922f509cbba902fa667dc1778_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_ec796df868fb6081b7a344f7d73a864cea32882922f509cbba902fa667dc1778_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/153_azureml.log', 'logs/azureml/azureml.log', 'outputs/model/model.joblib']


In [107]:
# Report the git provenance recorded on the run.
print('Repo:', run.properties['azureml.git.repository_uri'])
print('Branch:', run.properties['azureml.git.branch'])
print('Commit:', run.properties['azureml.git.commit'])
# The property is stored as a string ('True' / 'False'). Any non-empty string is
# truthy, so a plain `if value:` would always report a dirty tree; compare the
# text instead.
if str(run.properties['azureml.git.dirty']).strip().lower() == 'true':
    print('You have uncomitted changes so the logged commit number might not be representative!')
else:
    print('All changes are comitted')

Repo: git@github.com:FlipWebApps/azure-playground.git
Branch: master
Commit: c176d96a70917c8b014af7531a85fc75b9b1a8e7
You have uncomitted changes so the logged commit number might not be representative!


In [108]:
print(run.get_tags())

{'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":1,"RunningNodeCount":0,"CurrentNodeCount":1}'}


You need to decide whether to register all models or only specific ones. You might want to register only those that show better performance, especially if you use CI with a trigger that automatically deploys whenever a change is detected.

In [109]:
model = run.register_model(model_name='ml-service-test', model_path='outputs/model/model.joblib')

## Find the best run

In [110]:
# Get the best run: the one with the highest logged 'Accuracy'.
# The metric is an accuracy, so higher is better; keeping the lowest value
# (as one would for an error metric such as RMSE) would select the worst run.
# The variable names keep their historical "minimum_rmse" spelling because a
# later cell reads `minimum_rmse_runid`.
minimum_rmse_runid = None
minimum_rmse = None

# Use a dedicated loop variable so the notebook-level `run` is not overwritten.
for candidate_run in experiment.get_runs():
    # each logged metric becomes a key in this returned dict
    run_metrics = candidate_run.get_metrics()
    if 'Accuracy' not in run_metrics:
        continue

    run_rmse = run_metrics["Accuracy"]
    if minimum_rmse is None or run_rmse > minimum_rmse:
        minimum_rmse = run_rmse
        minimum_rmse_runid = candidate_run.get_details()["runId"]

if minimum_rmse_runid is None:
    # Avoid `str + None` raising TypeError when no run logged the metric.
    print("No run with an 'Accuracy' metric was found")
else:
    print("Best run_id: " + minimum_rmse_runid)
    print("Best run_id accuracy: " + str(minimum_rmse))

Best run_id: ml-service-test_1572004129_1de9d711
Best run_id rmse: 0.9736842105263158


In [111]:
from azureml.core import Run
# Re-open the selected run by its id within the current experiment, then list
# the files recorded for it.
best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)
print(best_run.get_file_names())

['Confusion matrix, without normalization_1572004621.png', 'Normalized confusion matrix_1572004621.png', 'azureml-logs/20_image_build_log.txt', 'azureml-logs/55_azureml-execution-tvmps_ec796df868fb6081b7a344f7d73a864cea32882922f509cbba902fa667dc1778_d.txt', 'azureml-logs/65_job_prep-tvmps_ec796df868fb6081b7a344f7d73a864cea32882922f509cbba902fa667dc1778_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_ec796df868fb6081b7a344f7d73a864cea32882922f509cbba902fa667dc1778_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/153_azureml.log', 'logs/azureml/azureml.log', 'outputs/model/model.joblib']


In [112]:
print(best_run.get_portal_url())

https://mlworkspace.azure.ai/portal/subscriptions/d4e5fecf-32d0-4314-a56e-ca2389ac7ac3/resourceGroups/DataPlatformMHEWRGDev/providers/Microsoft.MachineLearningServices/workspaces/mhew-ml-service-workspace/experiments/ml-service-test/runs/ml-service-test_1572004129_1de9d711


In [113]:
RunDetails(best_run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 's…

In [114]:
best_run.download_file(name='outputs/model/model.joblib')

# Appendix

In [115]:
# List the environments registered in the workspace and print the conda
# specification of each one whose name starts with "AzureML".
envs = Environment.list(workspace=ws)

for env, env_definition in envs.items():
    if not env.startswith("AzureML"):
        continue
    conda_spec = env_definition.python.conda_dependencies.serialize_to_string()
    print("Name", env)
    print("packages", conda_spec)

Name AzureML-Tutorial
packages channels:
- conda-forge
dependencies:
- python=3.6.2
- pip:
  - azureml-core==1.0.69
  - azureml-defaults==1.0.69
  - azureml-telemetry==1.0.69
  - azureml-train-restclients-hyperdrive==1.0.69
  - azureml-train-core==1.0.69
  - azureml-widgets==1.0.69
  - azureml-pipeline-core==1.0.69
  - azureml-pipeline-steps==1.0.69
  - azureml-opendatasets==1.0.69
  - azureml-automl-core==1.0.69
  - azureml-train-automl==1.0.69
  - azureml-explain-model==1.0.69
  - azureml-tensorboard==1.0.69
  - azureml-mlflow==1.0.69
  - mlflow
  - sklearn-pandas
- pandas
- numpy
- tqdm
- scikit-learn
- matplotlib
name: azureml_15549395a113679a9fb7f144ae9386ff

Name AzureML-Minimal
packages channels:
- conda-forge
dependencies:
- python=3.6.2
- pip:
  - azureml-core==1.0.69
  - azureml-defaults==1.0.69
name: azureml_5f6ffffd4487e99555beea9f6150391b

Name AzureML-Chainer-5.1.0-GPU
packages channels:
- conda-forge
dependencies:
- python=3.6.2
- pip:
  - azureml-core==1.0.69
  - azurem