In [3]:
import os
import json
import time
import azureml
import logging
from azureml.core.model import Model
from azureml.train.automl import AutoMLConfig
from azureml.core import Workspace, Run, Datastore, Experiment
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.hyperdrive import *
from azureml.train.dnn import TensorFlow
from azureml.widgets import RunDetails

# Record the installed Azure ML SDK version next to the notebook outputs.
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.0.18


In [5]:
# One-time setup: uncomment the template below, fill in the placeholders and
# run it once so that ws.write_config() saves a workspace config file.
#subscription_id ='<SUB_ID>'
#resource_group ='<RESOURCE_GROUP>'
#workspace_name = '<WORKSPACE>'

#try:
#    ws = Workspace(subscription_id = subscription_id, resource_group = resource_group, workspace_name = workspace_name)
#    ws.write_config()
#    print('Workspace configuration succeeded. You are all set!')
#except:
#    print('Workspace not found. TOO MANY ISSUES!!!')

# Normal path: load the workspace from the saved config file.
ws = Workspace.from_config()

Found the config file in: C:\projects\SimpleMNIST\aml_config\config.json


# Compute Environment

In [7]:
# Reuse the named compute target when the workspace already has it; otherwise
# provision a new cluster of STANDARD_NC6 VMs and wait until it is ready.
cluster = 'sauron'
try:
    compute = ComputeTarget(workspace=ws, name=cluster)
except ComputeTargetException:
    print('Creating a new compute target...')
    provisioning = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC6',
        min_nodes=1,
        max_nodes=6
    )
    compute = ComputeTarget.create(ws, cluster, provisioning)
    compute.wait_for_completion(show_output=True)
else:
    print('Found existing compute target')
    

Found existing compute target


# Run Experiment

In [15]:
# Submit train.py from the current directory as a TensorFlow run on the
# remote compute target, under the 'simplemnist' experiment.
mnist = Experiment(ws, 'simplemnist')

estimator_settings = dict(
    source_directory='.',
    compute_target=compute,
    entry_script='train.py',
    use_gpu=True,
)
estimator = TensorFlow(**estimator_settings)

run = mnist.submit(estimator)

In [16]:
# Display the submitted run's summary (experiment, id, type, status, links).
run

Experiment,Id,Type,Status,Details Page,Docs Page
simplemnist,simplemnist_1547760561539,azureml.scriptrun,Queued,Link to Azure Portal,Link to Documentation


In [17]:
# Show the run-monitoring widget for the submitted run.
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'NOTSET',…

# Manage Data

In [18]:
# Upload the local 'data' directory into the 'mnist' folder of the
# workspace's default datastore; the returned reference is used by later runs.
ds = ws.get_default_datastore()
mnist_data = ds.upload(
    src_dir='data',
    target_path='mnist',
    show_progress=True,
)

Target already exists. Skipping upload for mnist\mnist.npz


In [8]:
# Same training run as before, except that the uploaded dataset is mounted
# on the compute target and its location is handed to train.py via --data.
mnist = Experiment(ws, 'simplemnist')

script_params = {'--data': mnist_data.as_mount()}
estimator = TensorFlow(
    source_directory='.',
    compute_target=compute,
    entry_script='train.py',
    script_params=script_params,
    use_gpu=True
)

# Submit, then follow progress in the widget.
run = mnist.submit(estimator)
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'NOTSET',…

# Optimize hyperparameters

In [9]:
# Base estimator for the hyperparameter sweep: same script and data as the
# previous run, with a fixed number of epochs. Learning rate and batch size
# are intentionally left out of script_params so the sweep can supply them.
script_params = {
    # Mount explicitly, matching the earlier training cell, rather than
    # relying on the data reference's default mode.
    '--data': mnist_data.as_mount(),
    '--epochs': 100
}

mnist = Experiment(ws, 'simplemnist')
estimator = TensorFlow(source_directory='.',
                       compute_target=compute,
                       entry_script='train.py',
                       script_params=script_params,
                       use_gpu=True)

## Hyperparameter search

In [12]:
# Search space: randomly sampled learning rate and batch size, passed to the
# training script as --lr and --batch.
param_sampling = RandomParameterSampling({
    '--lr': loguniform(-15, -3),
    '--batch': choice(16, 32, 64, 128, 512),
})

# Early-termination policy applied to the child runs of the sweep.
bandit_policy = BanditPolicy(slack_factor=0.15, evaluation_interval=2)

# Sweep definition: maximize the logged "accuracy" metric over at most
# 10 child runs, 5 at a time.
sweep_settings = dict(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=bandit_policy,
    primary_metric_name="accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=10,
    max_concurrent_runs=5,
)
hyperdrive_run_config = HyperDriveRunConfig(**sweep_settings)

hd_run = mnist.submit(hyperdrive_run_config)
RunDetails(hd_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'NOTSE…

In [71]:
# Print the id and final logged accuracy of every completed child run.
for child in hd_run.get_children():
    if child.get_status() != 'Completed':
        continue

    accuracy = child.get_metrics().get('accuracy')
    if isinstance(accuracy, (list, tuple)):
        # Metric logged several times: report the last value, if any.
        final_accuracy = accuracy[-1] if accuracy else None
    else:
        # Metric logged exactly once (plain scalar) or never logged (None).
        final_accuracy = accuracy

    print(child.id)
    print(final_accuracy)
    print('---------')

simplemnist_1547667457545_6
0.992900013923645
---------
simplemnist_1547667457545_1
0.9909999966621399
---------
simplemnist_1547667457545_0
0.9876000285148621
---------
simplemnist_1547667457545_3
0.9891999959945679
---------


In [20]:
# Re-attach to an earlier hyperparameter sweep by its run id and show its widget.
exp = ws.experiments['simplemnist']
sweep_run_id = 'simplemnist_1548818458668'

# Stop at the first matching run instead of building a list of every run in
# the experiment, and fail with a readable message when the id is not there.
hd_run = next((r for r in exp.get_runs() if r.id == sweep_run_id), None)
if hd_run is None:
    raise LookupError("Run '{}' was not found in the experiment".format(sweep_run_id))

RunDetails(hd_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

# Automatic Machine Learning

In [8]:
# Settings forwarded to AutoMLConfig as keyword arguments. The name embeds
# the current timestamp so that each configuration gets a distinct name.
automl_settings = dict(
    name="AUTOMLMNIST_{0}".format(time.time()),
    iteration_timeout_minutes=5,
    iterations=20,
    n_cross_validations=5,
    primary_metric='AUC_weighted',
    preprocess=False,
    # NOTE(review): 10 concurrent iterations is more than the max_nodes=6
    # requested when this notebook creates the cluster — confirm cluster size.
    max_concurrent_iterations=10,
    verbosity=logging.INFO,
)

# Classification task on the remote compute target; training data is
# provided by the get_data.py script.
automl_config = AutoMLConfig(
    task='classification',
    debug_log='automl_errors.log',
    path='.',
    compute_target=compute,
    data_script='./get_data.py',
    **automl_settings
)

In [14]:
# Submit the AutoML configuration under the 'simplemnist' experiment.
experiment = Experiment(workspace=ws, name='simplemnist')
remote_run = experiment.submit(automl_config)

In [14]:
# Re-attach to a previously submitted AutoML run by its id and show its widget.
exp = ws.experiments['simplemnist']
automl_run_id = 'AutoML_b41af9f3-998c-4f3c-8493-9a076f702631'

# Stop at the first matching run instead of building a list of every run in
# the experiment, and fail with a readable message when the id is not there.
remote_run = next((r for r in exp.get_runs() if r.id == automl_run_id), None)
if remote_run is None:
    raise LookupError("Run '{}' was not found in the experiment".format(automl_run_id))

RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

# Deploy best model

In [22]:
# Locate the sweep run and the child run chosen for deployment, then list
# the files that child run produced.
sweep_run_id = 'simplemnist_1548818458668'
best_run_id = 'simplemnist_1548818458668_3'

# Stop at the first match rather than materializing every run, and report
# which id is missing instead of failing with a bare IndexError.
hd_run = next((r for r in exp.get_runs() if r.id == sweep_run_id), None)
if hd_run is None:
    raise LookupError("Run '{}' was not found in the experiment".format(sweep_run_id))

best = next((r for r in hd_run.get_children() if r.id == best_run_id), None)
if best is None:
    raise LookupError("Child run '{}' was not found under '{}'".format(best_run_id, sweep_run_id))

best.get_file_names()

['azureml-logs/55_batchai_execution.txt',
 'azureml-logs/60_control_log.txt',
 'azureml-logs/80_driver_log.txt',
 'azureml-logs/azureml.log',
 'logs/events.out.tfevents.1548818941.4329db9481f7433480f722b11724d72a000008',
 'outputs/checkpoint',
 'outputs/digits.pb',
 'outputs/model.ckpt.data-00000-of-00001',
 'outputs/model.ckpt.index',
 'outputs/model.ckpt.meta',
 'outputs/model.pb']

In [74]:
# Download the exported graph from the best run and register it in the
# workspace as the 'SimpleMNIST' model.
artifact_name = 'outputs/digits.pb'   # name of the file within the run
model_file = 'outputs/digits.pb'      # local path the model is registered from

# Make sure the local target directory exists before downloading into it.
os.makedirs(os.path.dirname(model_file), exist_ok=True)

# Pass the full local file path (not just the directory) so the download
# lands exactly where Model.register reads it from.
best.download_file(name=artifact_name, output_file_path=model_file)

model = Model.register(ws, model_name='SimpleMNIST', model_path=model_file,
                       description='Simple MNIST model extracted from hyperparamter optimization run')

Registering model SimpleMNIST


## Create Conda Dependencies

In [75]:
from azureml.core.conda_dependencies import CondaDependencies

# Conda environment file consumed later by the image configuration.
conda_file = 'simplemnist.yml'

# Dependencies for the scoring environment: numpy plus the TensorFlow pip package.
myenv = CondaDependencies()
myenv.add_pip_package('numpy')
myenv.add_tensorflow_pip_package()

with open(conda_file, 'w') as env_file:
    print('Writing out {}'.format(conda_file))
    env_file.write(myenv.serialize_to_string())
    print('Done!')

Writing out simplemnist.yml
Done!


## Create Image

In [88]:
# Look up the registered model by name from the workspace.
model = ws.models['SimpleMNIST']

In [89]:
from azureml.core.image import ContainerImage, Image

# Image definition: score.py as the entry point, the python runtime, and the
# conda environment file written earlier.
image_settings = dict(
    execution_script="score.py",
    runtime="python",
    conda_file="simplemnist.yml",
)
image_config = ContainerImage.image_configuration(**image_settings)

# Build the image containing the registered model and wait for the build to finish.
image = Image.create(ws, 'simplemnist', [model], image_config)
image.wait_for_creation(show_output=True)

Creating image
Running....................................
SucceededImage creation operation finished for image simplemnist:2, operation "Succeeded"


In [90]:
# Link to the image build log. NOTE(review): the displayed URL appears to
# carry a signed access token in its query string — avoid sharing this output.
image.image_build_log_uri

'https://icebatchwestus2.blob.core.windows.net/logs/halacr_26d4eff49acc408f9937724f59cb371a.txt?sig=<REDACTED>&sp=r&sr=b&sv=2017-04-17&se=2019-02-16T07%3A05%3A05Z'

# Deploy Service
Deploy the newly created image as a web service.

## ACI Deployment

In [91]:
# Look up the 'simplemnist' image from the workspace.
image = ws.images['simplemnist']

In [92]:
from azureml.core.webservice import AciWebservice, Webservice

service_name = 'simplemnist-svc'

# Remove any existing service with the same name before redeploying.
previous_service = ws.webservices.get(service_name)
if previous_service is not None:
    previous_service.delete()

# Container instance sizing and description for the new service.
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    description='simple MNIST digit detection'
)

# Deploy the image as an ACI web service and block until deployment finishes.
service = Webservice.deploy_from_image(
    workspace=ws,
    image=image,
    name=service_name,
    deployment_config=aciconfig
)
service.wait_for_deployment(show_output=True)

Creating service
Running...............
SucceededACI service creation operation finished, operation "Succeeded"


In [93]:
# Save the service logs to deploy.log, then display the scoring endpoint.
with open('deploy.log', mode='w') as log_file:
    deployment_logs = service.get_logs()
    log_file.write(deployment_logs)

service.scoring_uri

'http://13.83.105.225:80/score'

## Enable App Insights

In [94]:
# Update the deployed ACI service with Application Insights enabled.
service.update(enable_app_insights=True)

## Kubernetes Deployment

In [95]:
from azureml.core.webservice import Webservice, AksWebservice

# Service name, target AKS cluster (looked up by name) and default deployment configuration.
aks_service_name = 'simplemnist-aks-svc'
aks_target = ws.compute_targets['eagles']
aks_config = AksWebservice.deploy_configuration()

# Remove any existing service with the same name before redeploying.
previous_aks_service = ws.webservices.get(aks_service_name)
if previous_aks_service is not None:
    previous_aks_service.delete()

# Deploy the image to the AKS target, wait for completion and report the state.
akssvc = Webservice.deploy_from_image(
    workspace=ws,
    name=aks_service_name,
    image=image,
    deployment_config=aks_config,
    deployment_target=aks_target
)
akssvc.wait_for_deployment(show_output=True)
print(akssvc.state)

Creating service
Running.......
SucceededAKS service creation operation finished, operation "Succeeded"
Healthy


In [96]:
# Update the AKS service with model data collection and Application Insights enabled.
akssvc.update(collect_model_data=True, enable_app_insights=True)