In [25]:
# constants
subscription_id = '2a779d6f-0806-4359-a6e8-f1fd57bb5dd7' 
resource_group  = 'devintersection-2018-aml-demo'
workspace_name  = 'devintersection-workspace'
experiment_name = 'automl-regression'
project_folder = './automl-regression'
data_url = "https://devintersection.blob.core.windows.net/data/simulated-data.csv"
cluster_name = "cpucluster"
aks_cluster_name = 'my-aks-cluster' 
resource_id = '/subscriptions/2a779d6f-0806-4359-a6e8-f1fd57bb5dd7/resourceGroups/devintersection-2018-aml-demo/providers/Microsoft.BatchAI/workspaces/devintersection-workspace/clusters/cpucluster1c848275bca'

### Import required packages

In [26]:
import logging
import os
import random
import re

from matplotlib import pyplot as plt
from matplotlib.pyplot import imshow
import numpy as np
import pandas as pd
from sklearn import datasets

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.compute import AksCompute, ComputeTarget
from azureml.core.webservice import Webservice, AksWebservice
from azureml.core.image import Image
from azureml.core.model import Model

from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun


### Connect to Azure Workspace

In [27]:
from azureml.core import Workspace

try:
    ws = Workspace(subscription_id = subscription_id, resource_group = resource_group, workspace_name = workspace_name)
    ws.write_config()
    print('Library configuration succeeded')
except:
    print('Workspace not found')

Wrote the config file config.json to: /home/nbuser/library/aml_config/config.json
Library configuration succeeded


### Create a Workspace Experiment

In [4]:
ws = Workspace.from_config()

import os

output = {}
output['SDK version'] = azureml.core.VERSION
output['Subscription ID'] = ws.subscription_id
output['Workspace'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Project Directory'] = project_folder
pd.set_option('display.max_colwidth', -1)
pd.DataFrame(data=output, index=['']).T

experiment = Experiment(ws, experiment_name)

Found the config file in: /home/nbuser/library/aml_config/config.json


### Opt-in to send Diagnostics

In [40]:
from azureml.telemetry import set_diagnostics_collection
set_diagnostics_collection(send_diagnostics=True)

### Let's review our data

In [10]:
data = pd.read_csv(data_url)
print(data)

      Survival_In_Days  Suffered_Freeze_Event     Sex  Car_Is_Garaged State  \
0     975               False                  Female  False           KS     
1     1690              False                  Male    True            AR     
2     926               False                  Female  True            DE     
3     1742              False                  Male    True            IL     
4     1427              False                  Female  True            NY     
5     2280              False                  Male    False           CO     
6     743               False                  Female  True            HI     
7     1118              False                  Male    True            KY     
8     1835              False                  Female  True            AZ     
9     1508              False                  Male    False           FL     
10    1494              False                  Female  True            IN     
11    1511              False                  Male 

### Create get_data.py for remote compute

In [41]:
# create project folder
if not os.path.exists(project_folder):
    os.makedirs(project_folder)

In [42]:
%%writefile $project_folder/get_data.py

import pandas as pd
import numpy as np

def get_data():
    
    data = pd.read_csv("https://devintersection.blob.core.windows.net/data/simulated-data.csv")
    
    X = data.iloc[:,4:72]
    Y = np.empty(data.shape[0], dtype=object)
    Y[:] = data.iloc[:,0].tolist()

    return { "X" : X, "y" : Y }

Overwriting ./automl-regression/get_data.py


### Create AML Compute Cluster

In [14]:
### Create AML CPU Compute Cluster

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target.')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D12_V2',
                                                           max_nodes=4)

    # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

    compute_target.wait_for_completion(show_output=True)

# Use the 'status' property to get a detailed status for the current AmlCompute. 
print(compute_target.status.serialize())

Found existing compute target.
{'allocationState': 'steady', 'allocationStateTransitionTime': '2018-12-04T18:13:30.463000+00:00', 'creationTime': '2018-12-04T18:13:10.187000+00:00', 'currentNodeCount': 0, 'errors': None, 'nodeStateCounts': {'idleNodeCount': 0, 'leavingNodeCount': 0, 'preparingNodeCount': 0, 'runningNodeCount': 0, 'unusableNodeCount': 0}, 'provisioningState': 'succeeded', 'provisioningStateTransitionTime': '2018-12-04T18:13:29.797000+00:00', 'scaleSettings': {'manual': None, 'autoScale': {'maximumNodeCount': 4, 'minimumNodeCount': 0, 'initialNodeCount': 0}}, 'vmPriority': 'dedicated', 'vmSize': 'STANDARD_D12_V2'}


### Instantiate an Automated ML Config

In [19]:
automl_config = AutoMLConfig(task = 'regression',
                             iterations = 100,
                             iteration_timeout_minutes = 10, 
                             max_cores_per_iteration = 10,
                             preprocess= True,
                             primary_metric='r2_score',
                             n_cross_validations = 5,
                             debug_log = 'automl.log',
                             verbosity = logging.INFO,
                             data_script = project_folder + "/get_data.py",
                             compute_target = compute_target,
                             blacklist_models = "",
                             path = project_folder)

### Run our Experiment on AML Compute

In [20]:
remote_run = experiment.submit(automl_config, show_output=False)
remote_run

Experiment,Id,Type,Status,Details Page,Docs Page
automl-regression,AutoML_99de1c5a-78e3-4f33-ac20-36e4b02eec20,automl,Preparing,Link to Azure Portal,Link to Documentation


### Display Workspace Experiments

In [5]:
ws = Workspace.from_config()
experiment_list = Experiment.list(workspace=ws)

summary_df = pd.DataFrame(index = ['No of Runs'])
pattern = re.compile('^AutoML_[^_]*$')
for experiment in experiment_list:
    all_runs = list(experiment.get_runs())
    automl_runs = []
    for run in all_runs:
        if(pattern.match(run.id)):
            automl_runs.append(run)    
    summary_df[experiment.name] = [len(automl_runs)]
    
pd.set_option('display.max_colwidth', -1)
summary_df.T

Found the config file in: /home/nbuser/library/aml_config/config.json


Unnamed: 0,No of Runs
automl-regression,5


### Display Automated ML Runs for the Experiment

In [6]:
proj = ws.experiments[experiment_name]
summary_df = pd.DataFrame(index = ['Type', 'Status', 'Primary Metric', 'Iterations', 'Compute', 'Name'])
pattern = re.compile('^AutoML_[^_]*$')
all_runs = list(proj.get_runs(properties={'azureml.runsource': 'automl'}))
for run in all_runs:
    if(pattern.match(run.id)):
        properties = run.get_properties()
        tags = run.get_tags()
        amlsettings = eval(properties['RawAMLSettingsString'])
        if 'iterations' in tags:
            iterations = tags['iterations']
        else:
            iterations = properties['num_iterations']
        summary_df[run.id] = [amlsettings['task_type'], run.get_details()['status'], properties['primary_metric'], iterations, properties['target'], amlsettings['name']]
    
from IPython.display import HTML
projname_html = HTML("<h3>{}</h3>".format(proj.name))

from IPython.display import display
display(projname_html)
display(summary_df.T)

Unnamed: 0,Type,Status,Primary Metric,Iterations,Compute,Name
AutoML_ab755820-4bfd-4e8a-8b4b-9e0a2446b1c2,regression,Completed,r2_score,100,d12-cluster,automl-regression
AutoML_e73345b0-5094-4bbe-970b-06f4521da5dd,regression,Completed,r2_score,10,d12-cluster,automl-regression
AutoML_1e6dd094-9885-4998-a2d0-0a598956ed65,regression,Completed,r2_score,10,d12-cluster,automl-regression
AutoML_3e948b39-bab0-4cd5-bb60-fb3bdcc79e7e,regression,Completed,r2_score,10,d12-cluster,automl-regression
AutoML_3ce57e56-58bb-45ab-923a-aa10393a4837,regression,NotStarted,r2_score,10,d12-cluster,automl-regression


### Display Automated ML Run Details

In [15]:
run_id = 'AutoML_ab755820-4bfd-4e8a-8b4b-9e0a2446b1c2' 

from azureml.widgets import RunDetails

experiment = Experiment(ws, experiment_name)
ml_run = AutoMLRun(experiment=experiment, run_id=run_id)

RunDetails(ml_run).show() 

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'NOTSET', …

### Show best run and model

In [None]:
best_run, fitted_model = ml_run.get_output()
print(best_run)
print(fitted_model)

In [None]:
# show run and model from metric
lookup_metric = "root_mean_squared_error"
best_run, fitted_model = ml_run.get_output(metric = lookup_metric)
print(best_run)
print(fitted_model)

In [None]:
# show run and model from iteration 3
iteration = 3
third_run, third_model = ml_run.get_output(iteration=iteration)
print(third_run)
print(third_model)

### Deploy the Model to AKS

In [49]:
# register the model for deployment
model = Model.register(model_path = "model.pkl",
                       model_name = "model.pkl",
                       tags = {'area': "auto", 'type': "regression"},
                       description = "Contoso Auto model to predict battery failure",
                       workspace = ws)

print(model.name, model.description, model.version)

Registering model model.pkl
model.pkl Contoso Auto model to predict battery failure 4


### Create Scoring File

In [79]:
%%writefile score.py
import pickle
import json
import numpy
import azureml.train.automl
from sklearn.externals import joblib
from azureml.core.model import Model

def init():
    global model
    model_path = Model.get_model_path('model.pkl') # this name is model.id of model that we want to deploy
    # deserialize the model file back into a sklearn model
    model = joblib.load(model_path)

def run(rawdata):
    try:
        data = json.loads(rawdata)['data']
        data = numpy.array(data)
        result = model.predict(data)
    except Exception as e:
        result = str(e)
        return json.dumps({"error": result})
    return json.dumps({"result":result.tolist()})

Overwriting score.py


### Create Environment Dependency File

In [59]:
from azureml.core.conda_dependencies import CondaDependencies 

myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'],pip_packages=['azureml-sdk[notebooks,automl]'])
print(myenv.serialize_to_string())

with open("myenv.yml","w") as f:
    f.write(myenv.serialize_to_string())

# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
  - azureml-sdk[notebooks,automl]==0.1.80
- numpy
- scikit-learn



### Create Container Image

In [80]:
from azureml.core.image import ContainerImage

image_config = ContainerImage.image_configuration(execution_script = "score.py",
                                                  runtime = "python",
                                                  conda_file = "myenv.yml",
                                                  description = "Image with regression model",
                                                  tags = {'area': "auto", 'type': "regression"}
                                                 )

image = ContainerImage.create(name = "myimage1",
                              # this is the model object
                              models = [model],
                              image_config = image_config,
                              workspace = ws)

image.wait_for_creation(show_output = True)

Creating image
Running..................................................
SucceededImage creation operation finished for image myimage1:11, operation "Succeeded"


In [61]:
print(model.name, model.description, model.version)

model.pkl Contoso Auto model to predict battery failure 4


### Create AKS Compute Cluster

In [18]:
try:
    # attach to existing cluster
    aks_target = AksCompute.attach_configuration(resource_group,aks_cluster_name)
    print('Found existing compute target.')

except ComputeTargetException:
    print('Creating a new compute target...')
    
    # Use the default configuration (can also provide parameters to customize)
    prov_config = AksCompute.provisioning_configuration()
    
    # Create the cluster
    aks_target = ComputeTarget.create(workspace = ws, 
                                  name = aks_cluster_name, 
                                  provisioning_configuration = prov_config)
    
    aks_target.wait_for_completion(True)

Found existing compute target.


In [63]:
%%time
aks_target.wait_for_completion(show_output = True)
print(aks_target.provisioning_state)
print(aks_target.provisioning_errors)

SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
None
CPU times: user 132 ms, sys: 19.2 ms, total: 151 ms
Wall time: 1.77 s


### Activate Data Collection and App Insights

In [34]:
aks_config = AksWebservice.deploy_configuration(collect_model_data=True, enable_app_insights=True)

### Deploy AKS Service

In [81]:
%%time
aks_service_name ='aks-automl-service'

aks_service = Webservice.deploy_from_image(workspace = ws, 
                                           name = aks_service_name,
                                           image = image,
                                           deployment_config = aks_config,
                                           deployment_target = aks_target
                                           )
aks_service.wait_for_deployment(show_output = True)
print(aks_service.state)

Creating service
Running..........
SucceededAKS service creation operation finished, operation "Succeeded"
Healthy
CPU times: user 2.91 s, sys: 1.99 s, total: 4.91 s
Wall time: 1min 35s


### Let's send some data to our deployed model for scoring

In [33]:
aks_service_name ='aml-deployment'
aks_service = Webservice(ws,aks_service_name)

In [35]:
%%time
import json

test_sample = json.dumps({'data': [
    ['IL',11.84838,3.949458,4.838391,1.209598,275,'10/01/18 16:35',0.9831527,'TRUE',0.7995941,-8.11275,-0.4847943,0.4920518,-0.5428199,-2.321156,0.8585413,-0.3123408,0.7219636,1.097728,2.26926,-0.05967173,-0.5832617,0.8348302,-1.446835,-0.2638084,-0.7817461,-0.4884371,0.6024499,0.4706367,-0.8506926,0.3336521,-1.787212,-5.35282,-1.46283,-1.139778,-1.623536,0.9576696,-1.38004,-2.415984,-1.171993,1.130002,-0.8953804,0.7990591,-0.8824322,0.650575,0.01930443,0.2998053,-0.02662569,-2.049862,-1.750986,0.9513005,-1.56352,-4.504795,-1.260962,0.6667178,-1.560586,1.16729,1.19256,-0.7794458,0.671654,0.9005552,-1.588768,0.6133893,-1.343587,-0.4241357,-1.05026,-0.7518107,-0.6202381,-1.411967],
    ['DE',14.17665,4.725551,4.672841,1.16821,200,'30/09/04 9:55',0.8730668,'TRUE',18.80838,24.92621,19.49836,24.74844,17.68516,22.93362,25.37442,26.52723,23.5315,24.68806,26.10528,26.17741,23.21661,26.121,25.84424,27.08594,22.64841,24.81732,19.15317,24.85756,23.57444,23.50925,22.27799,26.55124,21.67293,25.02146,26.4417,27.2171,23.92386,22.08664,20.96359,25.51439,22.23584,27.07054,14.85068,20.55521,17.95652,24.07309,18.34271,24.09618,16.89754,22.32475,19.02732,18.38464,16.03233,20.88918,23.68929,24.01699,20.38528,18.55932,20.25734,21.38462,21.53481,22.67359,13.89645,19.47617,17.84697,20.89151,18.58493,23.48934]
]})
test_sample = bytes(test_sample,encoding = 'utf8')

prediction = aks_service.run(input_data = test_sample)
print(prediction)

{"result": [1713.0575009811266, 1187.30301976508]}
CPU times: user 756 ms, sys: 989 ms, total: 1.75 s
Wall time: 1.42 s
