## Check Azure ML SDK version

In [41]:
from azureml.core import ComputeTarget, Dataset, Datastore, Experiment, Workspace
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

from azureml.pipeline.core import Pipeline, PipelineData, TrainingOutput
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import AutoMLStep, PythonScriptStep

from azureml.train.automl import AutoMLConfig

import os

In [42]:
import azureml.core

# Report the SDK version this notebook was authored against next to the one
# installed in the current kernel, so mismatches are visible up front.
installed_sdk_version = azureml.core.VERSION
print("This notebook was created and tested using version 1.3.0 of the Azure ML SDK")
print("You are currently using version", installed_sdk_version, "of the Azure ML SDK")

This notebook was created and tested using version 1.3.0 of the Azure ML SDK
You are currently using version 1.3.0 of the Azure ML SDK


## Retrieve initial dataset

In [43]:
from azureml.core import Workspace, Dataset

ws = Workspace.from_config()

# Register the Titanic baseline data only when the workspace does not already
# hold a dataset under this name; later runs reuse the registered copy.
dataset_is_registered = 'titanic_ds' in ws.datasets.keys()
if not dataset_is_registered:
    # create a TabularDataset from Titanic training data
    web_paths = [
        'https://dprepdata.blob.core.windows.net/demo/Titanic.csv',
        'https://dprepdata.blob.core.windows.net/demo/Titanic2.csv',
    ]
    titanic_ds = Dataset.Tabular.from_delimited_files(path=web_paths)
    titanic_ds.register(
        workspace=ws,
        name='titanic_ds',
        description='Titanic baseline data',
        create_new_version=True,
    )

# Always continue with the registered dataset, whether it was just created
# above or already existed in the workspace.
titanic_ds = Dataset.get_by_name(ws, 'titanic_ds')

## Configure your storage and compute target

In [44]:
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core import Datastore

# The workspace's default datastore is used later for intermediate pipeline data.
datastore = ws.get_default_datastore()

compute_name = 'cpu-compute'
compute_exists = compute_name in ws.compute_targets
if not compute_exists:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2', min_nodes=0, max_nodes=1)
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # Block until provisioning completes, with a 20-minute timeout and progress output.
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)

    # Show the result
    print(compute_target.get_status().serialize())

# Look the target up by name so new and pre-existing clusters share one code path.
compute_target = ws.compute_targets[compute_name]

The intermediate data between the data preparation and the automated ML step can be stored in the workspace's default datastore (object 'datastore' in the notebook), so we don't need to do more than call get_default_datastore() on the Workspace object.

### Configure the training run

The next step is making sure that the remote training run has all the dependencies that are required by the training steps. Dependencies and the runtime context are set by creating and configuring a RunConfiguration object.

In [45]:
from azureml.core.runconfig import RunConfiguration, CondaDependencies

# Add some packages relied on by data prep step
conda_packages = ['pandas', 'scikit-learn']
pip_packages = ['azureml-sdk[automl,explain]', 'azureml-dataprep[fuse,pandas]']
run_dependencies = CondaDependencies.create(
    conda_packages=conda_packages,
    pip_packages=pip_packages,
    pin_sdk_version=False)

aml_run_config = RunConfiguration()
# Use just-specified compute target ("cpu-compute")
aml_run_config.target = compute_target
# user_managed_dependencies=False, with the conda dependencies declared above.
aml_run_config.environment.python.user_managed_dependencies = False
aml_run_config.environment.python.conda_dependencies = run_dependencies

### Prepare data for automated machine learning

#### Write the data preparation code
The baseline Titanic dataset consists of mixed numerical and text data, with some values missing. To prepare it for automated machine learning, the data preparation pipeline step will:

- Fill missing data with either random data or a category corresponding to "Unknown"
- Transform categorical data to integers
- Drop columns that we don't intend to use
- Split the data into training and testing sets
- Write the transformed data to the PipelineData output paths

In [46]:
%%writefile dataprep.py
# dataprep.py
from azureml.core import Run
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
import argparse
import pyarrow as pa
import pyarrow.parquet as pq

RANDOM_SEED=42

def prepare_age(df, seed=None):
    """Impute missing ``Age`` values and replace ``Age`` with a 5-class ``Age_Group``.

    Missing ages are filled with random integers drawn from
    ``[int(mean - std), int(mean + std))`` of the observed ages. The ages are
    then cast to ``int`` and quantized into five quantile bins.

    Args:
        df: DataFrame with a numeric ``Age`` column. It is modified in place.
        seed: Optional seed for the imputation draw. When omitted, the
            module-level ``RANDOM_SEED`` is used so repeated runs fill the
            same values.

    Returns:
        The same DataFrame, with ``Age`` dropped and an integer ``Age_Group``
        column (bin index 0-4) added.
    """
    if seed is None:
        # RANDOM_SEED was declared for this script but never applied; use it
        # here so the random fill is reproducible between pipeline runs.
        seed = RANDOM_SEED
    rng = np.random.RandomState(seed)

    mean = df["Age"].mean()
    std = df["Age"].std()
    missing = df["Age"].isnull()
    n_missing = int(missing.sum())

    ages = df["Age"].copy()
    if n_missing:
        # Draw exactly one replacement per missing entry; bounds are truncated
        # to integers explicitly because randint works on integer ranges.
        ages[missing] = rng.randint(int(mean - std), int(mean + std), size=n_missing)
    df["Age"] = ages.astype(int)

    # Quantize age into 5 classes
    df['Age_Group'] = pd.qcut(df['Age'], 5, labels=False)
    df.drop(['Age'], axis=1, inplace=True)
    return df

def prepare_fare(df):
    """Replace ``Fare`` with a 5-class quantile bucket column ``Fare_Group``.

    Missing fares are treated as 0 before binning.

    Args:
        df: DataFrame with a numeric ``Fare`` column. It is modified in place.

    Returns:
        The same DataFrame, with ``Fare`` dropped and ``Fare_Group``
        (bin index 0-4) added.
    """
    # Use the returned Series instead of fillna(inplace=True) on a column
    # selection: the chained in-place form does not update the frame when
    # pandas Copy-on-Write is active, which would leave NaN groups behind.
    fares = df['Fare'].fillna(0)
    df['Fare_Group'] = pd.qcut(fares, 5, labels=False)
    df.drop(['Fare'], axis=1, inplace=True)
    return df

def prepare_genders(df):
    """Encode the ``Sex`` column as integers.

    ``"male"`` -> 0, ``"female"`` -> 1, ``"unknown"`` -> 2. Missing values and
    any label not in the mapping are also encoded as 2.

    Args:
        df: DataFrame with a ``Sex`` column. It is modified in place.

    Returns:
        The same DataFrame with ``Sex`` converted to ``int``.
    """
    genders = {"male": 0, "female": 1, "unknown": 2}
    # Chain on the returned Series and assign once. The previous
    # fillna(inplace=True) on df['Sex'] is a chained in-place update that is
    # skipped under pandas Copy-on-Write, which would make astype(int) fail on NaN.
    df['Sex'] = df['Sex'].map(genders).fillna(genders["unknown"]).astype(int)
    return df

def prepare_embarked(df):
    """Encode the ``Embarked`` port column as integers.

    ``"S"`` -> 0, ``"C"`` -> 1, ``"Q"`` -> 2. Empty strings and missing values
    are treated as ``"U"`` and encoded as 3.

    Args:
        df: DataFrame with an ``Embarked`` column. It is modified in place.

    Returns:
        The same DataFrame with ``Embarked`` replaced by its integer code.
    """
    ports = {"S": 0, "C": 1, "Q": 2, "U": 3}
    # Work on the returned Series rather than replace/fillna(inplace=True) on
    # df['Embarked']: those chained in-place calls are no-ops under pandas
    # Copy-on-Write, so blank or missing ports would map to NaN.
    embarked = df['Embarked'].replace('', 'U').fillna('U')
    df['Embarked'] = embarked.map(ports)
    return df
    
# dataprep.py is written out as its own file and run as a pipeline step, so it
# must import everything it uses itself; `os` was referenced but never imported.
import os

# The only argument is the destination of the prepared Parquet file.
parser = argparse.ArgumentParser()
parser.add_argument('--output_path', dest='output_path', required=True)
args = parser.parse_args()

# Read the dataset passed to the step under the name 'titanic_ds' and drop the
# columns that are not used for training.
titanic_ds = Run.get_context().input_datasets['titanic_ds']
df = titanic_ds.to_pandas_dataframe().drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
df = prepare_embarked(prepare_genders(prepare_fare(prepare_age(df))))

# Create the parent directory of the output file. Skip this when the path has
# no directory component, because os.makedirs('') raises.
output_dir = os.path.dirname(args.output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
pq.write_table(pa.Table.from_pandas(df), args.output_path)

# A single prepared file is written; no train/test split happens in this script.
print(f"Wrote prepped data to {args.output_path}")

Overwriting dataprep.py


The code parses the input argument, which is the path to which we want to write our data. (This value will be determined by the PipelineData object that is discussed in the next step.) The code retrieves the registered 'titanic_ds' Dataset and calls the various data preparation functions.

The code uses `os.makedirs` to create the parent directory of the output data file (`args.output_path`) and then writes the dataset as a Parquet file at that destination.

#### Write the data preparation pipeline step (PythonScriptStep)

The data preparation code described above must be associated with a PythonScriptStep object in order to be used with a pipeline. The path to which the Parquet data-preparation output is written is generated by a PipelineData object. The resources prepared earlier, such as the ComputeTarget, the RunConfig, and the 'titanic_ds' Dataset, are used to complete the specification.

In [47]:
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep

# Pipeline output named "titanic_train" on the default datastore; it receives
# the file written by dataprep.py.
prepped_data_path = PipelineData("titanic_train", datastore).as_dataset()

# The registered dataset is passed to the script under the name "titanic_ds".
dataprep_inputs = [titanic_ds.as_named_input("titanic_ds")]

dataprep_step = PythonScriptStep(
    script_name="dataprep.py",
    name="dataprep",
    arguments=["--output_path", prepped_data_path],
    inputs=dataprep_inputs,
    outputs=[prepped_data_path],
    compute_target=compute_target,
    runconfig=aml_run_config,
    allow_reuse=True,
)

The prepped_data_path object is of type PipelineOutputFileDataset. Notice that it is specified in both the arguments and outputs arguments. If you review the previous step, you'll see that within the data preparation code, the value of the argument '--output_path' is the file path to which the Parquet file was written.

### Train with AutoMLStep

Configuring an automated ML pipeline step is done with the AutoMLConfig class. This flexible class is described in Configure automated ML experiments in Python. Data input and output are the only aspects of configuration that require special attention in an ML pipeline. Input and output for AutoMLConfig in pipelines is discussed in detail below. Beyond data, an advantage of ML pipelines is the ability to use different compute targets for different steps. You might choose to use a more powerful ComputeTarget only for the automated ML process. Doing so is as straightforward as assigning a more powerful RunConfiguration to the AutoMLConfig object's run_configuration parameter.

#### Send data to AutoMLStep

As discussed above, configuring input to your automated ML step requires the use of certain configurations. In an ML pipeline, you must provide your data using an `X,y` technique and cannot use the `training_data` technique. You may provide all your data in `X` and `y` and use `n_cross_validations` or you may provide your own validation data in `X_valid` and `y_valid` and leave `n_cross_validations` to the default `None` value.

In an ML pipeline, the input data must be a Dataset object. The highest-performing way is to provide the input data in the form of `PipelineOutputTabularDataset` objects. You create an object of that type with the `parse_parquet_files()` or `parse_delimited_files()` on a `PipelineOutputFileDataset`, such as the `prepped_data_path` object.

In [48]:
# prepped_data_path is a PipelineOutputFileDataset; parsing its Parquet content
# yields a PipelineOutputTabularDataset (prepped_data_potds).
prepped_data_potds = prepped_data_path.parse_parquet_files(file_extension=None)

# 'Survived' is the label: X keeps every other column, y keeps only the label.
label_column = 'Survived'
X = prepped_data_potds.drop_columns(label_column)
y = prepped_data_potds.keep_columns(label_column)

#### Specify automated ML outputs

The outputs of the `AutoMLStep` are the final metric scores of the higher-performing model and that model itself. To use these outputs in further pipeline steps, prepare `PipelineData` objects to receive them.

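**Open question (to confirm):** What is the difference between this line:

dstor = Datastore.get_default(ws)

and the earlier one, which also retrieved a default datastore?

datastore = ws.get_default_datastore()

Both calls appear to return the workspace's default datastore, so the existing `datastore` object could probably be reused here instead of creating `dstor` (TODO: confirm).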

In [49]:
from azureml.pipeline.core import TrainingOutput

# Outputs of the AutoML step are stored on the workspace's default datastore.
dstor = Datastore.get_default(ws)

# One PipelineData per AutoML training output type: metrics and model.
metrics_training_output = TrainingOutput(type='Metrics')
model_training_output = TrainingOutput(type='Model')

metrics_data = PipelineData(
    name='metrics_data',
    datastore=dstor,
    pipeline_output_name='metrics_output',
    training_output=metrics_training_output,
)
model_data = PipelineData(
    name='best_model_data',
    datastore=dstor,
    pipeline_output_name='model_output',
    training_output=model_training_output,
)

The snippet above assigns the default datastore of the workspace to `dstor`. Then, it creates the two `PipelineData` objects for the metrics and model output. Each is named, assigned `dstor` as the datastore on which the output will be stored, and associated with the particular `type` of `TrainingOutput` from the `AutoMLStep`.

###  Configure and create the automated ML pipeline step

In [50]:
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.steps import AutoMLStep

# Change timeouts and increase iterations to a reasonable number (e.g., 50) for better accuracy
automl_settings = dict(
    iteration_timeout_minutes=10,
    iterations=2,
    experiment_timeout_hours=0.25,
    primary_metric='AUC_weighted',
    n_cross_validations=3,
)

# Classification on the prepared features (X) and label (y), using the same
# compute target and run configuration as the other steps.
automl_config = AutoMLConfig(
    task='classification',
    path='.',
    debug_log='automated_ml_errors.log',
    compute_target=compute_target,
    run_configuration=aml_run_config,
    featurization='auto',
    X=X,
    y=y,
    **automl_settings,
)

# The step exposes the metrics and model PipelineData objects as its outputs.
train_step = AutoMLStep(
    name='AutoML_Classification',
    automl_config=automl_config,
    passthru_automl_config=False,
    outputs=[metrics_data, model_data],
    allow_reuse=True,
)



### Register the model generated by automated ML

In [51]:
%%writefile register_model.py

# register_model.py
from azureml.core.model import Model, Dataset
from azureml.core.run import Run, _OfflineRun
from azureml.core import Workspace
import argparse

# Both the model name and the path to the trained model are mandatory.
parser = argparse.ArgumentParser()
for flag in ("--model_name", "--model_path"):
    parser.add_argument(flag, required=True)
args = parser.parse_args()

print(f"model_name : {args.model_name}")
print(f"model_path: {args.model_path}")

# For an _OfflineRun the workspace is loaded from the local config; otherwise
# it is taken from the experiment the run belongs to.
run = Run.get_context()
if type(run) == _OfflineRun:
    ws = Workspace.from_config()
else:
    ws = run.experiment.workspace

model = Model.register(
    workspace=ws,
    model_path=args.model_path,
    model_name=args.model_name,
)

print("Registered version {0} of model {1}".format(model.version, model.name))

Overwriting register_model.py


#### Write the PythonScriptStep code

In [52]:
from azureml.pipeline.core.graph import PipelineParameter

# The model name with which to register the trained model in the workspace.
model_name = PipelineParameter("model_name", default_value="TitanicSurvivalInitial")

# model_data (the AutoML step's model output) is both an input of this step and
# the value handed to the script's --model_path argument.
register_arguments = ["--model_name", model_name, "--model_path", model_data]

register_step = PythonScriptStep(
    name="register_model",
    script_name="register_model.py",
    arguments=register_arguments,
    inputs=[model_data],
    compute_target=compute_target,
    runconfig=aml_run_config,
    allow_reuse=False,
)

### Create and run your automated ML pipeline

In [53]:
from azureml.pipeline.core import Pipeline

# Steps in order: data preparation, AutoML training, model registration.
pipeline_steps = [dataprep_step, train_step, register_step]
pipeline = Pipeline(ws, pipeline_steps)

In [54]:
from azureml.core import Experiment

# Submit the pipeline under the 'titanic_automl' experiment.
experiment = Experiment(workspace=ws, name='titanic_automl')

# show_output=True is passed to submit(); the next call waits for the run to complete.
run = experiment.submit(pipeline, show_output=True)
run.wait_for_completion()

Created step dataprep [6ff81186][788c41ac-97c4-41aa-b515-3acc286f1991], (This step will run and generate new outputs)
Created step AutoML_Classification [7dd2316e][f2632b19-420a-4a79-81fa-2a79e0832ff1], (This step will run and generate new outputs)
Created step register_model [3e7e3955][17635acc-c022-4039-a432-c223530b39fd], (This step will run and generate new outputs)
Submitted PipelineRun 9f9f92ad-c088-4e69-aba8-6a7ff6c354d0
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/titanic_automl/runs/9f9f92ad-c088-4e69-aba8-6a7ff6c354d0?wsid=/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourcegroups/cesardl-automl-ncentralus-demo-ws-resgrp/workspaces/cesardl-automl-ncentralus-demo-ws
PipelineRunId: 9f9f92ad-c088-4e69-aba8-6a7ff6c354d0
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/titanic_automl/runs/9f9f92ad-c088-4e69-aba8-6a7ff6c354d0?wsid=/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourcegroups/cesardl-automl-ncentralus-de

[91m
mkl-2019.4           | 204.1 MB  | ########8  |  88% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  88% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  88% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########9  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########9  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########9  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########9  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########9  |  


scipy-1.4.1          | 18.9 MB   |            |   0% [0m[91m
scipy-1.4.1          | 18.9 MB   | 2          |   3% [0m[91m
scipy-1.4.1          | 18.9 MB   | 3          |   4% [0m[91m
scipy-1.4.1          | 18.9 MB   | ####1      |  42% [0m[91m
scipy-1.4.1          | 18.9 MB   | #######5   |  75% [0m[91m
scipy-1.4.1          | 18.9 MB   | #########  |  90% [0m[91m
scipy-1.4.1          | 18.9 MB   | ########## | 100% [0m[91m

readline-7.0         | 387 KB    |            |   0% [0m[91m
readline-7.0         | 387 KB    | #########8 |  98% [0m[91m
readline-7.0         | 387 KB    | ########## | 100% [0m[91m

pandas-1.0.3         | 11.1 MB   |            |   0% [0m[91m
pandas-1.0.3         | 11.1 MB   | ###6       |  36% [0m[91m
pandas-1.0.3         | 11.1 MB   | #######5   |  75% [0m[91m
pandas-1.0.3         | 11.1 MB   | #########  |  91% [0m[91m
pandas-1.0.3         | 11.1 MB   | ########## | 100% [0m[91m

libstdcxx-ng-9.1.0   | 4.0 MB    |            |   0

  Downloading isodate-0.6.0-py2.py3-none-any.whl (45 kB)
Collecting requests-oauthlib>=0.5.0
  Downloading requests_oauthlib-1.3.0-py2.py3-none-any.whl (23 kB)
Collecting chardet<4,>=3.0.2
  Downloading chardet-3.0.4-py2.py3-none-any.whl (133 kB)
Collecting idna<3,>=2.5
  Downloading idna-2.9-py2.py3-none-any.whl (58 kB)
Collecting importlib-metadata
  Downloading importlib_metadata-1.6.0-py2.py3-none-any.whl (30 kB)
Collecting cffi!=1.11.3,>=1.8
  Downloading cffi-1.14.0-cp36-cp36m-manylinux1_x86_64.whl (399 kB)
Collecting jeepney>=0.4.2
  Downloading jeepney-0.4.3-py3-none-any.whl (21 kB)
Collecting backports.weakref
  Downloading backports.weakref-1.0.post1-py2.py3-none-any.whl (5.2 kB)
Collecting applicationinsights
  Downloading applicationinsights-0.11.9-py2.py3-none-any.whl (58 kB)
Collecting flake8<=3.7.9,>=3.1.0; python_version >= "3.6"
  Downloading flake8-3.7.9-py2.py3-none-any.whl (69 kB)
Collecting azureml-train-restclients-hyperdrive~=1.4.0
  Downloading azureml_train_res

  Created wheel for dill: filename=dill-0.3.1.1-py3-none-any.whl size=78530 sha256=fc315156f26ec28027a5eea77e89968382317a937b621a65a54ac95bf99dcf74
  Stored in directory: /root/.cache/pip/wheels/09/84/74/d2b4feb9ac9488bc83c475cb2cbe8e8b7d9cea8320d32f3787
  Building wheel for py-cpuinfo (setup.py): started
  Building wheel for py-cpuinfo (setup.py): finished with status 'done'
  Created wheel for py-cpuinfo: filename=py_cpuinfo-5.0.0-py3-none-any.whl size=18682 sha256=6bf53ad7cfd7666dbb7accb7b7bf976024ce7b9db575cb6f7ba1915dfa7c8ad6
  Stored in directory: /root/.cache/pip/wheels/e6/54/db/65176a1697a583d8ec5f90510f6faab11cda739d0e4f0ba2ea
  Building wheel for smart-open (setup.py): started
  Building wheel for smart-open (setup.py): finished with status 'done'
  Created wheel for smart-open: filename=smart_open-1.9.0-py3-none-any.whl size=73085 sha256=a51fcd8dc907569de9537d369f655210724e5cfd5ce7c32f5dd98ed137995260
  Stored in directory: /root/.cache/pip/wheels/8e/9f/cd/dbf5c1362c59abb699

[91m
[0m#
# To activate this environment, use:
# > source activate /azureml-envs/azureml_a202ddab594f378a0a5fa176c4dadf6e
#
# To deactivate an active environment, use:
# > source deactivate
#


Removing intermediate container 733c6abfa36c
 ---> 3c66b73be182
Step 9/15 : ENV PATH /azureml-envs/azureml_a202ddab594f378a0a5fa176c4dadf6e/bin:$PATH
 ---> Running in 9fa1d82836fc
Removing intermediate container 9fa1d82836fc
 ---> 9ce0505b47ad
Step 10/15 : ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/azureml_a202ddab594f378a0a5fa176c4dadf6e
 ---> Running in 8eb5c17a9e9f
Removing intermediate container 8eb5c17a9e9f
 ---> 499989e2be2d
Step 11/15 : ENV LD_LIBRARY_PATH /azureml-envs/azureml_a202ddab594f378a0a5fa176c4dadf6e/lib:$LD_LIBRARY_PATH
 ---> Running in 5982248aeeb2
Removing intermediate container 5982248aeeb2
 ---> 62b3f20144ad
Step 12/15 : COPY azureml-environment-setup/spark_cache.py azureml-environment-setup/log4j.properties /azureml-environment-setup/
 ---> e575956206dc
Step 13/15 


Streaming azureml-logs/75_job_post-tvmps_e500d72f3653bd93798b2ddbb4794a8c9a5413486dd0711ca01fc9d124136621_d.txt
Starting job release. Current time:2020-04-30T04:08:14.654475
Logging experiment finalizing status in history service.
Starting the daemon thread to refresh tokens in background for process with pid = 349
Job release is complete. Current time:2020-04-30T04:08:16.269368

StepRun(dataprep) Execution Summary
StepRun( dataprep ) Status: Finished

This compute target type doesn't support non-Docker runs; overriding run configuration to enable Docker.






StepRunId: d1868f39-9984-4a87-b652-47a763b6238f
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/titanic_automl/runs/d1868f39-9984-4a87-b652-47a763b6238f?wsid=/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourcegroups/cesardl-automl-ncentralus-demo-ws-resgrp/workspaces/cesardl-automl-ncentralus-demo-ws
StepRun( AutoML_Classification ) Status: NotStarted
StepRun( AutoML_Classification ) Status: Running

StepRun(AutoML_Classification) Execution Summary
StepRun( AutoML_Classification ) Status: Finished
{'runId': 'd1868f39-9984-4a87-b652-47a763b6238f', 'target': 'cpu-compute', 'status': 'Completed', 'startTimeUtc': '2020-04-30T04:31:41.955585Z', 'endTimeUtc': '2020-04-30T04:35:52.950692Z', 'properties': {'azureml.runsource': 'azureml.StepRun', 'ContentSnapshotId': '2aa0cf83-4c53-4f10-ad4e-9b992653a844', 'StepType': 'AutoMLStep', 'azureml.pipelinerunid': '9f9f92ad-c088-4e69-aba8-6a7ff6c354d0', 'num_iterations': '2', 'training_type': 'TrainFull', 'acquisitio




StepRunId: 90ad1c09-113c-4c58-8742-837027e59913
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/titanic_automl/runs/90ad1c09-113c-4c58-8742-837027e59913?wsid=/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourcegroups/cesardl-automl-ncentralus-demo-ws-resgrp/workspaces/cesardl-automl-ncentralus-demo-ws
StepRun( register_model ) Status: NotStarted
StepRun( register_model ) Status: Running

Streaming azureml-logs/20_image_build_log.txt
2020/04/30 04:36:27 Downloading source code...
2020/04/30 04:36:28 Finished downloading source code
2020/04/30 04:36:28 Creating Docker network: acb_default_network, driver: 'bridge'
2020/04/30 04:36:29 Successfully set up Docker network: acb_default_network
2020/04/30 04:36:29 Setting up Docker configuration...
2020/04/30 04:36:29 Successfully set up Docker configuration
2020/04/30 04:36:29 Logging in to registry: cesardlautoma5f87185.azurecr.io
2020/04/30 04:36:30 Successfully logged into cesardlautoma5f87185.azurecr.io


[91m
mkl-2019.4           | 204.1 MB  | ########1  |  81% [0m[91m
mkl-2019.4           | 204.1 MB  | ########3  |  84% [0m[91m
mkl-2019.4           | 204.1 MB  | ########5  |  85% [0m[91m
mkl-2019.4           | 204.1 MB  | ########6  |  86% [0m[91m
mkl-2019.4           | 204.1 MB  | ########6  |  87% [0m[91m
mkl-2019.4           | 204.1 MB  | ########7  |  87% [0m
[91m
mkl-2019.4           | 204.1 MB  | ########7  |  88% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  88% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  88% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########8  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########9  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########9  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########9  |  89% [0m[91m
mkl-2019.4           | 204.1 MB  | ########9  | 


gcc_impl_linux-64-7. | 73.2 MB   |            |   0% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | 4          |   5% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | #3         |  14% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | ##2        |  22% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | ##9        |  30% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | ###5       |  35% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | ####5      |  46% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | #####3     |  53% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | ######1    |  62% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | #######    |  70% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | #######8   |  78% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | ########4  |  84% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | ########8  |  88% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | #########  |  91% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | #########3 |  93% [0m[91m
gcc_impl_linux-64-7. | 73.2 MB   | #########4 |  95% 


pandas-0.23.4        | 10.1 MB   |            |   0% [0m[91m
pandas-0.23.4        | 10.1 MB   | #######    |  70% [0m[91m
pandas-0.23.4        | 10.1 MB   | #########1 |  91% [0m[91m
pandas-0.23.4        | 10.1 MB   | ########## | 100% [0m[91m

py-xgboost-0.80      | 1.7 MB    |            |   0% [0m[91m
py-xgboost-0.80      | 1.7 MB    | #######7   |  77% [0m[91m
py-xgboost-0.80      | 1.7 MB    | #########8 |  99% [0m[91m
py-xgboost-0.80      | 1.7 MB    | ########## | 100% [0m[91m

_py-xgboost-mutex-2. | 9 KB      |            |   0% [0m[91m
_py-xgboost-mutex-2. | 9 KB      | ########## | 100% [0m[91m

freetype-2.9.1       | 822 KB    |            |   0% [0m[91m
freetype-2.9.1       | 822 KB    | ########9  |  89% [0m[91m
freetype-2.9.1       | 822 KB    | ########## | 100% [0m[91m

holidays-0.9.11      | 39 KB     |            |   0% [0m[91m
holidays-0.9.11      | 39 KB     | ########## | 100% [0m[91m

tk-8.6.8             | 3.1 MB    |            |  

  Downloading Resource-0.2.1-py2.py3-none-any.whl (25 kB)
Collecting onnx<=1.6.0,>=1.5.0
  Downloading onnx-1.6.0-cp36-cp36m-manylinux1_x86_64.whl (4.8 MB)
Collecting gensim
  Downloading gensim-3.8.2-cp36-cp36m-manylinux1_x86_64.whl (24.2 MB)
Collecting wheel==0.30.0
  Downloading wheel-0.30.0-py2.py3-none-any.whl (49 kB)
Collecting azureml-defaults~=1.4.0
  Downloading azureml_defaults-1.4.0-py3-none-any.whl (3.0 kB)
Collecting dill>=0.2.8
  Downloading dill-0.3.1.1.tar.gz (151 kB)
Collecting skl2onnx==1.4.9
  Downloading skl2onnx-1.4.9-py2.py3-none-any.whl (114 kB)
Collecting py-cpuinfo
  Downloading py-cpuinfo-5.0.0.tar.gz (82 kB)
Collecting nimbusml>=1.5.0
  Downloading nimbusml-1.7.0-cp36-none-manylinux1_x86_64.whl (116.2 MB)
Collecting pmdarima==1.1.1
  Downloading pmdarima-1.1.1-cp36-cp36m-manylinux1_x86_64.whl (682 kB)
Collecting psutil<6.0.0,>=5.2.2
  Downloading psutil-5.7.0.tar.gz (449 kB)
Collecting interpret-community==0.10.*
  Downloading interpret_community-0.10.2-py3-n

  Created wheel for pyrsistent: filename=pyrsistent-0.16.0-cp36-cp36m-linux_x86_64.whl size=113423 sha256=23b0b2fc7312b51a8feb286edcb75027e5c083aa7ac7946731a19234e5862c7f
  Stored in directory: /root/.cache/pip/wheels/d1/8a/1c/32ab9017418a2c64e4fbaf503c08648bed2f8eb311b869a464
Successfully built fusepy wrapt smart-open dill py-cpuinfo psutil JsonSir JsonForm json-logging-py shap fire PyYAML liac-arff termcolor pyrsistent
Installing collected packages: urllib3, idna, chardet, requests, pycparser, cffi, cryptography, pyopenssl, pyasn1, ndg-httpsclient, isodate, oauthlib, requests-oauthlib, msrest, PyJWT, adal, msrestazure, azure-common, azure-mgmt-authorization, jeepney, SecretStorage, pathspec, ruamel.yaml, zipp, importlib-metadata, jsonpickle, azure-graphrbac, azure-mgmt-containerregistry, azure-mgmt-storage, websocket-client, docker, azure-mgmt-resource, jmespath, contextlib2, azure-mgmt-keyvault, backports.weakref, backports.tempfile, azureml-core, pyflakes, entrypoints, pycodestyle,


Streaming azureml-logs/55_azureml-execution-tvmps_e500d72f3653bd93798b2ddbb4794a8c9a5413486dd0711ca01fc9d124136621_d.txt
2020-04-30T04:49:31Z Starting output-watcher...
2020-04-30T04:49:31Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_4d2abf2a4d077fa8b64770f2e645d085
a1298f4ce990: Already exists
04a3282d9c4b: Already exists
9b0d3db6dc03: Already exists
8269c605f3f1: Already exists
6504d449e70c: Already exists
4e38f320d0d4: Already exists
b0a763e8ee03: Already exists
11917a028ca4: Already exists
a6c378d11cbf: Already exists
6cc007ad9140: Already exists
6c1698a608f3: Already exists
ac090c4c469f: Pulling fs layer
56f9d9a90033: Pulling fs layer
e6b6c6e8d4bf: Pulling fs layer
65abb8fd27c8: Pulling fs layer
2fd341ee11ca: Pulling fs layer
20e7552f074d: Pulling fs layer
65abb8fd27c8: Waiting
2fd341ee11ca: Waiting
20e7552f074d: Waiting
ac090c4c469f: Verifying Checksum
ac090c4c469f: Download complete



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': '9f9f92ad-c088-4e69-aba8-6a7ff6c354d0', 'status': 'Completed', 'startTimeUtc': '2020-04-30T03:55:57.188277Z', 'endTimeUtc': '2020-04-30T04:53:33.862673Z', 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{"model_name":"TitanicSurvivalInitial"}'}, 'inputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://cesardlautomln9894098850.blob.core.windows.net/azureml/ExperimentRun/dcid.9f9f92ad-c088-4e69-aba8-6a7ff6c354d0/logs/azureml/executionlogs.txt?sv=2019-02-02&sr=b&sig=ZqvAVDINnkTm%2FmQ5WY4039AYd5HlNQ%2Brcl%2FHFOyj85Q%3D&st=2020-04-30T04%3A43%3A36Z&se=2020-04-30T12%3A53%3A36Z&sp=r', 'logs/azureml/stderrlogs.txt': 'https://cesardlautomln9894098850.blob.core.windows.net/azureml/ExperimentRun/dcid.9f9f92ad-c088-4e69-aba8-6a7ff6c354d0/logs/azureml/stderrlogs.txt?sv=2019-02-02&sr=b&sig=WPQQ6oq7nKaasGsvlQwOW%2BA1NDMDC1fCf2cMZUoA4ho%3D&st

'Finished'

#### Download the results of an automated ML run

In [None]:
# Run on local machine
ws = Workspace.from_config()

experiment = ws.experiments['titanic_automl']
# Find the pipeline run by id (replace the placeholder id with a real run id).
# The original iterated over `ex`, a name that is never defined; the experiment
# object fetched above is what has to be searched.
run = next(
    candidate for candidate in experiment.get_runs()
    if candidate.id == 'aaaaaaaa-bbbb-cccc-dddd-0123456789AB'
)
# The AutoML step is the child run named after the AutoMLStep.
automl_run = next(r for r in run.get_children() if r.name == 'AutoML_Classification')
outputs = automl_run.get_outputs()
metrics = outputs['default_metrics_AutoML_Classification']
model = outputs['default_model_AutoML_Classification']

# Download both outputs into the current directory.
metrics.get_port_data_reference().download('.')
model.get_port_data_reference().download('.')