In [1]:
from azureml.core import Workspace, Experiment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

# Load the workspace via Workspace.from_config() and open the experiment
# under which the runs submitted from this notebook are recorded.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name='udacity-project')

# Print the workspace coordinates, one per line, as a sanity check that the
# expected workspace was picked up.
print('Workspace name: ' + ws.name,
      'Azure region: ' + ws.location,
      'Subscription id: ' + ws.subscription_id,
      'Resource group: ' + ws.resource_group, sep='\n')

# Start an interactive run on the experiment.
# NOTE(review): no cell visible in this notebook calls run.complete();
# confirm the run is closed somewhere, otherwise it stays open in the workspace.
run = exp.start_logging()

Workspace name: quick-starts-ws-151893
Azure region: southcentralus
Subscription id: 6971f5ac-8af1-446e-8034-05acea24681f
Resource group: aml-quickstarts-151893


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

from azureml.core.compute_target import ComputeTargetException

# Name under which the CPU cluster is looked up in (or added to) the workspace.
amlcompute_cluster_name = "cpu-cluster"

# Reuse the cluster when the lookup succeeds; a ComputeTargetException from the
# lookup is treated as "not there yet" and triggers provisioning instead.
try:
    aml_compute = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    aml_compute = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Runs for both the reused and the freshly created cluster.
aml_compute.wait_for_completion(show_output=True)

Creating......
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [3]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.pipeline.steps import HyperDriveStep, HyperDriveStepRun, PythonScriptStep
from azureml.pipeline.core import Pipeline, PipelineData, TrainingOutput
%run train
import os

# Specify parameter sampler
# NOTE(review): uniform(-6, -1) only draws negative values for '--learning-rate',
# and the train script output captured in this notebook logs "Regularization Strength"
# and "Max iterations" rather than a learning rate. Confirm the argument name and
# the sampling range against train.py.
ps = RandomParameterSampling({'--learning-rate': uniform(-6, -1)})

# Specify a Policy
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Make sure the ./training folder exists (no error when it is already there).
os.makedirs("./training", exist_ok=True)

# Default datastore of the workspace; used for the pipeline outputs further down.
datastore = ws.get_default_datastore()

# A ScriptRunConfig is used instead of the SKLearn estimator because the
# estimator is deprecated. The compute target is passed explicitly so the
# HyperDrive child runs execute on the cluster provisioned above.
src = ScriptRunConfig(source_directory='.',
                      script='train.py',
                      compute_target=aml_compute)

# Create a HyperDriveConfig from the script run config, hyperparameter sampler, and policy.
# primary_metric_name has to match the name the training script logs; the script
# output captured in this notebook shows the metric being logged as "Accuracy".
hd_config = HyperDriveConfig(run_config=src,
                             hyperparameter_sampling=ps,
                             policy=policy,
                             primary_metric_name='Accuracy',
                             primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                             max_total_runs=200)

#I made hyperdrive config runs>Logistic Regression runs

Attempted to log scalar metric Regularization Strength::
1.0
Attempted to log scalar metric Max iterations::
1000
Attempted to log scalar metric Accuracy:
0.9045523520485584


In [7]:
# Build the HyperDrive pipeline step: its input dataset and its two outputs
# (the metrics of all child runs and the model of the best run).

from azureml.data.dataset_factory import TabularDatasetFactory

# `ds` was referenced here without being defined anywhere in the notebook, so the
# cell failed with a NameError on a fresh kernel. Create it from the bank
# marketing CSV that the notebook points to.
BANK_MARKETING_URL = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=BANK_MARKETING_URL)

# Named input handed to the HyperDrive step.
data_folder = ds.as_named_input(name='bank_marketing')

metrics_output_name = 'metrics_output'
metrics_data = PipelineData(name='metrics_data',
                            datastore=datastore,
                            pipeline_output_name=metrics_output_name,
                            training_output=TrainingOutput("Metrics"))

# NOTE(review): "outputs/model/saved_model.pb" looks like a TensorFlow artifact path;
# confirm it matches the file train.py actually writes for the model.
model_output_name = 'model_output'
saved_model = PipelineData(name='saved_model',
                            datastore=datastore,
                            pipeline_output_name=model_output_name,
                            training_output=TrainingOutput("Model",
                                                           model_file="outputs/model/saved_model.pb"))

hd_step_name='hd_step01'
hd_step = HyperDriveStep(
    name=hd_step_name,
    hyperdrive_config=hd_config,
    inputs=[data_folder],
    outputs=[metrics_data, saved_model])

In [13]:
# Run configuration for the registration step: a CondaDependencies object
# with the azureml-sdk pip package added to it.
conda_dep = CondaDependencies()
conda_dep.add_pip_package("azureml-sdk")
rcfg = RunConfiguration(conda_dependencies=conda_dep)

# Step that runs register_model.py, receiving the saved_model output of the
# HyperDrive step both as a declared input and as the --saved-model argument.
register_model_step = PythonScriptStep(
    name='register_model_step01',
    script_name='register_model.py',
    arguments=["--saved-model", saved_model],
    inputs=[saved_model],
    compute_target=amlcompute_cluster_name,
    runconfig=rcfg,
    allow_reuse=True,
)

# Explicit ordering: registration is scheduled after the HyperDrive step.
register_model_step.run_after(hd_step)

# Assemble both steps into a pipeline and submit it to the experiment.
pipeline_steps = [hd_step, register_model_step]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
pipeline_run = exp.submit(pipeline)

# Show the run-details widget for the submitted pipeline run.
details_widget = RunDetails(pipeline_run)
details_widget.show()

Created step hd_step01 [e44a20cd][6aa1566d-65d8-493c-b104-a2af930d7b11], (This step is eligible to reuse a previous run's output)Created step register_model_step01 [a1b3c771][ddae8733-2601-4481-9148-98f24d2797bf], (This step is eligible to reuse a previous run's output)

Submitted PipelineRun 9b96231e-f2af-4303-abe5-4befedb3c460
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/9b96231e-f2af-4303-abe5-4befedb3c460?wsid=/subscriptions/6971f5ac-8af1-446e-8034-05acea24681f/resourcegroups/aml-quickstarts-151893/workspaces/quick-starts-ws-151893&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [None]:
import joblib
# Get your best run and save the model from that run.

### YOUR CODE HERE ###

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###

In [None]:
from train import clean_data

# Use the clean_data function to clean your data.
# The template placeholder sat inside the call and commented out the closing
# parenthesis, so this cell was a syntax error. The dataset is created here from
# the bank marketing CSV referenced in this notebook and passed in directly.
# NOTE(review): assumes clean_data accepts a TabularDataset - confirm against train.py.
BANK_MARKETING_URL = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=BANK_MARKETING_URL)
x, y = clean_data(ds)

In [None]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.

# The template left every argument except the timeout blank, which is a syntax
# error. The values below make the cell runnable.
# NOTE(review): confirm each value against the project requirements.

# Name given to the label column when features and labels are joined below.
LABEL_COLUMN = "y"

# AutoMLConfig takes one table plus the name of its label column.
# NOTE(review): assumes x is a pandas DataFrame and y an index-aligned Series
# as returned by clean_data - TODO confirm against train.py.
automl_training_data = x.assign(**{LABEL_COLUMN: y})

automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    # Classification with accuracy, matching the "Accuracy" metric the
    # training script logs in this notebook.
    task='classification',
    primary_metric='accuracy',
    training_data=automl_training_data,
    label_column_name=LABEL_COLUMN,
    # NOTE(review): 5 folds is a placeholder choice - adjust as needed.
    n_cross_validations=5)

In [2]:
# Submit your automl run

### YOUR CODE HERE ###

In [None]:
# Retrieve and save your best automl model.

### YOUR CODE HERE ###