In [1]:
from azureml.core import Workspace, Experiment, Environment

### INFO 04.04.2021
# PC Name: testpc
# Cluster name:  udacity-project
# with 'Standard_DS2_v2' nodes  min 0, max 4

ws = Workspace.from_config()
myenv = Environment.get(workspace=ws, name="AzureML-Minimal")
ws.get_details()

exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-141953
Azure region: southcentralus
Subscription id: 6b4af8be-9931-443e-90f6-c4c34a1f9737
Resource group: aml-quickstarts-141953


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

### YOUR CODE HERE ###
computer_cluster_name = "udacity-project" # see info

try:
    compute_target = ComputeTarget(workspace=ws, name=computer_cluster_name)
    print("Found existing cluster, use this cluster that was found.")
except:
    print("Creating new cluster...")
    compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_DS2_v2', max_nodes=4)
    compute_target = ComputeTarget.create(ws, computer_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)

print(compute_target.get_status().serialize())



Found existing cluster, use this cluster that was found.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-04-04T19:02:32.930000+00:00', 'errors': None, 'creationTime': '2021-04-04T17:56:40.006307+00:00', 'modifiedTime': '2021-04-04T17:56:55.539252+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_DS2_V2'}


In [3]:
compute_targets = ws.compute_targets
for compute_name, compute_type in compute_targets.items():
    print(compute_name, compute_type.type, compute_type.provisioning_state)

testpc ComputeInstance Succeeded
udacity-project AmlCompute Succeeded


In [4]:
from azureml.widgets import RunDetails
from azureml.core.experiment import Experiment
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.core.script_run_config import ScriptRunConfig
import os
import shutil

experiment = Experiment(ws, 'quick-starts-ws-140195')

#cluster = Workspace.compute_targets['udacity-project']

# Specify parameter sampler
ps = RandomParameterSampling({
    "--C" : choice(0.01, 0.1, 1.2,1.5),
    "--max_iter" : choice(20, 40, 60, 100,150,200,250)
})

# Specify a Policy
policy = BanditPolicy(slack_factor=0.12, evaluation_interval=1, delay_evaluation=5)

if "training" not in os.listdir():
    os.mkdir("./training")

script_folder = './training'
os.makedirs(script_folder, exist_ok=True)

shutil.copy('./train.py', script_folder)

# Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory = script_folder, entry_script='train.py', compute_target=compute_target, vm_size="Standard_DS2_v2", vm_priority="lowpriority")

#script_run_config.script_run_config.target = cluster

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(hyperparameter_sampling = ps,
                                    primary_metric_name = 'Accuracy',
                                    primary_metric_goal= PrimaryMetricGoal.MAXIMIZE,
                                    max_total_runs=5,
                                    policy=policy,
                                    estimator= est)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [5]:
from azureml.widgets import RunDetails
from azureml.core.experiment import Experiment

# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###

hyperdrive_run = exp.submit(config=hyperdrive_config)

RunDetails(hyperdrive_run).show()

#hyperdrive_run.get_status()

hyperdrive_run.wait_for_completion(show_output=True)







_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_efdb95c8-50e1-4b6d-847d-8e64cd245ff5
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_efdb95c8-50e1-4b6d-847d-8e64cd245ff5?wsid=/subscriptions/6b4af8be-9931-443e-90f6-c4c34a1f9737/resourcegroups/aml-quickstarts-141953/workspaces/quick-starts-ws-141953

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-04-04T19:17:32.130033][API][INFO]Experiment created<END>\n""<START>[2021-04-04T19:17:33.120688][GENERATOR][INFO]Trying to sample '5' jobs from the hyperparameter space<END>\n""<START>[2021-04-04T19:17:33.315984][GENERATOR][INFO]Successfully sampled '5' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-04-04T19:17:33.6490497Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_efdb95c8-50e1-4b6d-847d-8e64cd245ff5
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_efdb95c8-50e1-4b6d-847d-8e64cd245ff5?wsid=/subscriptions/6b4af

{'runId': 'HD_efdb95c8-50e1-4b6d-847d-8e64cd245ff5',
 'target': 'udacity-project',
 'status': 'Completed',
 'startTimeUtc': '2021-04-04T19:17:31.943336Z',
 'endTimeUtc': '2021-04-04T19:24:10.844706Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'f97a1d5a-da65-4ac1-a312-9aeb475fb170',
  'score': '0.9113808801213961',
  'best_child_run_id': 'HD_efdb95c8-50e1-4b6d-847d-8e64cd245ff5_2',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg141953.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_efdb95c8-50e1-4b6d-847d-8e64cd245ff5/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=97B3WwgQIf1zV4Gdu0VXJBGbpVQgCeVtR0U5dVMOIzE%3D&st=2021-04-04T19%3A14%3A20Z&se=2021-04-05T03%3A24%3A20Z&sp=r'},
 'submittedBy': 'ODL_User 141953'}

In [6]:
import joblib
# Get your best run and save the model from that run.

### YOUR CODE HERE ###

hyperdrive_best_run = hyperdrive_run.get_best_run_by_primary_metric()

print("best run details :", hyperdrive_best_run.get_details())
print('\n')
print("best run file names :", hyperdrive_best_run.get_file_names())
print('\n')
print("best run metrics :", hyperdrive_best_run.get_metrics())

model = hyperdrive_best_run.register_model(model_name='hyperdrive_log_reg_best_run', 
model_path='outputs/model.joblib', tags={"Method" : "HyperDrive"},
properties={"Accuracy" : hyperdrive_best_run.get_metrics()["Accuracy"]})



best run details : {'runId': 'HD_efdb95c8-50e1-4b6d-847d-8e64cd245ff5_2', 'target': 'udacity-project', 'status': 'Completed', 'startTimeUtc': '2021-04-04T19:21:13.095467Z', 'endTimeUtc': '2021-04-04T19:22:43.317161Z', 'properties': {'_azureml.ComputeTargetType': 'amlcompute', 'ContentSnapshotId': 'f97a1d5a-da65-4ac1-a312-9aeb475fb170', 'ProcessInfoFile': 'azureml-logs/process_info.json', 'ProcessStatusFile': 'azureml-logs/process_status.json'}, 'inputDatasets': [], 'outputDatasets': [], 'runDefinition': {'script': 'train.py', 'command': '', 'useAbsolutePath': False, 'arguments': ['--C', '0.01', '--max_iter', '40'], 'sourceDirectoryDataStore': None, 'framework': 'Python', 'communicator': 'None', 'target': 'udacity-project', 'dataReferences': {}, 'data': {}, 'outputData': {}, 'jobName': None, 'maxRunDurationSeconds': None, 'nodeCount': 1, 'priority': None, 'credentialPassthrough': False, 'identity': None, 'environment': {'name': 'Experiment udacity-project Environment', 'version': 'Autos

In [7]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###
ds = TabularDatasetFactory.from_delimited_files(['https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'])

In [8]:
from train import clean_data
import pandas as pd
from sklearn.model_selection import train_test_split
from azureml.core import Dataset

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

predict_col = pd.DataFrame(y)
predict_col.head()
full_data = pd.concat([x,predict_col],axis=1)

train_data, test_data = train_test_split(full_data,test_size=0.28)


# Save training data to a csv file 
train_data.to_csv("bankmarket_training.csv")

# Read training data from CSV File
data_store = ws.get_default_datastore()
data_store.upload(src_dir='./',target_path='./training/')
train_ds = Dataset.Tabular.from_delimited_files(path = [(data_store, './training/bankmarket_training.csv')])





Uploading an estimated of 24 files
Target already exists. Skipping upload for training/.amlignore
Target already exists. Skipping upload for training/.amlignore.amltmp
Target already exists. Skipping upload for training/bankmarketing_train.csv
Target already exists. Skipping upload for training/bankmarket_training.csv
Target already exists. Skipping upload for training/README.md
Target already exists. Skipping upload for training/train.py
Target already exists. Skipping upload for training/udacity-project (2).ipynb.amltmp
Target already exists. Skipping upload for training/udacity-project.ipynb
Target already exists. Skipping upload for training/udacity-project.ipynb.amltmp
Target already exists. Skipping upload for training/.ipynb_aml_checkpoints/udacity-project-checkpoint2021-3-4-17-50-57.ipynb
Target already exists. Skipping upload for training/.ipynb_aml_checkpoints/udacity-project-checkpoint2021-3-4-17-53-39.ipynb
Target already exists. Skipping upload for training/.ipynb_aml_chec

In [9]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs. 
automl_config = AutoMLConfig(
    experiment_timeout_minutes=40,
    task='classification',
    primary_metric='accuracy',
    training_data=train_ds,
    label_column_name='y',
    n_cross_validations=5,
    compute_target=compute_target,
    enable_early_stopping = True,
    enable_onnx_compatible_models = True)

# primary_metric is case sensitive!

In [10]:
# Submit your automl run

### YOUR CODE HERE ###

remote_run = exp.submit(config=automl_config, show_output=True)



Running on remote.
No run_configuration provided, running on udacity-project with default configuration
Running on remote compute: udacity-project
Parent Run ID: AutoML_3bc87d90-26e7-4c4d-b4ff-4e49aa950451

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead 

In [11]:
# Retrieve and save your best automl model.

### YOUR CODE HERE ###

remote_run.get_metrics()



{'experiment_status': ['DatasetEvaluation',
  'FeaturesGeneration',
  'DatasetFeaturization',
  'DatasetFeaturizationCompleted',
  'DatasetBalancing',
  'DatasetCrossValidationSplit',
  'ModelSelection'],
 'experiment_status_description': ['Gathering dataset statistics.',
  'Generating features for the dataset.',
  'Beginning to fit featurizers and featurize the dataset.',
  'Completed fit featurizers and featurizing the dataset.',
  'Performing class balancing sweeping',
  'Generating individually featurized CV splits.',
  'Beginning model selection.'],
 'balanced_accuracy': 0.736118507495833,
 'log_loss': 0.18323501737943726,
 'matthews_correlation': 0.5370532863878725,
 'f1_score_macro': 0.7644206680633138,
 'precision_score_micro': 0.915627829453258,
 'precision_score_macro': 0.8055768978420386,
 'precision_score_weighted': 0.9080939943122713,
 'AUC_macro': 0.9470140731898091,
 'AUC_micro': 0.9801567046311987,
 'recall_score_weighted': 0.915627829453258,
 'average_precision_score_w

In [12]:
remote_run.get_tags()

{'model_explain_run': 'best_run',
 '_aml_system_azureml.automlComponent': 'AutoML',
 'pipeline_id': '<5dfac790c5c209f98a1da2dc1c7fb76f0397324f>;<c7af0367625be6ac5c2fecbfc72ed444cb7a2111>;<cf1dc9b7ea4a50d759bce352343ec6737501695a>;<b19eeb61727e9c4fbb5aa45fb6e9fe3fab5b1f04>;<032c2f067189c1f56172ca21b98c8843539dd9de>;<2a4d6884653b914a3c776e7938f6ae32acfc26c6>;<8b8d6ec7348787185d1862cfe0f1be758d8cb238>;<2ffa6cf359ffe417a117666f62b1ae554d99df21>;<8d8bbc86e60505299c7e25ffdf284c5d4f3fd55d>;<c97b3fbbb3a9906a67294dc582d3b4ea08e83349>;<c455ad2a93052501a460a48e228bfac07303dabe>;<82b7a511bb356ccec67e5e1d975af2ebb63e564e>;<c58e766423d668d3696f0861210c3a0b879b8cd4>;<3f87693e8822b5543aab10f52f57857916d67b87>;<0bba159f36526d96e4603e6906113981b1ebe414>;<0c2e5f39cfcb3aa8cd8c8b5e8bc4f4829c5e420b>;<a6bd27dd928875b84f691d4dfa1c1a3794c161f1>;<3344110ae925f10b00f1bb6c4af55a28dbca4c80>;<cfd6f2e509ec64adc1635cede922e2c4c83a244c>;<69428b2a6612b3703e7304dc0ac8338d61f58853>;<7cacae7d5871b779ebb6c0b47e93b496ea0697

In [13]:
from azureml.automl.runtime.onnx_convert import OnnxConverter

automl_best_run_onnx, automl_fitted_model_onnx = remote_run.get_output(return_onnx_model=True)

OnnxConverter.save_onnx_model(automl_fitted_model_onnx, './outputs/best_automl_model.onnx' )