In [1]:
from azureml.core import Workspace, Experiment

# Load the workspace from the local configuration and bind the experiment
# that every later cell submits runs to.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

# One line of text per workspace attribute, printed newline-separated.
workspace_summary = (
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
)
print(*workspace_summary, sep='\n')

# NOTE(review): `run` is rebound by the HyperDrive submission in a later cell
# and this interactive run is never explicitly completed in the notebook --
# confirm it is still needed.
run = exp.start_logging()

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code R2JWNNFGL to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
Workspace name: quick-starts-ws-141470
Azure region: southcentralus
Subscription id: 61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30
Resource group: aml-quickstarts-141470


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
udacity_compute_name = 'udacityproj1comp'
compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_D2_V2', max_nodes=4)

# Reuse the cluster when it already exists; otherwise provision it.
# Only ComputeTargetException is caught so that unrelated failures
# (authentication, networking, KeyboardInterrupt, typos) surface instead of
# silently triggering a provisioning attempt.
try:
    compute_target = ComputeTarget(workspace=ws, name=udacity_compute_name)
    print("Found an existing compute target.")
except ComputeTargetException:
    compute_target = ComputeTarget.create(
        workspace=ws,
        name=udacity_compute_name,
        provisioning_configuration=compute_config,
    )
    print("Creating a new compute target.")

# Block until the cluster is ready, streaming provisioning status.
compute_target.wait_for_completion(show_output=True)


Creating a new compute target.
Creating...
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [3]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os

# Parameter sampler: random sampling over the two arguments passed to train.py.
ps = RandomParameterSampling({
    '--C': uniform(0.0, 1.0),
    '--max_iter': choice(10, 100, 500),
})

# Policy: Bandit policy with a 0.2 slack factor, evaluated every 2 intervals.
policy = BanditPolicy(slack_factor=0.2, evaluation_interval=2)

# Make sure a local "training" directory exists in the working directory.
if "training" not in os.listdir():
    os.mkdir("./training")

# SKLearn estimator that runs train.py from the current directory on the cluster.
est = SKLearn(
    source_directory='./',
    compute_target=compute_target,
    entry_script="train.py",
)

# HyperDriveConfig tying together the estimator, sampler, and policy;
# maximizes "Accuracy" over at most 20 runs, 4 at a time.
hyperdrive_config = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=4,
)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [4]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###

# The HyperDrive configuration is submitted directly to the experiment;
# no pipeline is involved.
run = exp.submit(config=hyperdrive_config, show_output=True)





In [5]:
# Block until the submitted HyperDrive run finishes. Left as the cell's last
# expression so the value it returns is displayed as the cell output.
run.wait_for_completion()


{'runId': 'HD_aac0e85f-96a5-4ef4-a9a4-deefef90c804',
 'target': 'udacityproj1comp',
 'status': 'Completed',
 'startTimeUtc': '2021-03-27T19:25:32.683172Z',
 'endTimeUtc': '2021-03-27T19:37:17.734933Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '3b69ba74-f950-4f72-b9d0-ad0440da6c0c',
  'score': '0.9110722825087364',
  'best_child_run_id': 'HD_aac0e85f-96a5-4ef4-a9a4-deefef90c804_15',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg141470.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_aac0e85f-96a5-4ef4-a9a4-deefef90c804/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=H%2Brgj9z8bJ2CZg8StYE79P17K6vQQZhi0p9n55HMBCo%3D&st=2021-03-27T19%3A28%3A00Z&se=2021-03-28T03%3A38%3A00Z&sp=r'},
 'submittedBy': 'ODL_User 14147

In [6]:
from azureml.widgets import RunDetails
# Render the run-details widget for the HyperDrive run.
RunDetails(run).show() 

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [7]:
import joblib
import os
# Get your best run and save the model from that run.

### YOUR CODE HERE ###
best_run = run.get_best_run_by_primary_metric()

# List the run's files once and reuse the result; every call to
# get_file_names() goes back to the run.
best_run_files = best_run.get_file_names()
print(best_run_files)

# Select the model artifact by location and extension rather than by its
# position in the whole listing, which also contains logs.
model_files = [
    name for name in best_run_files
    if name.startswith('outputs/') and name.endswith('.joblib')
]
if not model_files:
    raise FileNotFoundError("No .joblib model found under outputs/ for the best HyperDrive run")
model_file = model_files[-1]

# Download the best model to local storage, stating the destination explicitly
# so the load below does not depend on the SDK's default download location.
local_model_file = os.path.basename(model_file)
best_run.download_file(model_file, output_file_path=local_model_file)

# Save the downloaded file to local storage
local_path = './outputs'
os.makedirs(local_path, exist_ok=True)

# Use a context manager so the file handle is closed after loading.
with open(local_model_file, 'rb') as model_handle:
    best_hd_model = joblib.load(model_handle)
joblib.dump(best_hd_model, 'outputs/best_hd_model.joblib')

['azureml-logs/55_azureml-execution-tvmps_faa1ea89e93612dfac58704b3220b63e6ff2903442425bb6a9173a1f8365d109_d.txt', 'azureml-logs/65_job_prep-tvmps_faa1ea89e93612dfac58704b3220b63e6ff2903442425bb6a9173a1f8365d109_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_faa1ea89e93612dfac58704b3220b63e6ff2903442425bb6a9173a1f8365d109_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/107_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/model 2021-03-27 19-23 -C=1.0 --max_iter=100 .joblib', 'outputs/model 2021-03-27 19-35 -C=0.08384 --max_iter=500 .joblib']


The sklearn.linear_model.logistic module is  deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.linear_model. Anything that cannot be imported from sklearn.linear_model is now part of the private API.
Trying to unpickle estimator LogisticRegression from version 0.20.3 when using version 0.22.2.post1. This might lead to breaking code or invalid results. Use at your own risk.


['outputs/best_hd_model.joblib']

In [49]:
import pandas as pd

# Output HyperDrive Best Model Summary
best_hd_accuracy = best_run.get_metrics("Accuracy")["Accuracy"]

# The run definition stores the script arguments as a flat
# [flag, value, flag, value, ...] list; pair them up into a dict.
parameter_values = best_run.get_details()['runDefinition']['arguments']
best_parameters = dict(zip(parameter_values[::2], parameter_values[1::2]))

# Label each column from its own flag (leading dashes stripped) so a value can
# never end up under the wrong header if the argument order changes.
parameter_names = [flag.lstrip('-') for flag in best_parameters]
results_hd = pd.DataFrame(
    [list(best_parameters.values())],
    columns=parameter_names,
    index=["HyperDrive Opt Params"],
)
results_hd["Accuracy"] = str(best_hd_accuracy)
results_hd


Unnamed: 0,C,max_iter,Accuracy
HyperDrive Opt Params,0.0838426491347973,500,0.9110722825087364


In [9]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###
# The URL is split across two adjacent literals only for line length; Python
# joins them into the single address given above.
data_url = (
    "https://automlsamplenotebookdata.blob.core.windows.net"
    "/automl-sample-notebook-data/bankmarketing_train.csv"
)
ds = TabularDatasetFactory.from_delimited_files(path=data_url)


In [10]:
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)
# Store the labels back on x under the key 'y', which is the label column name
# given to AutoMLConfig in the next cell.
# NOTE(review): presumably x is a pandas DataFrame and y a matching Series --
# confirm against train.clean_data.
x['y'] = y

In [11]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_settings = {
    "experiment_timeout_minutes": 30,
    "task": 'classification',
    "primary_metric": 'accuracy',
    "training_data": x,
    "label_column_name": 'y',
    "n_cross_validations": 4,
}
automl_config = AutoMLConfig(**automl_settings)

In [12]:
# Submit your automl run

### YOUR CODE HERE ###
# automl_config is built without a compute_target argument; the run is
# submitted to the same experiment used for the HyperDrive run.
run_auto_ml = exp.submit(config=automl_config, show_output=True)


No run_configuration provided, running on local with default configuration
Running on local machine
Parent Run ID: AutoML_28ec9840-2fea-44e9-a43a-23e744d40795

Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed fit featurizers and featurizing the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/Auto

In [13]:
# Ask the AutoML parent run for its best child run.
best_run_automl = run_auto_ml.get_best_child()


In [15]:
import joblib
import os
# Retrieve and save your best automl model.


### YOUR CODE HERE ###
best_run_automl = run_auto_ml.get_best_child()

# List the run's files once and reuse the result; every call to
# get_file_names() goes back to the run.
automl_run_files = best_run_automl.get_file_names()
print(automl_run_files)

# NOTE(review): the model is picked by its position (third from last) in the
# file listing, which silently breaks if the listing changes -- confirm which
# file this is and consider selecting it by name instead.
model_file = automl_run_files[-3]

# Download the best model to local storage, stating the destination explicitly
# so the load below does not depend on the SDK's default download location.
local_model_file = os.path.basename(model_file)
best_run_automl.download_file(model_file, output_file_path=local_model_file)

# Save the downloaded file to local storage
local_path = './outputs'
os.makedirs(local_path, exist_ok=True)

# Use a context manager so the file handle is closed after loading.
with open(local_model_file, 'rb') as model_handle:
    best_automl_model = joblib.load(model_handle)
joblib.dump(best_automl_model, 'outputs/best_automl_model.joblib')

['accuracy_table', 'confusion_matrix', 'explanation/25e8c1c6/classes.interpret.json', 'explanation/25e8c1c6/expected_values.interpret.json', 'explanation/25e8c1c6/features.interpret.json', 'explanation/25e8c1c6/global_names/0.interpret.json', 'explanation/25e8c1c6/global_rank/0.interpret.json', 'explanation/25e8c1c6/global_values/0.interpret.json', 'explanation/25e8c1c6/local_importance_values.interpret.json', 'explanation/25e8c1c6/per_class_names/0.interpret.json', 'explanation/25e8c1c6/per_class_rank/0.interpret.json', 'explanation/25e8c1c6/per_class_values/0.interpret.json', 'explanation/25e8c1c6/rich_metadata.interpret.json', 'explanation/25e8c1c6/true_ys_viz.interpret.json', 'explanation/25e8c1c6/visualization_dict.interpret.json', 'explanation/25e8c1c6/ys_pred_proba_viz.interpret.json', 'explanation/25e8c1c6/ys_pred_viz.interpret.json', 'explanation/50d49998/classes.interpret.json', 'explanation/50d49998/eval_data_viz.interpret.json', 'explanation/50d49998/expected_values.interpr

In [17]:
# Display every metric recorded on the best AutoML child run.
best_run_automl.get_metrics()

{'average_precision_score_micro': 0.9814165448190604,
 'AUC_macro': 0.9473449230418508,
 'f1_score_macro': 0.7741298941566362,
 'norm_macro_recall': 0.5085843710676627,
 'log_loss': 0.3418872244874035,
 'recall_score_weighted': 0.9167525428623498,
 'AUC_micro': 0.9807157495065917,
 'recall_score_micro': 0.9167525428623498,
 'precision_score_macro': 0.7995404416013718,
 'precision_score_micro': 0.9167525428623498,
 'recall_score_macro': 0.7542921855338314,
 'weighted_accuracy': 0.9571054117684127,
 'f1_score_micro': 0.9167525428623498,
 'accuracy': 0.9167525428623498,
 'precision_score_weighted': 0.9114478475376246,
 'average_precision_score_macro': 0.8267357165969135,
 'matthews_correlation': 0.551834848790874,
 'balanced_accuracy': 0.7542921855338314,
 'AUC_weighted': 0.9473449087652849,
 'f1_score_weighted': 0.913391096274752,
 'average_precision_score_weighted': 0.9557676529702138,
 'confusion_matrix': 'aml://artifactId/ExperimentRun/dcid.AutoML_28ec9840-2fea-44e9-a43a-23e744d40795_

In [19]:
# Display the details of the best AutoML child run; the 'properties' entry is
# read in a later cell to find the winning algorithm.
best_run_automl.get_details()

{'runId': 'AutoML_28ec9840-2fea-44e9-a43a-23e744d40795_35',
 'status': 'Completed',
 'startTimeUtc': '2021-03-27T20:09:25.939149Z',
 'endTimeUtc': '2021-03-27T20:10:22.877407Z',
 'properties': {'runTemplate': 'automl_child',
  'pipeline_id': '__AutoML_Ensemble__',
  'pipeline_spec': '{"pipeline_id":"__AutoML_Ensemble__","objects":[{"module":"azureml.train.automl.ensemble","class_name":"Ensemble","spec_class":"sklearn","param_args":[],"param_kwargs":{"automl_settings":"{\'task_type\':\'classification\',\'primary_metric\':\'accuracy\',\'verbosity\':20,\'ensemble_iterations\':15,\'is_timeseries\':False,\'name\':\'udacity-project\',\'compute_target\':\'local\',\'subscription_id\':\'61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30\',\'region\':\'southcentralus\',\'spark_service\':None}","ensemble_run_id":"AutoML_28ec9840-2fea-44e9-a43a-23e744d40795_35","experiment_name":null,"workspace_name":"quick-starts-ws-141470","subscription_id":"61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30","resource_group_name":"aml-qui

In [47]:

# Build a one-row summary of the best AutoML run: its accuracy, indexed by the
# algorithm name recorded in the run's properties.
automl_accuracy_metrics = best_run_automl.get_metrics("accuracy")
best_automl_accuracy = automl_accuracy_metrics["accuracy"]

automl_properties = best_run_automl.get_details()['properties']
automl_algorithm = automl_properties['run_algorithm']

results_automl = pd.DataFrame(
    {"accuracy": [str(best_automl_accuracy)]},
    index=[automl_algorithm],
)
results_automl


Unnamed: 0,accuracy
VotingEnsemble,0.9167525428623498


In [50]:
# Re-display the HyperDrive summary table so its accuracy can be compared
# with the AutoML result shown in the previous cell.
results_hd

Unnamed: 0,C,max_iter,Accuracy
HyperDrive Opt Params,0.0838426491347973,500,0.9110722825087364


In [52]:
# Display every metric recorded on the best AutoML child run; look at
# 'AUC_weighted' and 'weighted_accuracy' in particular.
best_run_automl.get_metrics()

{'average_precision_score_micro': 0.9814165448190604,
 'AUC_macro': 0.9473449230418508,
 'f1_score_macro': 0.7741298941566362,
 'norm_macro_recall': 0.5085843710676627,
 'log_loss': 0.3418872244874035,
 'recall_score_weighted': 0.9167525428623498,
 'AUC_micro': 0.9807157495065917,
 'recall_score_micro': 0.9167525428623498,
 'precision_score_macro': 0.7995404416013718,
 'precision_score_micro': 0.9167525428623498,
 'recall_score_macro': 0.7542921855338314,
 'weighted_accuracy': 0.9571054117684127,
 'f1_score_micro': 0.9167525428623498,
 'accuracy': 0.9167525428623498,
 'precision_score_weighted': 0.9114478475376246,
 'average_precision_score_macro': 0.8267357165969135,
 'matthews_correlation': 0.551834848790874,
 'balanced_accuracy': 0.7542921855338314,
 'AUC_weighted': 0.9473449087652849,
 'f1_score_weighted': 0.913391096274752,
 'average_precision_score_weighted': 0.9557676529702138,
 'confusion_matrix': 'aml://artifactId/ExperimentRun/dcid.AutoML_28ec9840-2fea-44e9-a43a-23e744d40795_

In [53]:

# Delete the compute target used in this notebook; the cell is kept as proof
# that clean-up was performed.
compute_target.delete()