In [7]:
from azureml.core import Workspace, Experiment

#run = exp.start_logging()

In [8]:
# Connect to an existing Azure ML workspace, identified by its name,
# subscription and resource group.
# NOTE(review): these identifiers are hard-coded for one lab environment;
# consider loading them from configuration instead — confirm with the owner.
workspace_settings = {
    "name": "quick-starts-ws-137335",
    "subscription_id": "1b944a9b-fdae-4f97-aeb1-b7eea0beac53",
    "resource_group": "aml-quickstarts-137335",
}
ws = Workspace.get(**workspace_settings)

In [9]:
# Experiment object used to group runs started from this notebook.
exp = Experiment(workspace=ws, name="udacity-project")

# Print one workspace property per line so the reader can confirm which
# workspace the notebook is attached to.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print("\n".join(workspace_summary))

# Start an interactive logging run on the experiment.
run = exp.start_logging()

Workspace name: quick-starts-ws-137335
Azure region: southcentralus
Subscription id: 1b944a9b-fdae-4f97-aeb1-b7eea0beac53
Resource group: aml-quickstarts-137335


In [10]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster to reuse or provision.
cpu_cluster_name = "cpu-cluster-4"

# Look the cluster up by name first; only when the lookup raises
# ComputeTargetException is a new cluster provisioned
# (STANDARD_D2_V2 VMs, at most 4 nodes).
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster reports completion, streaming progress to the cell output.
compute_target.wait_for_completion(show_output=True)



# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [16]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice, normal
from azureml.train.hyperdrive import choice, loguniform
import os

# Hyperparameter search space, sampled at random.
# The keys are passed as command-line style arguments
# (presumably consumed by train.py — verify against that script).
search_space = {
    "--C": choice(16, 32, 64, 128),
    "--max_iter": choice(range(10, 200)),
}
ps = RandomParameterSampling(search_space)

# Bandit policy: evaluated at every interval, with a slack factor of 0.1,
# and evaluation delayed for the first 5 intervals.
policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=1,
    delay_evaluation=5,
)

# Make sure a local "training" directory exists next to the notebook.
# NOTE(review): nothing in this cell reads from it (source_directory is ".") — confirm it is still needed.
if "training" not in os.listdir():
    os.mkdir("./training")

# SKLearn estimator that runs train.py from the current directory on the CPU cluster.
est = SKLearn(
    source_directory=".",
    entry_script="train.py",
    compute_target=cpu_cluster_name,
)

from azureml.core.script_run_config import ScriptRunConfig

# HyperDrive configuration combining the estimator, the sampler and the policy.
# The primary metric name must match the metric name logged by the training
# script (assumed to be 'Accuracy' — verify against train.py).
hyperdrive_config = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=4,
)



In [17]:
# Submit the HyperDrive run to an experiment and show run details with the widget.

from azureml.core.experiment import Experiment

# NOTE(review): this experiment is named after the workspace (ws.name), not the
# "udacity-project" experiment held in `exp` — confirm this is intended.
experiment = Experiment(ws, ws.name)
hyperdrive_run = experiment.submit(hyperdrive_config)

# `show` is a method and must be called; referencing it without parentheses
# only evaluates the bound method and never renders the widget.
RunDetails(hyperdrive_run).show()

# Query the current status, then block until the run finishes while streaming
# its logs; the value of the final expression is displayed as the cell output.
hyperdrive_run.get_status()
hyperdrive_run.wait_for_completion(show_output=True)



RunId: HD_ec4945c8-b00f-435c-bbaf-693392783c7f
Web View: https://ml.azure.com/experiments/quick-starts-ws-137335/runs/HD_ec4945c8-b00f-435c-bbaf-693392783c7f?wsid=/subscriptions/1b944a9b-fdae-4f97-aeb1-b7eea0beac53/resourcegroups/aml-quickstarts-137335/workspaces/quick-starts-ws-137335

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-02-05T17:18:29.147441][API][INFO]Experiment created<END>\n""<START>[2021-02-05T17:18:29.878723][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n"<START>[2021-02-05T17:18:30.0337392Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>"<START>[2021-02-05T17:18:30.215996][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_ec4945c8-b00f-435c-bbaf-693392783c7f
Web View: https://ml.azure.com/experiments/quick-starts-ws-137335/runs/HD_ec4945c8-b00f-435c-bbaf-693392783c7f?wsid=/subsc

{'runId': 'HD_ec4945c8-b00f-435c-bbaf-693392783c7f',
 'target': 'cpu-cluster-4',
 'status': 'Completed',
 'startTimeUtc': '2021-02-05T17:18:28.92822Z',
 'endTimeUtc': '2021-02-05T17:32:17.967634Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'd5b0d369-fef6-4d77-8c76-936b08f1d3fa',
  'score': '0.8813559322033898',
  'best_child_run_id': 'HD_ec4945c8-b00f-435c-bbaf-693392783c7f_16',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg137335.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_ec4945c8-b00f-435c-bbaf-693392783c7f/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=N0tBe%2BqXow2Q4gZUXBEKq6v9Beh0%2FHLlwxyQfEcv5hI%3D&st=2021-02-05T17%3A22%3A32Z&se=2021-02-06T01%3A32%3A32Z&sp=r'},
 'submittedBy': 'ODL_User 137335'

In [18]:
# Pick the child run with the best primary metric, then list the files it
# produced (the list is displayed as the cell output).
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run.get_file_names()

['azureml-logs/55_azureml-execution-tvmps_4b0a39061f4c3b5e6aa4206607f6dc7d2afe26e21a2d90724b1941cab8b1d6ff_d.txt',
 'azureml-logs/65_job_prep-tvmps_4b0a39061f4c3b5e6aa4206607f6dc7d2afe26e21a2d90724b1941cab8b1d6ff_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_4b0a39061f4c3b5e6aa4206607f6dc7d2afe26e21a2d90724b1941cab8b1d6ff_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'logs/azureml/104_azureml.log',
 'logs/azureml/dataprep/backgroundProcess.log',
 'logs/azureml/dataprep/backgroundProcess_Telemetry.log',
 'logs/azureml/job_prep_azureml.log',
 'logs/azureml/job_release_azureml.log',
 'outputs/model.joblib']

In [19]:
#best_run.download_file("/outputs/hyperdrive_model.joblib","./outputs/hyperdrive_model.joblib")

# Register the model file stored by the best HyperDrive run under a fixed model name.
model = best_run.register_model(
    model_path='outputs/model.joblib',
    model_name='hyperdrive_model_best_run',
)

In [18]:
#best_run.get_file_names()
#import joblib
#joblib.dump(model, 'outputs/model.joblib')

In [20]:
import joblib
from azureml.core.model import Model
# Get your best run and save the model from that run.
#os.makedirs('outputs', exist_ok=True)
#best_run=hyperdrive_run.get_best_run_by_primary_metric()
#best_run.get_file_names()
#model=best_run.register_model(model_name='hyperdrive_model', 
 #                             model_path='outputs/model.pkl')

In [22]:
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core  import Dataset 
# Build a tabular dataset from the delimited file at `url`.
# NOTE: the project template pointed at the bank-marketing CSV
# ("https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv");
# this notebook reads the UCI Parkinsons data file instead.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data"

ds = Dataset.Tabular.from_delimited_files(path=url)

In [24]:
def split_data(data):
    """Split a tabular dataset into a feature frame and a label series.

    Rows containing missing values are dropped, the "name" column is
    removed, and the "status" column is separated out as the label.
    No encoding of the remaining columns is performed.

    Args:
        data: Object exposing ``to_pandas_dataframe()`` that returns a
            pandas DataFrame with "name" and "status" columns.

    Returns:
        tuple: ``(x_df, y_df)`` where ``x_df`` is the DataFrame of remaining
        feature columns and ``y_df`` is the "status" Series.

    Raises:
        KeyError: If the "name" or "status" column is missing.
    """
    frame = data.to_pandas_dataframe().dropna()
    features = frame.drop(columns=["name"])
    labels = features.pop("status")
    return features, labels

In [27]:
# Split the dataset into features (x) and labels (y) using split_data,
# which this notebook calls instead of train.clean_data.
# NOTE(review): x and y do not appear to be used by the AutoML configuration,
# which takes `ds` directly — confirm whether this cell is still needed.
x, y = split_data(data=ds)

In [28]:
from azureml.train.automl import AutoMLConfig

# Settings for the AutoML classification experiment.
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_settings = {
    "experiment_timeout_minutes": 30,
    "task": 'classification',
    "primary_metric": 'accuracy',
    "training_data": ds,
    "label_column_name": 'status',
    "compute_target": cpu_cluster_name,
    "n_cross_validations": 5,
}

automl_config = AutoMLConfig(**automl_settings)

In [29]:
# Submit the AutoML run and display its progress widget.
from azureml.core.experiment import Experiment
from azureml.widgets import RunDetails

experiment = Experiment(ws, "automl_test_experiment")

# show_output=True streams the run's progress into the cell output.
aml_run = experiment.submit(config=automl_config, show_output=True)

RunDetails(aml_run).show()

#aml_run.complete()

Running on remote.
No run_configuration provided, running on cpu-cluster-4 with default configuration
Running on remote compute: cpu-cluster-4
Parent Run ID: AutoML_8c38d68f-badf-4ff0-9fbc-87ef2914ae03

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputati

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [30]:
# Retrieve the best AutoML child run together with its fitted model, then
# register the model in the workspace.
# NOTE(review): best_run_customized is not used in the visible cells.
best_run_customized, fitted_model_customized = aml_run.get_output()

# The training data is the UCI Parkinsons file loaded into `ds`, not the
# bank-marketing set from the project template, so the description says so.
description = 'AutoML Model trained on the UCI Parkinsons dataset'
tags = None

# The registered name is kept unchanged so that anything already referring to
# "AutoMLModelBankMarketing" keeps working, even though the name is misleading.
# NOTE(review): consider renaming once downstream consumers are confirmed.
model = aml_run.register_model(
    model_name="AutoMLModelBankMarketing",
    description=description,
    tags=tags,
)

print(aml_run.model_id) # This will be written to the script file later in the notebook.

Package:azureml-automl-runtime, training version:1.21.0, current version:1.20.0
Package:azureml-core, training version:1.21.0.post1, current version:1.20.0
Package:azureml-dataprep, training version:2.8.2, current version:2.7.3
Package:azureml-dataprep-native, training version:28.0.0, current version:27.0.0
Package:azureml-dataprep-rslex, training version:1.6.0, current version:1.5.0
Package:azureml-dataset-runtime, training version:1.21.0, current version:1.20.0
Package:azureml-defaults, training version:1.21.0, current version:1.20.0
Package:azureml-interpret, training version:1.21.0, current version:1.20.0
Package:azureml-pipeline-core, training version:1.21.0, current version:1.20.0
Package:azureml-telemetry, training version:1.21.0, current version:1.20.0
Package:azureml-train-automl-client, training version:1.21.0, current version:1.20.0
Package:azureml-train-automl-runtime, training version:1.21.0, current version:1.20.0


AutoMLModelBankMarketing


In [31]:
# Show the fitted model object returned by aml_run.get_output().
print(fitted_model_customized)

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                                  min_impurity_split=None,
                                                                                                  min_samples_leaf=0.01,
                                                                                                  min_samples_split=0.056842105263157895,
          

In [32]:

# Fetch the metrics recorded on the AutoML run and print each name with its value.
aml_metrics = aml_run.get_metrics()
for metric_name, metric in aml_metrics.items():
    print(metric_name, metric)

experiment_status ['DatasetEvaluation', 'FeaturesGeneration', 'DatasetFeaturization', 'DatasetFeaturizationCompleted', 'DatasetCrossValidationSplit', 'ModelSelection']
experiment_status_description ['Gathering dataset statistics.', 'Generating features for the dataset.', 'Beginning to fit featurizers and featurize the dataset.', 'Completed fit featurizers and featurizing the dataset.', 'Generating individually featurized CV splits.', 'Beginning model selection.']
matthews_correlation 0.816290855203318
log_loss 0.2174638078695435
balanced_accuracy 0.8864329237071173
recall_score_micro 0.9333333333333333
AUC_weighted 0.9616007306766484
precision_score_micro 0.9333333333333333
f1_score_macro 0.9003751581040286
norm_macro_recall 0.7728658474142345
recall_score_weighted 0.9333333333333333
AUC_micro 0.9733070348454964
recall_score_macro 0.8864329237071173
AUC_macro 0.9616007306766484
precision_score_macro 0.9350863952619172
average_precision_score_weighted 0.9673894005645183
f1_score_weighte