In [6]:
from azureml.core import Workspace, Experiment

ws = Workspace.get(name="quick-starts-ws-121267")
exp = Experiment(workspace=ws, name="automl")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-121267
Azure region: southcentralus
Subscription id: 54424c07-8d8d-4896-9c92-b731a63208d3
Resource group: aml-quickstarts-121267


In [8]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException


# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.
cluster_name="hd-cluster1"
try:
  compute_target=ComputeTarget(workspace=ws,name=cluster_name)
  print("Found existing compute target")
except ComputeTargetException:
  print('Creating a new compute target')
  compute_config=AmlCompute.provisioning_configuration(vm_size='Standard_D2_V2',max_nodes=4)
  compute_target=ComputeTarget.create(ws,cluster_name,compute_config)
  

compute_targets = ws.compute_targets
for name, ct in compute_targets.items():
    print(name, ct.type, ct.provisioning_state)

### YOUR CODE HERE ###

Found existing compute target
hd-cluster1 ComputeInstance Succeeded


In [9]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###
path_url = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=path_url)

In [16]:
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)
print(y)

0        0
1        0
2        0
3        0
4        0
        ..
32945    0
32946    0
32947    0
32948    0
32949    0
Name: y, Length: 32950, dtype: int64


In [17]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data=ds,
    label_column_name='y',
    n_cross_validations=5,
    compute_target=compute_target,
    iterations=100,
    max_concurrent_iterations=8)

In [19]:
automl_run=exp.submit(automl_config, show_output=True)


Running on remote.
Running on remote compute: hd-cluster1
Parent Run ID: AutoML_93e5c32a-b795-4474-bab2-712ef7847181

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+---------------------------------+------------

In [22]:
from azureml.widgets import RunDetails
RunDetails(automl_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [23]:
automl_run.wait_for_completion(show_output=True)



****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+---------------------------------+---------------------------------+--------------------------------------+
|Size of the smallest class       |Name/Label of the smallest class |Number of samples in the training data|
|3692                             |yes                              |32950                                 |
+---------------------------------+---------------------------------+--------------------------------------+

********************************************

{'runId': 'AutoML_93e5c32a-b795-4474-bab2-712ef7847181',
 'target': 'hd-cluster1',
 'status': 'Completed',
 'startTimeUtc': '2020-10-18T13:54:19.757956Z',
 'endTimeUtc': '2020-10-18T14:14:45.992126Z',
 'properties': {'num_iterations': '100',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'hd-cluster1',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"fae35aef-f1d6-4240-a0ef-ca3c27dfb049\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 1, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv\\\\\\", \\\\\\"sas\\\\\\": null, \\\\\\"storageAccountName\\\\\\": nu

In [25]:
best_run,model=automl_run.get_output()

In [26]:
best_run.register_model(model_name='automl_model',path='./outputs/')

ModelPathNotFoundException: ModelPathNotFoundException:
	Message: Could not locate the provided model_path automl_model in the set of files uploaded to the run: ['accuracy_table', 'automl_driver.py', 'azureml-logs/55_azureml-execution-tvmps_d4e70925de860576777cad1a0b6ab8cfd6585aba04d54a3aebafc286b24a7fc9_d.txt', 'azureml-logs/65_job_prep-tvmps_d4e70925de860576777cad1a0b6ab8cfd6585aba04d54a3aebafc286b24a7fc9_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_d4e70925de860576777cad1a0b6ab8cfd6585aba04d54a3aebafc286b24a7fc9_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'confusion_matrix', 'explanation/2308afd2/classes.interpret.json', 'explanation/2308afd2/expected_values.interpret.json', 'explanation/2308afd2/features.interpret.json', 'explanation/2308afd2/global_names/0.interpret.json', 'explanation/2308afd2/global_rank/0.interpret.json', 'explanation/2308afd2/global_values/0.interpret.json', 'explanation/2308afd2/local_importance_values.interpret.json', 'explanation/2308afd2/per_class_names/0.interpret.json', 'explanation/2308afd2/per_class_rank/0.interpret.json', 'explanation/2308afd2/per_class_values/0.interpret.json', 'explanation/2308afd2/rich_metadata.interpret.json', 'explanation/2308afd2/visualization_dict.interpret.json', 'explanation/4ade55bc/classes.interpret.json', 'explanation/4ade55bc/expected_values.interpret.json', 'explanation/4ade55bc/features.interpret.json', 'explanation/4ade55bc/global_names/0.interpret.json', 'explanation/4ade55bc/global_rank/0.interpret.json', 'explanation/4ade55bc/global_values/0.interpret.json', 'explanation/4ade55bc/local_importance_values.interpret.json', 'explanation/4ade55bc/per_class_names/0.interpret.json', 'explanation/4ade55bc/per_class_rank/0.interpret.json', 'explanation/4ade55bc/per_class_values/0.interpret.json', 'explanation/4ade55bc/rich_metadata.interpret.json', 'explanation/4ade55bc/visualization_dict.interpret.json', 'logs/azureml/107_azureml.log', 'logs/azureml/azureml_automl.log', 'logs/azureml/dataprep/python_span_195b95fa-4fb3-4d3d-bd7d-83690913bb8b.jsonl', 'logs/azureml/dataprep/python_span_f31e48d0-c9ce-443b-86c9-d426cca690fc.jsonl', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/conda_env_v_1_0_0.yml', 'outputs/env_dependencies.json', 'outputs/model.pkl', 'outputs/pipeline_graph.json', 'outputs/scoring_file_v_1_0_0.py']
                See https://aka.ms/run-logging for more details.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Could not locate the provided model_path automl_model in the set of files uploaded to the run: ['accuracy_table', 'automl_driver.py', 'azureml-logs/55_azureml-execution-tvmps_d4e70925de860576777cad1a0b6ab8cfd6585aba04d54a3aebafc286b24a7fc9_d.txt', 'azureml-logs/65_job_prep-tvmps_d4e70925de860576777cad1a0b6ab8cfd6585aba04d54a3aebafc286b24a7fc9_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_d4e70925de860576777cad1a0b6ab8cfd6585aba04d54a3aebafc286b24a7fc9_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'confusion_matrix', 'explanation/2308afd2/classes.interpret.json', 'explanation/2308afd2/expected_values.interpret.json', 'explanation/2308afd2/features.interpret.json', 'explanation/2308afd2/global_names/0.interpret.json', 'explanation/2308afd2/global_rank/0.interpret.json', 'explanation/2308afd2/global_values/0.interpret.json', 'explanation/2308afd2/local_importance_values.interpret.json', 'explanation/2308afd2/per_class_names/0.interpret.json', 'explanation/2308afd2/per_class_rank/0.interpret.json', 'explanation/2308afd2/per_class_values/0.interpret.json', 'explanation/2308afd2/rich_metadata.interpret.json', 'explanation/2308afd2/visualization_dict.interpret.json', 'explanation/4ade55bc/classes.interpret.json', 'explanation/4ade55bc/expected_values.interpret.json', 'explanation/4ade55bc/features.interpret.json', 'explanation/4ade55bc/global_names/0.interpret.json', 'explanation/4ade55bc/global_rank/0.interpret.json', 'explanation/4ade55bc/global_values/0.interpret.json', 'explanation/4ade55bc/local_importance_values.interpret.json', 'explanation/4ade55bc/per_class_names/0.interpret.json', 'explanation/4ade55bc/per_class_rank/0.interpret.json', 'explanation/4ade55bc/per_class_values/0.interpret.json', 'explanation/4ade55bc/rich_metadata.interpret.json', 'explanation/4ade55bc/visualization_dict.interpret.json', 'logs/azureml/107_azureml.log', 'logs/azureml/azureml_automl.log', 'logs/azureml/dataprep/python_span_195b95fa-4fb3-4d3d-bd7d-83690913bb8b.jsonl', 'logs/azureml/dataprep/python_span_f31e48d0-c9ce-443b-86c9-d426cca690fc.jsonl', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/conda_env_v_1_0_0.yml', 'outputs/env_dependencies.json', 'outputs/model.pkl', 'outputs/pipeline_graph.json', 'outputs/scoring_file_v_1_0_0.py']\n                See https://aka.ms/run-logging for more details."
    }
}

In [28]:
model._final_estimator

PreFittedSoftVotingClassifier(classification_labels=None,
                              estimators=[('89',
                                           Pipeline(memory=None,
                                                    steps=[('standardscalerwrapper',
                                                            <azureml.automl.runtime.shared.model_wrappers.StandardScalerWrapper object at 0x7fa4e9375128>),
                                                           ('xgboostclassifier',
                                                            XGBoostClassifier(base_score=0.5,
                                                                              booster='gbtree',
                                                                              colsample_bylevel=1,
                                                                              colsample_bynode=1,
                                                                              colsample_bytree=1,
                     