In [13]:
from azureml.core import Workspace, Experiment

# Connect to the Azure ML workspace and open the experiment used by this project.
ws = Workspace.get(name="quick-starts-ws-134927")
exp = Experiment(workspace=ws, name="udacity-project")

# Collect the workspace facts first, then print them one per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')

# Start an interactive logging run on the experiment.
run = exp.start_logging()

Workspace name: quick-starts-ws-134927
Azure region: southcentralus
Subscription id: 6b4af8be-9931-443e-90f6-c4c34a1f9737
Resource group: aml-quickstarts-134927


In [14]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster to reuse or, if missing, to create.
cpu_cluster_name = "cpu-cluster-4"

# Reuse the cluster when it already exists in the workspace; provision it otherwise.
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # The project constraints require vm_size Standard_D2_V2 and at most 4 nodes
    # (the previous value of 6 exceeded that limit).
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

# Block until the cluster reports that provisioning has finished.
compute_target.wait_for_completion(show_output=True)



# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [15]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.train.hyperdrive import choice, loguniform
import os

# Hyperparameter search space passed to train.py as command-line arguments:
# --C and --max_iter are each drawn at random from a fixed list of candidate values.
ps = RandomParameterSampling(
    {
        '--C' : choice(0.2, 0.4, 1,2),
        '--max_iter': choice (50, 100, 150, 200)
    }
)

# Early-termination policy: a Bandit policy evaluated every 2 intervals
# with a slack factor of 0.1.
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Make sure the local "training" directory exists. exist_ok=True makes this
# idempotent on re-runs and replaces the manual os.listdir() membership check.
os.makedirs("./training", exist_ok=True)

from azureml.core.script_run_config import ScriptRunConfig

# SKLearn estimator that runs train.py from the notebook directory on the CPU cluster.
estimator_settings = {
    'source_directory': './',
    'entry_script': 'train.py',
    'compute_target': cpu_cluster_name,
}
est = SKLearn(**estimator_settings)

# HyperDrive sweep: combine the estimator, the parameter sampler and the
# early-termination policy, maximizing the "Accuracy" metric.
sweep_settings = {
    'estimator': est,
    'hyperparameter_sampling': ps,
    'policy': policy,
    'primary_metric_name': "Accuracy",
    'primary_metric_goal': PrimaryMetricGoal.MAXIMIZE,
    'max_total_runs': 100,
    'max_concurrent_runs': 4,
}
hyperdrive_config = HyperDriveConfig(**sweep_settings)



In [16]:
# Submit the HyperDrive run to the experiment and show run details with the widget.

from azureml.core.experiment import Experiment
experiment = Experiment(ws, ws.name)
hyperdrive_run = experiment.submit(hyperdrive_config, show_output=True)

# Show the run-details widget announced above (RunDetails is imported in the
# HyperDrive configuration cell); previously the widget was never displayed.
RunDetails(hyperdrive_run).show()

# Wait for the sweep to finish so that the following cell can query the best run.
hyperdrive_run.wait_for_completion(show_output=True)





In [17]:
import joblib
from azureml.core.model import Model
# Get the best HyperDrive run and register the model file that run produced.
os.makedirs('outputs', exist_ok=True)

# The best run can only be determined reliably once the sweep has finished.
hyperdrive_run.wait_for_completion(show_output=False)

best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    raise RuntimeError(
        'No HyperDrive child run reported the primary metric; there is no best run to register.'
    )

# The previous code called joblib.dump on `best_run`, which pickles the Run
# handle itself rather than a trained model. Instead, fetch the model artifact
# that the training script uploaded to the run's outputs folder.
# NOTE(review): train.py is not visible here; this assumes it saves the model
# under outputs/ with a .pkl or .joblib extension - confirm against train.py.
model_files = [
    name for name in best_run.get_file_names()
    if name.startswith('outputs/') and name.endswith(('.pkl', '.joblib'))
]
if not model_files:
    raise FileNotFoundError(
        'The best run has no model file under outputs/; check that train.py saves the model.'
    )
best_run.download_file(name=model_files[0], output_file_path='outputs/model.pkl')

# Register the downloaded model file in the workspace.
model = Model.register(
    workspace=ws,
    model_name='hyperdrive_model',
    model_path='outputs/model.pkl',
    model_framework=Model.Framework.SCIKITLEARN,
    model_framework_version='0.19.1',
)


Registering model hyperdrive_model


In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core  import Dataset 
# Build a TabularDataset from the bank-marketing training CSV using
# TabularDatasetFactory. The file is hosted at the URL below.
url="https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

# Dataset.Tabular is the same factory; it is called directly here.
ds = TabularDatasetFactory.from_delimited_files(path=url)

In [17]:
from train import clean_data

# Clean the dataset with clean_data (imported from train.py) and unpack its
# two return values into x and y.
# NOTE(review): presumably x holds the features and y the labels - confirm
# against train.py, which is not visible from this notebook.
# NOTE(review): x and y are not referenced by any later cell visible here; the
# AutoML configuration below is given `ds` directly.
x, y = clean_data(ds)

NameError: name 'clean_data' is not defined

In [11]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
#
# The AutoML primary metric names are case-sensitive identifiers; the documented
# classification metric is lowercase 'accuracy' (previously spelled 'Accuracy').
# No compute target is given, so the run executes with the default (local) configuration.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data=ds,
    label_column_name='y',
    n_cross_validations=5)

In [10]:
import sys
# Upgrade pip by invoking it through sys.executable, i.e. the interpreter that
# runs this kernel, so the upgrade targets the kernel's own environment.
!{sys.executable} -m pip install --upgrade pip
# NOTE(review): "python==3.6" presumably records the Python version of this
# environment - confirm; it has no effect on the command above.

Collecting pip
  Downloading pip-20.3.3-py2.py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 8.2 MB/s eta 0:00:01
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 20.1.1
    Uninstalling pip-20.1.1:
      Successfully uninstalled pip-20.1.1
Successfully installed pip-20.3.3


In [12]:
# Submit the AutoML run
from azureml.core.experiment import Experiment
experiment = Experiment(ws, "automl_test_experiment")

# NOTE: the variable keeps the name `hyperdrive_run` because the following cell
# reads it, but from here on it refers to the AutoML run, not the HyperDrive sweep.
hyperdrive_run = experiment.submit(config=automl_config, show_output=True)


from azureml.widgets import RunDetails
RunDetails(hyperdrive_run).show()

# Wait for the submitted run to reach a terminal state instead of calling
# complete(), which marks a run as completed rather than waiting for its result.
hyperdrive_run.wait_for_completion(show_output=False)

No run_configuration provided, running on local with default configuration


KeyboardInterrupt: 

In [None]:
# Retrieve and save the best AutoML model.
import os

import joblib

# Despite its name, `hyperdrive_run` was rebound to the AutoML run by the
# AutoML submission cell above.
automl_run = hyperdrive_run

# An AutoML run has no get_best_run_by_primary_metric(); get_output() returns
# the best child run together with its fitted model.
best_run, fitted_model = automl_run.get_output()

# Persist the fitted model locally, as the cell's purpose states.
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=fitted_model, filename='outputs/automl_model.pkl')

# The run definition may be missing from the details (for example for local
# runs), so look it up defensively instead of indexing.
run_definition = best_run.get_details().get('runDefinition') or {}
print(run_definition.get('arguments'))
print(best_run.get_file_names())