In [1]:
from azureml.core import Workspace, Experiment

# Attach to the workspace described by the local config file and open the
# experiment that every run in this notebook is submitted under.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

# Report which workspace the notebook is attached to, one field per line.
print(
    "\n".join(
        (
            "Workspace name: " + ws.name,
            "Azure region: " + ws.location,
            "Subscription id: " + ws.subscription_id,
            "Resource group: " + ws.resource_group,
        )
    )
)

# Open an interactive run on the experiment.
# NOTE(review): `run` is rebound by the later AutoML submission and this run
# is never explicitly completed in the visible cells — confirm it is needed.
run = exp.start_logging()

Workspace name: quick-starts-ws-283802
Azure region: southcentralus
Subscription id: a24a24d5-8d87-4c8a-99b6-91ed2d2df51f
Resource group: aml-quickstarts-283802


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "aml-compute"

# Reuse the compute target registered under `cluster_name` when the lookup
# succeeds; otherwise provision a new one and block until it is ready.
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print(f"Creating new compute target: {cluster_name}")
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_D2_V2", min_nodes=0, max_nodes=4
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
    compute_target.wait_for_completion(show_output=True)
else:
    print(f"Found existing compute target: {cluster_name}")

Found existing compute target: aml-compute


In [3]:
# from azureml.widgets import RunDetails
# from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler: every trial draws "C" from uniform(0.05, 1) and
# "max_iter" from a fixed set of choices.
ps = RandomParameterSampling(
    {
        "C": uniform(0.05, 1),
        "max_iter": choice(16, 32, 64, 128, 150),
    }
)

# Specify a Policy: Bandit early termination, configured with
# evaluation_interval=2 and slack_factor=0.1.
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Make sure ./training exists. makedirs(exist_ok=True) replaces the
# listdir()/mkdir() pair: there is no check-then-create gap, and a
# non-directory entry named "training" now raises FileExistsError instead of
# being silently treated as "already there".
# NOTE(review): the ScriptRunConfig below uses source_directory='.', so this
# directory is not referenced by anything in this cell — confirm it is needed.
os.makedirs("./training", exist_ok=True)

# Setup environment for your training run (built from conda_dependencies.yml).
sklearn_env = Environment.from_conda_specification(
    name='sklearn-env',
    file_path='conda_dependencies.yml',
)

# Create a ScriptRunConfig Object to specify the configuration details of your training job
src = ScriptRunConfig(
    source_directory='.',
    script='train.py',  # This is your training script filename
    environment=sklearn_env,
    compute_target=compute_target,
)

# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
# The sweep maximizes the 'accuracy' metric, with at most 30 runs in total and
# at most 4 running concurrently.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=30,
    max_concurrent_runs=4,
)

In [8]:
# Launch the hyperparameter sweep on the experiment. The returned handle is
# what the following cells use to monitor the sweep and query its results.
hyperdrive_run = exp.submit(config=hyperdrive_config)

In [None]:
from azureml.widgets import RunDetails
# Show the azureml.widgets RunDetails view for the submitted HyperDrive run.
RunDetails(hyperdrive_run).show()

In [25]:
# Block until the HyperDrive run reaches a terminal state. Without this, a
# fresh "Restart & Run All" reaches the status print (and the best-run lookup
# in the next cell) while the sweep is still running remotely.
# raise_on_error=False keeps the original behaviour of printing whatever the
# final status is, rather than raising when the run did not succeed.
hyperdrive_run.wait_for_completion(show_output=False, raise_on_error=False)
print(hyperdrive_run.get_status())

Completed


In [26]:
import joblib

# Ask the HyperDrive run for the child run that did best on the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
# Disabled: download the best run's saved model and load it locally.
# NOTE(review): presumes train.py writes outputs/model.joblib — confirm before re-enabling.
# best_run.download_file(name='outputs/model.joblib', output_file_path='best_model.joblib')
# model = joblib.load('best_model.joblib')


In [11]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Build a tabular dataset from the bank-marketing training CSV at the
# sample-data blob URL.
ds = TabularDatasetFactory.from_delimited_files(
    path="https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
)

In [12]:
from train import clean_data
import pandas as pd
from azureml.core import Dataset

# Run the training script's clean_data on the dataset, then put the returned
# features and label back side by side in a single frame.
x, y = clean_data(ds)
df = pd.concat([x, y], axis="columns")

datastore = ws.get_default_datastore()

# Register the combined frame as a tabular dataset in the default datastore.
# NOTE(review): this rebinds `ds` from the dataset loaded earlier to the
# registered one, so re-running this cell on its own passes the already
# processed dataset back into clean_data.
ds = Dataset.Tabular.register_pandas_dataframe(
    dataframe=df,
    target=datastore,
    name='combined-data',
    description='Combined features and label for AutoML',
)

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}
Validating arguments.
Arguments validated.
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'emp.var.rate' -> 'emp_var_rate'
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'cons.price.idx' -> 'cons_price_idx'
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'cons.conf.idx' -> 'cons_conf_idx'
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'nr.employed' -> 'nr_employed'
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'job_admin.' -> 'job_admin_'
Column header contains '.' This period will be translated to '_' as we write the data

In [29]:
from azureml.train.automl import AutoMLConfig

# AutoML settings: a classification task scored on accuracy, trained on the
# registered dataset `ds` with column "y" as the label, using 3-fold
# cross-validation, a 30-minute experiment timeout, and the shared compute
# target.
automl_config = AutoMLConfig(
    task="classification",
    primary_metric="accuracy",
    training_data=ds,
    label_column_name="y",
    n_cross_validations=3,
    experiment_timeout_minutes=30,
    compute_target=compute_target,
)

In [30]:
# Submit the AutoML configuration to the experiment.
# NOTE(review): this rebinds `run`, which earlier held the run opened by
# exp.start_logging().
run = exp.submit(config=automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_90b3e2bc-fda8-4674-b9f0-7ca00621c4da,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


In [35]:
# Block until the AutoML run reaches a terminal state. Without this, a fresh
# "Restart & Run All" prints the status immediately after submission, while
# the remote run is still queued or running.
run.wait_for_completion(show_output=False)
print(run.get_status())


Completed
