In [1]:
from sklearn.linear_model import LogisticRegression
import argparse
import os
import numpy as np
from sklearn.metrics import mean_squared_error
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
from azureml.core.run import Run
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core import Workspace, Dataset
from azureml.core import Workspace,ScriptRunConfig,Experiment, Run

# The `from azureml.core import ...` statements above bind only the imported
# names, not the top-level `azureml` package, so import the subpackage
# explicitly before reading its VERSION attribute (otherwise a fresh kernel
# raises NameError here).
import azureml.core

print("SDK version:", azureml.core.VERSION)

SDK version: 1.19.0


**Dataset**

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [2]:
# Name of the experiment that the runs in this notebook are recorded under.
experiment_name = 'lv-hyperparameter'

# Obtain the workspace handle via Workspace.from_config() and create the
# Experiment object for that workspace.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name=experiment_name)

In [3]:
# Start an interactive logging run on the experiment and keep a handle to it.
# NOTE(review): `run` is not referenced again in the cells visible here; if it
# is not used or completed further down, this run may stay open — confirm.
run = exp.start_logging()

In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Compute cluster used as the compute target for the training runs.
# Provisioning uses vm_size 'STANDARD_D2_V2' with max_nodes=4.
cpu_cluster_name = "cpu-cluster"

try:
    # Look the cluster up by name; ComputeTargetException is treated as
    # "no such cluster yet".
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    cpu_cluster = ComputeTarget.create(
        workspace=ws,
        name=cpu_cluster_name,
        provisioning_configuration=compute_config,
    )
else:
    print('Found existing cluster, use it.')

# Block until the provisioning operation reports completion.
cpu_cluster.wait_for_completion(show_output=True)

# get_status() gives a more detailed view of the current AmlCompute status.
print(cpu_cluster.get_status().serialize())

Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-01-01T03:20:12.451000+00:00', 'errors': None, 'creationTime': '2021-01-01T03:20:05.851100+00:00', 'modifiedTime': '2021-01-01T03:20:22.072502+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


**Hyperdrive Configuration**

TODO: Explain the model you are using and the reasons for choosing the different hyperparameters, termination policy and config settings.

In [6]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.dnn import TensorFlow
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, normal, choice
import os,shutil


# Early termination policy for the sweep (not required with Bayesian sampling):
# BanditPolicy with slack_factor=0.15 and evaluation_interval=2.
early_termination_policy = BanditPolicy(slack_factor=0.15, evaluation_interval=2)

# Hyperparameter search space, sampled at random from the listed choices.
# The keys are the command-line flags handed to the training script.
# NOTE(review): 0 is one of the candidates for --C. If train.py forwards --C to
# sklearn's LogisticRegression, C is documented as a positive float — confirm
# against train.py before the next sweep.
param_sampling = RandomParameterSampling({
    '--C': choice(0, 0.25, 0.5, 1),
    '--max_iter': choice(500, 1000, 5000, 10000, 20000),
})

# Directory used as the estimator's source directory. makedirs(exist_ok=True)
# creates it when missing and is a no-op when it already exists, so the cell
# can be re-run safely.
script_folder = "./training"
os.makedirs(script_folder, exist_ok=True)

# Reference: lesson 6.3: copying the training file into the script folder
shutil.copy('./train.py', script_folder)

# Fixed (non-swept) script argument: the default datastore, mounted.
script_params = {
    '--datastore-dir': ws.get_default_datastore().as_mount(),
}

# Estimator that runs train.py from the script folder on the CPU cluster.
# NOTE(review): the recorded output reports the SKLearn estimator as deprecated
# in favour of ScriptRunConfig; consider migrating.
estimator = SKLearn(source_directory=script_folder,
                    script_params=script_params,
                    compute_target=cpu_cluster,
                    entry_script='train.py',
                    pip_packages=['joblib'])

# HyperDrive sweep: maximize the "Accuracy" metric over at most 20 runs,
# with up to 4 running concurrently.
hyperdrive_run_config = HyperDriveConfig(estimator=estimator,
                                         hyperparameter_sampling=param_sampling,
                                         policy=early_termination_policy,
                                         primary_metric_name="Accuracy",
                                         primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                         max_total_runs=20,
                                         max_concurrent_runs=4)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
You have specified to install packages in your run. Note that you have overridden Azure ML's installation of the following packages: ['joblib']. We cannot guarantee image build will succeed.


**Run Details**

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the RunDetails widget to show the different experiments.

In [7]:
# Submit the HyperDrive configuration to the experiment and keep the run handle.
hd_run = exp.submit(hyperdrive_run_config)

# Show the monitoring widget for the sweep. The submitted run object is passed
# directly; re-fetching the same run by id via Run(exp, hd_run.id) is redundant.
RunDetails(hd_run).show()



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

**Get log results upon completion**

In [8]:
# Block until the HyperDrive run finishes. The call stays the last expression
# of the cell so its return value is displayed.
hd_run.wait_for_completion(
    show_output=True,  # True requests the verbose log
)

RunId: HD_a6e6abef-5b73-4775-95ed-ba09a5ee3e51
Web View: https://ml.azure.com/experiments/lv-hyperparameter/runs/HD_a6e6abef-5b73-4775-95ed-ba09a5ee3e51?wsid=/subscriptions/610d6e37-4747-4a20-80eb-3aad70a55f43/resourcegroups/aml-quickstarts-132714/workspaces/quick-starts-ws-132714

Execution Summary
RunId: HD_a6e6abef-5b73-4775-95ed-ba09a5ee3e51
Web View: https://ml.azure.com/experiments/lv-hyperparameter/runs/HD_a6e6abef-5b73-4775-95ed-ba09a5ee3e51?wsid=/subscriptions/610d6e37-4747-4a20-80eb-3aad70a55f43/resourcegroups/aml-quickstarts-132714/workspaces/quick-starts-ws-132714



{'runId': 'HD_a6e6abef-5b73-4775-95ed-ba09a5ee3e51',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-01T03:21:00.483478Z',
 'endTimeUtc': '2021-01-01T03:46:45.163162Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'fbc662da-891c-45d8-a605-8be6a3286aff',
  'score': '0.9206349206349206',
  'best_child_run_id': 'HD_a6e6abef-5b73-4775-95ed-ba09a5ee3e51_7',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg132714.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_a6e6abef-5b73-4775-95ed-ba09a5ee3e51/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=SMX0KYQfFrXMjW91Op4rPfEzpoDxsFZYk3NG%2F1pcyO8%3D&st=2021-01-01T03%3A37%3A05Z&se=2021-01-01T11%3A47%3A05Z&sp=r'}}

**Best Model**

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [9]:
import joblib
# Fetch the best child run of the sweep (by primary metric) and report its
# accuracy together with the hyperparameter values it was launched with.
best_run = hd_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']

# Pair each flag with the value that follows it so hyperparameters are looked
# up by name instead of by fixed position.
# Assumes the argument list alternates flag, value, flag, value, ... — this
# matches the positions (3 and 5) the values were previously read from.
best_hyperparameters = dict(zip(parameter_values[::2], parameter_values[1::2]))

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
# The swept flags are --C and --max_iter; label the values accordingly.
print('\n C:', best_hyperparameters['--C'])
print('\n max_iter:', best_hyperparameters['--max_iter'])

Best Run Id:  HD_a6e6abef-5b73-4775-95ed-ba09a5ee3e51_7

 Accuracy: 0.9206349206349206

 learning rate: 0

 keep probability: 10000
