In [4]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.core.model import Model

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.39.0


In [2]:
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

endofproj
tsar
eastus2
d6ebc258-732e-4a76-8d46-be5af7f5737d


In [3]:
# Choose a name for the run history container in the workspace.
# NOTE: update these to match your existing experiment name
experiment_name = 'capstone-project-1'
project_folder = './hyperparam'

experiment = Experiment(ws, experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
capstone-project-1,endofproj,Link to Azure Machine Learning studio,Link to Documentation


In [5]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
amlcompute_cluster_name = "kwabenanyinaku1"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',# for GPU, use "STANDARD_NC6"
                                                           #vm_priority = 'lowpriority', # optional
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)
# For a more detailed view of current AmlCompute status, use get_status().

Found existing cluster, use it.

Running


In [8]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform, choice, normal
from azureml.core import Environment, ScriptRunConfig
import os
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig

# Specify parameter sampler
ps = RandomParameterSampling(
    {
        '--C':choice([0.3, 0.6, 0.9, 1.2]),
        '--max_iter':choice([40, 90, 140, 190, 240])
    }
) 

# Specify a Policy
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=6)

if "training" not in os.listdir():
    os.mkdir("./training")

if "models" not in os.listdir():
    os.mkdir("./models")

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='myenv', file_path='myenv.yml')

# Create a ScriptRunConfig Object to specify the configuration details of your training job
src = ScriptRunConfig(
    source_directory='./',
    script='train.py',
    arguments=['--C', 5,'--max_iter', 1000],
    compute_target=compute_target,
    environment=sklearn_env
)

# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=10,
    max_concurrent_runs=4
)

In [11]:
from azureml.widgets import RunDetails
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
hyperdrive_run=experiment.submit(hyperdrive_config)
RunDetails(hyperdrive_run).show()
hyperdrive_run.get_status()
hyperdrive_run.wait_for_completion(show_output=True)


_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_e21b2896-2343-43fd-ac99-d305dcfa5d3b
Web View: https://ml.azure.com/runs/HD_e21b2896-2343-43fd-ac99-d305dcfa5d3b?wsid=/subscriptions/d6ebc258-732e-4a76-8d46-be5af7f5737d/resourcegroups/tsar/workspaces/endofproj&tid=52299ebb-8afb-45a8-8e21-7df88891d90a

Streaming azureml-logs/hyperdrive.txt

"<START>[2022-04-11T16:25:15.347801][API][INFO]Experiment created<END>\n""<START>[2022-04-11T16:25:16.112458][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n"<START>[2022-04-11T16:25:16.9149084Z][SCHEDULER][INFO]Scheduling job, id='HD_e21b2896-2343-43fd-ac99-d305dcfa5d3b_0'<END><START>[2022-04-11T16:25:17.0700204Z][SCHEDULER][INFO]Scheduling job, id='HD_e21b2896-2343-43fd-ac99-d305dcfa5d3b_1'<END><START>[2022-04-11T16:25:17.1484339Z][SCHEDULER][INFO]Scheduling job, id='HD_e21b2896-2343-43fd-ac99-d305dcfa5d3b_2'<END>"<START>[2022-04-11T16:25:17.248871][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<S

{'runId': 'HD_e21b2896-2343-43fd-ac99-d305dcfa5d3b',
 'target': 'kwabenanyinaku1',
 'status': 'Completed',
 'startTimeUtc': '2022-04-11T16:25:15.118906Z',
 'endTimeUtc': '2022-04-11T16:32:21.866463Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '038f332a-daf0-4014-ac14-84595f7d2855',
  'user_agent': 'python/3.8.5 (Linux-5.4.0-1073-azure-x86_64-with-glibc2.10) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.39.0',
  'space_size': '20',
  'score': '0.8909090909090909',
  'best_child_run_id': 'HD_e21b2896-2343-43fd-ac99-d305dcfa5d3b_5',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://endofproj3126245557.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_e21b2896-2343-43fd-ac99-d305dcfa5d3b/azurem

In [12]:
import joblib
# Get your best run and save the model from that run.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
print('best run details:', best_run.get_details()['runDefinition']['arguments'])
print('best run metrics:', best_run.get_metrics())

best run details: ['--C', '5', '--max_iter', '1000', '--C', '0.3', '--max_iter', '40']
best run metrics: {'Regularization Strength:': 0.3, 'Max iterations:': 40, 'Accuracy': 0.8909090909090909}


In [13]:
model = best_run.register_model('hyperdrive_model', model_path ='outputs/model.joblib' )
print(model)

Model(workspace=Workspace.create(name='endofproj', subscription_id='d6ebc258-732e-4a76-8d46-be5af7f5737d', resource_group='tsar'), name=hyperdrive_model, id=hyperdrive_model:5, version=5, tags={}, properties={})


In [None]:
from azureml.core.model import InferenceConfig
from azureml.core.model import Model
from azureml.core.webservice import LocalWebservice, AciWebservice

script_file_name = 'inference/score.py'
hyperdrive_inference_config = InferenceConfig(entry_script=script_file_name,environment=sklearn_env)
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1,enable_app_insights=True)

model = ws.models['hyperdrive_model']
service = Model.deploy(workspace=ws,
                       name = 'hyperdrive-service',
                       models = [model],
                       inference_config = hyperdrive_inference_config,
                       deployment_config = aci_config
                       )
service.wait_for_deployment(show_output = True)