In [23]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
ws.write_config(path='.azureml')
exp = Experiment(workspace=ws, name="udacity-AzureML1")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: DSStudio
Azure region: eastus2
Subscription id: baa67dbf-45d0-4d84-b662-527186361068
Resource group: dwtr-t332-20210421


In [26]:
from azureml.core.compute import ComputeTarget, AmlCompute

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
cpu_cluster_name = "cluster-jupyter"
compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)
compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
compute_target.wait_for_completion(show_output=True)

SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [27]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import shutil
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler
#ps = ### YOUR CODE HERE ###
ps = RandomParameterSampling(
    {
        "--C" :        choice(0.001,0.01,0.1,1,10,20,50,100,200,500,1000),
        "--max_iter" : choice(50,100,200,300)
    }
)

# Specify a Policy
#Your Code Here
policy = BanditPolicy(slack_factor = 0.1, evaluation_interval = 1)

if "training" not in os.listdir():
    os.mkdir("./training")

# Create a SKLearn estimator for use with train.py
 ### YOUR CODE HERE ###
script_folder = './training'
os.makedirs(script_folder, exist_ok=True)
import shutil
shutil.copy('./train.py', script_folder)
myenv = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')

#est = ScriptRunConfig(
   # source_directory= script_folder,
    #compute_target=compute_target,
    #entry_script="train.py",
    #environment=myenv
#)
est = ScriptRunConfig(source_directory='.',
                      script='train.py',
                      compute_target=compute_target,
                      environment=myenv)
# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
### YOUR CODE HERE ###
hyperdrive_config = HyperDriveConfig(
    run_config=est,
    hyperparameter_sampling = ps, 
    primary_metric_name = "Accuracy",
    primary_metric_goal = PrimaryMetricGoal.MAXIMIZE, 
    max_total_runs = 15,
    max_concurrent_runs = 3,
    policy = policy
    #estimator = est
)

In [28]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
hyperdrive_run = exp.submit(hyperdrive_config)


In [45]:
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [30]:
hyperdrive_run.wait_for_completion(show_output = True)

RunId: HD_9ab8e907-5e31-4e77-9e5a-c1614b40e05a
Web View: https://ml.azure.com/runs/HD_9ab8e907-5e31-4e77-9e5a-c1614b40e05a?wsid=/subscriptions/baa67dbf-45d0-4d84-b662-527186361068/resourcegroups/dwtr-t332-20210421/workspaces/DSStudio&tid=fd799da1-bfc1-4234-a91c-72b3a1cb9e26

Streaming azureml-logs/hyperdrive.txt

"<START>[2022-05-18T09:30:30.113288][API][INFO]Experiment created<END>\n""<START>[2022-05-18T09:30:30.802435][GENERATOR][INFO]Trying to sample '3' jobs from the hyperparameter space<END>\n"<START>[2022-05-18T09:30:31.5647164Z][SCHEDULER][INFO]Scheduling job, id='HD_9ab8e907-5e31-4e77-9e5a-c1614b40e05a_0'<END><START>[2022-05-18T09:30:31.7389835Z][SCHEDULER][INFO]Scheduling job, id='HD_9ab8e907-5e31-4e77-9e5a-c1614b40e05a_1'<END>"<START>[2022-05-18T09:30:31.793693][GENERATOR][INFO]Successfully sampled '3' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2022-05-18T09:30:31.8709666Z][SCHEDULER][INFO]Scheduling job, id='HD_9ab8e907-5e31-4e77-9e5a-c1614b40e

{'runId': 'HD_9ab8e907-5e31-4e77-9e5a-c1614b40e05a',
 'target': 'cluster-jupyter',
 'status': 'Completed',
 'startTimeUtc': '2022-05-18T09:30:29.859361Z',
 'endTimeUtc': '2022-05-18T09:42:06.532185Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '75ea5053-49ea-4126-8a1a-81a1a8638011',
  'user_agent': 'python/3.8.5 (Linux-5.4.0-1074-azure-x86_64-with-glibc2.10) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.40.0',
  'space_size': '44',
  'score': '0.9174506828528073',
  'best_child_run_id': 'HD_9ab8e907-5e31-4e77-9e5a-c1614b40e05a_6',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_9ab8e907-5e31-4e77-9e5a-c1614b40e05a_6'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://dsstudio5486078760.blob.core.windows.

In [32]:
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']
print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
print('\n C:', best_run_metrics['Regularization Strength:'])
print('\n max_iter:', best_run_metrics['Max iterations:'])

Best Run Id:  HD_9ab8e907-5e31-4e77-9e5a-c1614b40e05a_6

 Accuracy: 0.9174506828528073

 C: 200.0

 max_iter: 100


In [33]:
best_run.get_details()['runDefinition']['arguments']


['--C', '200', '--max_iter', '100']

In [35]:
best_run.get_metrics(name='Accuracy')

{'Accuracy': 0.9174506828528073}

In [68]:
print(best_run.get_file_names())

['logs/azureml/dataprep/0/backgroundProcess.log', 'logs/azureml/dataprep/0/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/0/rslex.log', 'logs/azureml/dataprep/0/rslex.log.2022-05-18-09', 'system_logs/cs_capability/cs-capability.log', 'system_logs/hosttools_capability/hosttools-capability.log', 'system_logs/lifecycler/execution-wrapper.log', 'system_logs/lifecycler/lifecycler.log', 'user_logs/std_log.txt']


In [69]:
print(best_run)

Run(Experiment: udacity-AzureML1,
Id: HD_9ab8e907-5e31-4e77-9e5a-c1614b40e05a_6,
Type: azureml.scriptrun,
Status: Completed)


In [72]:
import joblib

hyperdrive_model = best_run.register_model(model_name = 'BankMarketingModel', model_path = 'outputs/model.joblib')


ModelPathNotFoundException: ModelPathNotFoundException:
	Message: Could not locate the provided model_path outputs/model.joblib in the set of files uploaded to the run: ['logs/azureml/dataprep/0/backgroundProcess.log', 'logs/azureml/dataprep/0/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/0/rslex.log', 'logs/azureml/dataprep/0/rslex.log.2022-05-18-09', 'system_logs/cs_capability/cs-capability.log', 'system_logs/hosttools_capability/hosttools-capability.log', 'system_logs/lifecycler/execution-wrapper.log', 'system_logs/lifecycler/lifecycler.log', 'user_logs/std_log.txt']
                See https://aka.ms/run-logging for more details.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Could not locate the provided model_path outputs/model.joblib in the set of files uploaded to the run: ['logs/azureml/dataprep/0/backgroundProcess.log', 'logs/azureml/dataprep/0/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/0/rslex.log', 'logs/azureml/dataprep/0/rslex.log.2022-05-18-09', 'system_logs/cs_capability/cs-capability.log', 'system_logs/hosttools_capability/hosttools-capability.log', 'system_logs/lifecycler/execution-wrapper.log', 'system_logs/lifecycler/lifecycler.log', 'user_logs/std_log.txt']\n                See https://aka.ms/run-logging for more details."
    }
}

In [65]:
best_run.download_file('Kuralay/hyperdrive_model.joblib', 'Kuralay/hp_best_model.joblib')

UserErrorException: UserErrorException:
	Message: File with path Kuralay/hyperdrive_model.joblib was not found,
available files include: logs/azureml/dataprep/0/backgroundProcess.log,logs/azureml/dataprep/0/backgroundProcess_Telemetry.log,logs/azureml/dataprep/0/rslex.log,logs/azureml/dataprep/0/rslex.log.2022-05-18-09,system_logs/cs_capability/cs-capability.log,system_logs/hosttools_capability/hosttools-capability.log,system_logs/lifecycler/execution-wrapper.log,system_logs/lifecycler/lifecycler.log,user_logs/std_log.txt.
	InnerException None
	ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "File with path Kuralay/hyperdrive_model.joblib was not found,\navailable files include: logs/azureml/dataprep/0/backgroundProcess.log,logs/azureml/dataprep/0/backgroundProcess_Telemetry.log,logs/azureml/dataprep/0/rslex.log,logs/azureml/dataprep/0/rslex.log.2022-05-18-09,system_logs/cs_capability/cs-capability.log,system_logs/hosttools_capability/hosttools-capability.log,system_logs/lifecycler/execution-wrapper.log,system_logs/lifecycler/lifecycler.log,user_logs/std_log.txt."
    }
}

In [53]:
model = best_run.register_model(model_name='hyperdrive-model-mushroom', model_path='outputs/hyperdrive_model.joblib')

ModelPathNotFoundException: ModelPathNotFoundException:
	Message: Could not locate the provided model_path outputs/hyperdrive_model.joblib in the set of files uploaded to the run: ['logs/azureml/dataprep/0/backgroundProcess.log', 'logs/azureml/dataprep/0/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/0/rslex.log', 'logs/azureml/dataprep/0/rslex.log.2022-05-18-09', 'system_logs/cs_capability/cs-capability.log', 'system_logs/hosttools_capability/hosttools-capability.log', 'system_logs/lifecycler/execution-wrapper.log', 'system_logs/lifecycler/lifecycler.log', 'user_logs/std_log.txt']
                See https://aka.ms/run-logging for more details.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Could not locate the provided model_path outputs/hyperdrive_model.joblib in the set of files uploaded to the run: ['logs/azureml/dataprep/0/backgroundProcess.log', 'logs/azureml/dataprep/0/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/0/rslex.log', 'logs/azureml/dataprep/0/rslex.log.2022-05-18-09', 'system_logs/cs_capability/cs-capability.log', 'system_logs/hosttools_capability/hosttools-capability.log', 'system_logs/lifecycler/execution-wrapper.log', 'system_logs/lifecycler/lifecycler.log', 'user_logs/std_log.txt']\n                See https://aka.ms/run-logging for more details."
    }
}

In [41]:
#TODO: Save the best model
best_run.download_file('cpstone/hyperdrive_model.joblib', 'models/hp_best_model.joblib')

UserErrorException: UserErrorException:
	Message: File with path cpstone/hyperdrive_model.joblib was not found,
available files include: logs/azureml/dataprep/0/backgroundProcess.log,logs/azureml/dataprep/0/backgroundProcess_Telemetry.log,logs/azureml/dataprep/0/rslex.log,logs/azureml/dataprep/0/rslex.log.2022-05-18-09,system_logs/cs_capability/cs-capability.log,system_logs/hosttools_capability/hosttools-capability.log,system_logs/lifecycler/execution-wrapper.log,system_logs/lifecycler/lifecycler.log,user_logs/std_log.txt.
	InnerException None
	ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "File with path cpstone/hyperdrive_model.joblib was not found,\navailable files include: logs/azureml/dataprep/0/backgroundProcess.log,logs/azureml/dataprep/0/backgroundProcess_Telemetry.log,logs/azureml/dataprep/0/rslex.log,logs/azureml/dataprep/0/rslex.log.2022-05-18-09,system_logs/cs_capability/cs-capability.log,system_logs/hosttools_capability/hosttools-capability.log,system_logs/lifecycler/execution-wrapper.log,system_logs/lifecycler/lifecycler.log,user_logs/std_log.txt."
    }
}

In [43]:
model = best_run.register_model(model_name='hyperdrive', model_path='outputs/hyperdrive_model.joblib')

ModelPathNotFoundException: ModelPathNotFoundException:
	Message: Could not locate the provided model_path outputs/hyperdrive_model.joblib in the set of files uploaded to the run: ['logs/azureml/dataprep/0/backgroundProcess.log', 'logs/azureml/dataprep/0/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/0/rslex.log', 'logs/azureml/dataprep/0/rslex.log.2022-05-18-09', 'system_logs/cs_capability/cs-capability.log', 'system_logs/hosttools_capability/hosttools-capability.log', 'system_logs/lifecycler/execution-wrapper.log', 'system_logs/lifecycler/lifecycler.log', 'user_logs/std_log.txt']
                See https://aka.ms/run-logging for more details.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Could not locate the provided model_path outputs/hyperdrive_model.joblib in the set of files uploaded to the run: ['logs/azureml/dataprep/0/backgroundProcess.log', 'logs/azureml/dataprep/0/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/0/rslex.log', 'logs/azureml/dataprep/0/rslex.log.2022-05-18-09', 'system_logs/cs_capability/cs-capability.log', 'system_logs/hosttools_capability/hosttools-capability.log', 'system_logs/lifecycler/execution-wrapper.log', 'system_logs/lifecycler/lifecycler.log', 'user_logs/std_log.txt']\n                See https://aka.ms/run-logging for more details."
    }
}