In [128]:
from azureml.core import Workspace, Datastore, Experiment, Run
from azureml.data.data_reference import DataReference
# from azureml.widgets import RunDetails
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.dnn import PyTorch

ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

 

aml-nvs-experimentation-001
NVS_Training
westeurope
c012b3d9-9801-4d9f-8f5c-8e688cf2e70b


In [129]:
# Choose a name for your CPU cluster
gpu_cluster_name = "nvscomptar001"
min_node = 0
max_node = 10
vm_size = 'STANDARD_NC12'


# Verify that cluster does not exist already
try:
    gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException as e:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size = vm_size,
        min_nodes = min_node,
        max_nodes = max_node
        )
    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)


gpu_cluster.wait_for_completion(show_output=True)



Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [130]:
ds = ws.get_default_datastore()
try:
    blob_datastore = Datastore.register_azure_blob_container(workspace=ws, 
                                                         datastore_name=ds.name+'_nvsexperiment', 
                                                         container_name='nvsexperiment', 
                                                         account_name=ds.account_name,
                                                         account_key=ds.account_key
    )
except Exception:
    blob_datastore = Datastore.get(ws, datastore_name=ds.name+'_nvsexperiment') 

In [131]:
script_params = {
    '--container_dir': blob_datastore.as_download(),
    '--data_dir': '/datasets/cell_images/',
    '--output_dir': './outputs',
    '--num_epochs': 5,
}

In [132]:
exp = Experiment(workspace=ws, name='exp-malaria-dection')

In [133]:
estimator = PyTorch(source_directory='../src', 
                    script_params=script_params,
                    compute_target=gpu_cluster,
                    entry_script='train.py',
                    use_gpu=True,
                    pip_packages=['pillow==5.4.1']
                    )

framework_version is not specified, defaulting to version 1.4.


In [134]:
run = exp.submit(estimator)

In [135]:
run.wait_for_completion()['error']['error']['message']

KeyError: 'error'

In [136]:
run.register_model(
    model_path = './outputs/model.pt',
    model_name = "pytorch-malaria-cell-class"
    )

Model(workspace=Workspace.create(name='aml-nvs-experimentation-001', subscription_id='c012b3d9-9801-4d9f-8f5c-8e688cf2e70b', resource_group='NVS_Training'), name=pytorch-malaria-cell-class, id=pytorch-malaria-cell-class:3, version=3, tags={}, properties={})