In [12]:
import azureml
from azureml.core import Workspace


# Obtain the workspace handle via Workspace.from_config() and echo its identifying fields,
# one per line.
ws = Workspace.from_config()
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')

Workspace name: mlserviceworkspace
Azure region: westus2
Subscription id: 601f4351-33bb-4d76-96ca-886940409b3d
Resource group: mlserviceworkspace


In [18]:
# `os` is needed for makedirs below; no earlier cell imports it, so import it here to keep
# the notebook runnable from a fresh kernel.
import os

from azureml.core import Experiment

# Local folder used as the source directory for the training script; created if absent.
script_folder = './mask_rcnn_horovod'
os.makedirs(script_folder, exist_ok=True)

# Experiment under which runs are submitted in this workspace.
exp = Experiment(workspace=ws, name='mask_rcnn_horovod_run')

In [None]:
import os

from azureml.core import Datastore

blob_datastore_name = 'isic2018'  # Name under which the datastore is registered in the workspace
container_name = os.getenv("BLOB_CONTAINER", "isic2018")  # Name of Azure blob container
account_name = os.getenv("BLOB_ACCOUNTNAME", "mlblobdatastore")  # Storage account name

# The storage account key is a secret: read it from the environment only and never keep a
# literal fallback in the notebook. NOTE(review): any key that was previously committed in
# this cell should be treated as compromised and rotated.
account_key = os.getenv("BLOB_ACCOUNT_KEY")
if not account_key:
    raise RuntimeError(
        "BLOB_ACCOUNT_KEY is not set; export the storage account key in the environment "
        "before registering the datastore."
    )

# Register the blob container as a datastore of the workspace.
blob_datastore = Datastore.register_azure_blob_container(workspace=ws,
                                                         datastore_name=blob_datastore_name,
                                                         container_name=container_name,
                                                         account_name=account_name,
                                                         account_key=account_key)

In [14]:
from azureml.core import Datastore
# Look up the datastore named 'isic2018' in the current workspace.
datastore = Datastore.get(workspace=ws, datastore_name='isic2018')

In [None]:
from azureml.core.dataset import Dataset


# Folders on the datastore that together form the training file dataset.
training_folders = [
    'ISIC2018_Task1-2_Training_Input',
    'ISIC2018_Task1_Training_GroundTruth',
]

# Each entry pairs the datastore with one folder path on it.
datastore_paths = [(datastore, folder) for folder in training_folders]

isic_ds_training = Dataset.File.from_files(path=datastore_paths)

In [None]:
# Register the ISIC training file dataset in the workspace under the name 'isic_ds',
# requesting a new version (create_new_version=True).
isic_ds = isic_ds_training.register(
    workspace=ws,
    name='isic_ds',
    description='isic training data',
    create_new_version=True,
)

In [None]:
from azureml.core.dataset import Dataset

# Fetch the dataset registered as 'isic_ds' from the workspace.
dataset = Dataset.get_by_name(workspace=ws, name='isic_ds')

# list the files referenced by the isic_ds dataset
dataset.to_path()

In [15]:
# Show the training entry script so its contents are visible alongside the notebook.
script_folder = './mask_rcnn_horovod'
lesions_script_path = os.path.join(script_folder, './lesions.py')
with open(lesions_script_path, 'r') as script_file:
    script_source = script_file.read()
    print(script_source)

# USAGE
# python lesions.py --mode train
# python lesions.py --mode investigate
# python lesions.py --mode predict \
# 	--image isic2018/ISIC2018_Task1-2_Training_Input/ISIC_0000000.jpg

# import the necessary packages
from imgaug import augmenters as iaa
from mrcnn.config import Config
from mrcnn import model as modellib
from mrcnn import utils
from imutils import paths
import numpy as np
import argparse
import imutils
import random
import cv2
import os
from mrcnn import visualize
import glob
from azureml.core import Run



# dataset object from the run

run = Run.get_context()


# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')
ap.add_argument("-w", "--weights",
    help="optional path to pretrained weights")
ap.add_argument("-m", "--mode",
    help="train or investigate")
args = vars(ap.parse_args())

data_folder = args["data_folder"]
print('Data fol

In [16]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the cluster to reuse, or to create when it does not exist yet.
cluster_name = "gpucluster"

try:
    # Constructing ComputeTarget raises ComputeTargetException when the lookup fails,
    # which is handled below by provisioning a new cluster.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC6',
        max_nodes=4,
    )

    # Kick off cluster creation.
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

    # Block until provisioning finishes (or the 20 minute timeout elapses). With
    # min_node_count=None the cluster's own scale settings decide how many nodes to wait for.
    compute_target.wait_for_completion(
        show_output=True,
        min_node_count=None,
        timeout_in_minutes=20,
    )
else:
    print('Found existing compute target')

# Print the detailed, serialized status of the cluster.
cluster_status = compute_target.get_status()
print(cluster_status.serialize())

Found existing compute target
{'currentNodeCount': 4, 'targetNodeCount': 4, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 1, 'idleNodeCount': 3, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2019-09-09T06:14:05.192000+00:00', 'errors': None, 'creationTime': '2019-09-09T04:29:17.187921+00:00', 'modifiedTime': '2019-09-09T06:07:38.700134+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 4, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_NC6'}


In [19]:
from azureml.train.dnn import TensorFlow, Mpi

# Arguments passed to the entry script: the mounted datastore as the data folder,
# and the mode set to 'train'.
script_params = {'--data-folder': datastore.as_mount(), '--mode': 'train'}

# TensorFlow estimator running lesions.py on 3 nodes of the compute target with MPI-based
# distributed training, GPU enabled, and the listed extra pip packages.
est = TensorFlow(
    source_directory=script_folder,
    script_params=script_params,
    compute_target=compute_target,
    entry_script='lesions.py',
    node_count=3,
    distributed_training=Mpi(),
    framework_version='1.13',
    use_gpu=True,
    pip_packages=[
        'IPython[all]',
        'scikit-image',
        'cython',
        'Pillow',
        'numpy',
        'scipy',
        'azureml-sdk',
        'keras',
        'matplotlib',
        'azureml-dataprep[pandas,fuse]',
        'imgaug',
        'imutils',
        'opencv-python',
        'h5py',
    ],
)

In [None]:
# Cancel a run left over from an earlier submission in this kernel session, if any.
# `run` is only assigned by the submit cell further down, so on a fresh kernel
# (Restart & Run All) there is nothing to cancel and this cell must not fail.
if 'run' in globals():
    run.cancel()

In [None]:
# Submit the estimator to the experiment; the returned run object is used by the cells below.
run = exp.submit(config=est)

In [None]:
from azureml.widgets import RunDetails
# Render the run-monitoring widget for the submitted run.
run_details_widget = RunDetails(run)
run_details_widget.show()

In [None]:
from azureml.tensorboard import Tensorboard

# Attach a Tensorboard instance to the submitted run.
tracked_runs = [run]
tb = Tensorboard(tracked_runs)

# On success, start() returns the URI of the instance as a string; it is left as the
# last expression so the notebook displays it.
tb.start()
