Copyright (C) Microsoft Corporation.

# Train with Mask RCNN

## Introduction

This notebook will train the model through Azure ML Compute.

In this notebook you will:
* Examine the training pipeline configuration.
* Create or connect to a GPU-enabled Azure ML Compute target.
* Examine the training and model export scripts.
* Create an Azure ML Estimator for training.
* Submit the experiment for training.
* Download and register the model.


## Examine the pipeline configuration.

In [None]:
 # Print the contents of the pipeline configuration file.
 !cat ./trainingcode/stockout_pipeline.config

In [None]:
import os
import urllib
import shutil
import azureml

from azureml.core import Experiment
from azureml.core import Workspace, Run

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
# Connect to the Azure ML workspace described by the saved workspace configuration.
ws = Workspace.from_config()

# Local folder holding the training scripts; created here if it does not exist yet.
script_folder = './trainingcode'
os.makedirs(script_folder, exist_ok=True)

# Experiment that the training run is submitted to.
exp = Experiment(workspace=ws, name='voiddetection')

## Connect to Azure ML Compute

In [None]:
# Name of the GPU cluster to reuse, or to create when it does not exist.
cluster_name = "gpuclusternc"

try:
    # Looking the target up by name raises ComputeTargetException when it is missing.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    provisioning = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC6',
        max_nodes=1,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, provisioning)

    # Wait for provisioning to finish, giving up after 20 minutes.
    compute_target.wait_for_completion(
        show_output=True,
        min_node_count=None,
        timeout_in_minutes=20,
    )

## Create the training and model export scripts

In [None]:
%%writefile ./trainingcode/train.py

import argparse
import os
import sys
import subprocess

# Command-line options; the export step below receives the same two values.
parser = argparse.ArgumentParser()
parser.add_argument('--pipeline-config-path', type=str, dest='pipeline_config', help='Inception pipeline config path')
parser.add_argument('--num-steps', type=int, dest='num_steps', help='number of training steps')
args = parser.parse_args()


def install(package):
    """Install *package* with pip into the interpreter running this script.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
            Failing here keeps a broken install from surfacing later as an
            unrelated ImportError.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])


# The object detection packages are installed from local archives and must be
# in place before the imports below.
install('/datastore/packages/object_detection-0.1.tar.gz')
install('/datastore/packages/slim-0.1.tar.gz')
install('/datastore/packages/pycocotools-2.0.tar.gz')

from tensorflow.python.platform import flags
from object_detection import model_main


class TrainFlagValues:
    """Plain attribute holder that replaces model_main's FLAGS object."""
    pipeline_config_path = args.pipeline_config
    model_dir = './outputs'
    num_train_steps = args.num_steps
    sample_1_of_n_eval_examples = 10
    sample_1_of_n_eval_on_train_examples = 5
    alsologtostderr = True
    hparams_overrides = None
    checkpoint_dir = None
    run_once = False
    log_dir = "./logs"


# Swap in the values above instead of parsing model_main's own command line.
model_main.FLAGS = TrainFlagValues()

model_main.main(None)

# Export the trained model in a separate interpreter. check_call raises on a
# non-zero exit and leaves the exporter's output attached to this process's
# stdout/stderr rather than capturing and discarding it.
subprocess.check_call([
    sys.executable, './export.py',
    '--pipeline-config-path', args.pipeline_config,
    '--num-steps', str(args.num_steps),
])


In [None]:
%%writefile ./trainingcode/export.py
import argparse

from object_detection import export_inference_graph

# Command-line options: pipeline config path and number of training steps.
cli = argparse.ArgumentParser()
cli.add_argument('--pipeline-config-path', type=str, dest='pipeline_config', help='Inception pipeline config path')
cli.add_argument('--num-steps', type=int, dest='num_steps', help='number of training steps')
options = cli.parse_args()

# Checkpoint prefix built from the step count given on the command line.
checkpoint_prefix = "./outputs/model.ckpt-{}".format(options.num_steps)


class ExportFlagValues:
    """Plain attribute holder that replaces export_inference_graph's FLAGS object."""
    pipeline_config_path = options.pipeline_config
    input_type = "image_tensor"
    input_shape = None
    trained_checkpoint_prefix = checkpoint_prefix
    output_directory = "./outputs/frozen_graph/"
    config_override = ''
    write_inference_graph = False


# Swap in the values above, then run the exporter's entry point.
export_inference_graph.FLAGS = ExportFlagValues()
export_inference_graph.main(None)

## Submit job for training

In [None]:
from azureml.train.estimator import Estimator
from azureml.train.dnn import TensorFlow

# Arguments passed to train.py on its command line.
script_params = {
    "--pipeline-config-path": "./stockout_pipeline.config",
    "--num-steps": 50000,
}

# `ds` was never defined in this notebook, so the estimator cell failed with a
# NameError. train.py installs its packages from /datastore/packages, so the
# datastore is downloaded to /datastore on the compute target.
# NOTE(review): assumes the packages and training data live in the workspace's
# default datastore; point `ds` at the right datastore if another one was used.
ds = ws.get_default_datastore()

est = TensorFlow(source_directory='trainingcode',
                 compute_target=compute_target,
                 script_params=script_params,
                 entry_script='train.py',
                 inputs=[ds.as_download("/datastore")],
                 use_gpu=True)

In [None]:
# Submit the estimator to the experiment, then block until the run finishes
# while showing its output.
run = exp.submit(est)
run.wait_for_completion(show_output=True)

2019-08-14 13:39:44.166052: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0
2019-08-14 13:39:44.166116: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-08-14 13:39:44.166125: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990]      0 
2019-08-14 13:39:44.166131: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0:   N 
2019-08-14 13:39:44.166308: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10805 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 91c9:00:00.0, compute capability: 3.7)
creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.29s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.483


## Download and register the model

In [None]:
# Register the frozen inference graph from the run's outputs as a model.
model = run.register_model(
    model_name='maskrcnn-void-detection',
    model_path='outputs/frozen_graph/frozen_inference_graph.pb',
)

In [None]:
# Make sure the local ./model folder exists before downloading into it.
os.makedirs('./model', exist_ok=True)

# Fetch every run file whose name starts with the frozen-graph prefix and
# store it under ./model using only the file's base name.
for remote_name in run.get_file_names():
    if not remote_name.startswith('outputs/frozen_graph/frozen'):
        continue
    local_path = os.path.join('./model', remote_name.rsplit('/', 1)[-1])
    print('Downloading from {} to {} ...'.format(remote_name, local_path))
    run.download_file(name=remote_name, output_file_path=local_path)