## Validate Azure ML SDK installation and get version number for debugging purposes

In [None]:
# Print the installed azureml-core version number (useful when debugging SDK issues)
import azureml.core
print("SDK version:", azureml.core.VERSION)

## Diagnostics
Opt-in diagnostics for better experience, quality, and security of future releases.

In [None]:
from azureml.telemetry import set_diagnostics_collection
# Opt in to diagnostics collection by passing send_diagnostics=True
set_diagnostics_collection(send_diagnostics = True)

## Initialize Workspace
Initialize a workspace object from persisted configuration.

In [None]:
# Load the workspace from the persisted configuration and echo its identity
from azureml.core import Workspace

ws = Workspace.from_config()
for caption, value in (
    ("Resource group: ", ws.resource_group),
    ("Location: ", ws.location),
    ("Workspace name: ", ws.name),
):
    print(caption, value)

## Create An Experiment
**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments.

In [None]:
from azureml.core import Experiment
# Name under which the training run submitted later in this notebook is recorded
experiment_name = 'image-retraining'
# Bind the experiment to the workspace loaded above
experiment = Experiment(workspace = ws, name = experiment_name)

## Provision the AKS Cluster

We need this cluster later in this exercise to deploy our service.

This is a one-time setup. You can reuse this cluster for multiple deployments after it has been created. If you delete the cluster or the resource group that contains it, you will have to recreate it.

In [None]:
from azureml.core.compute import AksCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

aks_name = 'myaks'

try:
    # Try to attach to a cluster of this name; ComputeTargetException is
    # treated below as "no such cluster yet".
    aks_target = AksCompute(workspace=ws, name=aks_name)
    print('found existing:', aks_target.name)
except ComputeTargetException:
    print('creating new.')

    # AKS configuration: three Standard_B4ms agent nodes
    aks_settings = {'agent_count': 3, 'vm_size': "Standard_B4ms"}
    prov_config = AksCompute.provisioning_configuration(**aks_settings)

    # Start cluster creation; this cell does not wait for it to finish
    aks_target = ComputeTarget.create(
        workspace=ws,
        name=aks_name,
        provisioning_configuration=prov_config,
    )

## Create Azure ML Compute cluster (GPU-enabled) as a compute target

In [None]:
from azureml.core.compute import AmlCompute
from azureml.core.compute_target import ComputeTargetException

compute_target_name = 'myamlcompute'

try:
    # Reuse a compute target of this name when the lookup succeeds
    aml_compute = AmlCompute(workspace=ws, name=compute_target_name)
    print('found existing:', aml_compute.name)
except ComputeTargetException:
    print('creating new.')
    # Dedicated Standard_NC6 nodes, 0 to 4 of them, with
    # idle_seconds_before_scaledown set to 300
    cluster_settings = {
        'vm_size': "Standard_NC6",
        'vm_priority': "dedicated",
        'min_nodes': 0,
        'max_nodes': 4,
        'idle_seconds_before_scaledown': 300,
    }
    aml_config = AmlCompute.provisioning_configuration(**cluster_settings)
    aml_compute = AmlCompute.create(
        ws,
        name=compute_target_name,
        provisioning_configuration=aml_config,
    )
    # Unlike the AKS cell, block here until provisioning has finished
    aml_compute.wait_for_completion(show_output=True)

## Upload data files into datastore
Every workspace comes with a default datastore (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and access it from the compute target.

In [None]:
# Fetch the workspace's default datastore and print its identifying properties
ds = ws.get_default_datastore()
for caption, attribute in (
    ("Datastore name: ", "name"),
    ("Datastore type: ", "datastore_type"),
    ("Account name: ", "account_name"),
    ("Container name: ", "container_name"),
):
    print(caption, getattr(ds, attribute))

Download and unpack flower images

In [None]:
import os
import shutil
import urllib.request

# Local scratch directory that receives the archive and its unpacked contents
tmp_path = '../tmp/image_retraining'
os.makedirs(tmp_path, exist_ok=True)

archive_url = "http://download.tensorflow.org/example_images/flower_photos.tgz"
archive_path = tmp_path + "/flower_photos.tgz"

print('Downloading flower photos...')
urllib.request.urlretrieve(archive_url, archive_path)
print('Unpacking archive...')
# Extract next to the archive, i.e. directly into tmp_path
shutil.unpack_archive(archive_path, tmp_path)
print('Done')

Upload files to the datastore

In [None]:
images_path = tmp_path + '/flower_photos/'
# Walk the unpacked tree and upload it one directory at a time
for (dirpath, dirnames, filenames) in os.walk(images_path):
    print('Uploading', dirpath, '...')
    local_files = [dirpath + '/' + f for f in filenames]
    # Drop the local scratch prefix so the datastore path starts at 'flower_photos'
    remote_dir = dirpath.replace(tmp_path + '/', '')
    ds.upload_files(local_files, target_path=remote_dir, overwrite=True)
print('Done')

## Create a project directory
Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on.

In [None]:
import os
import shutil

# Local folder that is handed to the estimator as its source directory
project_folder = '../projects/image_retraining'
os.makedirs(project_folder, exist_ok=True)

# Copy the training entry script into the project folder
shutil.copy('./scripts/retrain.py', project_folder)

## Create a TensorFlow estimator
The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, see [this guide](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-train-tensorflow).

In [None]:
from azureml.train.dnn import TensorFlow
from azureml.core.runconfig import DataReferenceConfiguration

# Command-line arguments passed to retrain.py
script_params={
    # NOTE(review): presumably str(ds.as_download()) renders a placeholder that is
    # resolved to the downloaded data location on the compute target - confirm
    '--image_dir': str(ds.as_download()),
    '--summaries_dir': './logs',
    '--output_graph': './outputs/output_graph.pb',
    '--output_labels': './outputs/output_labels.txt',
    '--saved_model_dir': './outputs/model'
}

# Single-node GPU training job that runs retrain.py from project_folder on aml_compute
estimator = TensorFlow(source_directory=project_folder,
                       source_directory_data_store=ds,
                       compute_target=aml_compute,
                       script_params=script_params,
                       entry_script='retrain.py',
                       pip_packages=['tensorflow_hub'],
                       node_count=1,
                       use_gpu=True)

# Replace the data reference stored under ds.name with one that points at the
# 'flower_photos' path on the datastore and uses download mode
dr = DataReferenceConfiguration(
    datastore_name=ds.name, 
    path_on_datastore='flower_photos', 
    mode='download', # download files from datastore to compute target
    overwrite=True
)
estimator.run_config.data_references[ds.name] = dr

## Submit job
Run your experiment by submitting your estimator object. Note that this call is asynchronous.

In [None]:
# Submit the estimator to the experiment and print the details of the resulting run
run = experiment.submit(estimator)
print(run.get_details())

In [None]:
# Wait for the submitted run, with show_output enabled
run.wait_for_completion(show_output=True)

## Download results

In [None]:
import time

# Poll until the run reaches a terminal state. 'Canceled' is terminal as well;
# without it a cancelled run would keep this loop spinning forever.
terminal_states = ('Completed', 'Failed', 'Canceled')
status = run.get_status()
seconds = 10
while status not in terminal_states:
    print('current status: {} - waiting...'.format(status))
    time.sleep(seconds)
    # Back off in 10-second steps until the polling interval reaches 60 seconds
    if seconds < 60:
        seconds = seconds + 10
    status = run.get_status()

In [None]:
import os

# Local folder that receives the files downloaded from the run
outputs_path = '../outputs/image_retraining'
os.makedirs(outputs_path, exist_ok=True)

In [None]:
# Download every file under the run's 'outputs/' folder into outputs_path,
# keeping the layout relative to that folder.
output_prefix = 'outputs/'
for filename in run.get_file_names():
    # Match the folder prefix including its slash, so that a name which merely
    # starts with the text 'outputs' is not picked up and written to a mangled path.
    if not filename.startswith(output_prefix):
        continue
    print("downloading", filename, '...')
    run.download_file(
        filename,
        # Strip only the leading prefix; a later 'outputs/' segment stays intact
        output_file_path=outputs_path + '/' + filename[len(output_prefix):]
    )
print('completed')

## Test model locally

In [None]:
import tensorflow as tf
import numpy as np

# Show the local TensorFlow version; the helpers in this notebook use
# TF 1.x-style APIs such as tf.GraphDef and tf.Session
print("TensorFlow Version: ", tf.__version__)

In [None]:
# Locations of the downloaded graph file and label list inside outputs_path
model_file = os.path.join(outputs_path, "output_graph.pb")
label_file = os.path.join(outputs_path, "output_labels.txt")

In [None]:
def load_graph(model_file):
    """Parse the serialized GraphDef in `model_file` and import it into a new tf.Graph.

    The graph definition is imported without an explicit name; callers in this
    notebook look the operations up under the "import/" prefix.

    Returns the populated tf.Graph.
    """
    imported = tf.Graph()
    definition = tf.GraphDef()

    with open(model_file, "rb") as model_handle:
        serialized = model_handle.read()
        definition.ParseFromString(serialized)

    with imported.as_default():
        tf.import_graph_def(definition)
    return imported

def read_tensor_from_image_file(file_name,
                                input_height=299,
                                input_width=299,
                                input_mean=0,
                                input_std=255):
    """Load an image file and return it as a normalized batch of one image.

    The decoder is chosen from the file extension (.png, .gif, .bmp; anything
    else is decoded as JPEG). The image is cast to float32, given a leading
    batch dimension, resized bilinearly to (input_height, input_width) and
    normalized as (pixel - input_mean) / input_std.

    Returns the evaluated result of that pipeline as returned by Session.run.
    """
    # Extensions are compared case-insensitively so "photo.PNG" is not sent
    # to the JPEG decoder.
    lowered_name = file_name.lower()

    # Build the ops in a throwaway graph: adding them to the process-wide
    # default graph would make it grow with every image that is read.
    with tf.Graph().as_default():
        file_reader = tf.read_file(file_name, "file_reader")
        if lowered_name.endswith(".png"):
            image_reader = tf.image.decode_png(file_reader, channels=3, name="png_reader")
        elif lowered_name.endswith(".gif"):
            image_reader = tf.squeeze(tf.image.decode_gif(file_reader, name="gif_reader"))
        elif lowered_name.endswith(".bmp"):
            image_reader = tf.image.decode_bmp(file_reader, name="bmp_reader")
        else:
            image_reader = tf.image.decode_jpeg(file_reader, channels=3, name="jpeg_reader")
        float_caster = tf.cast(image_reader, tf.float32)
        dims_expander = tf.expand_dims(float_caster, 0)
        resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
        normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
        # The session picks up the temporary graph because it is the current default
        with tf.Session() as sess:
            return sess.run(normalized)

def load_labels(label_file):
    """Return the lines of `label_file` as a list, with trailing whitespace stripped."""
    # Use the file as a context manager so the handle is closed after reading
    with tf.gfile.GFile(label_file) as label_handle:
        return [line.rstrip() for line in label_handle.readlines()]

### Load Model

In [None]:
# Import the downloaded graph file into a tf.Graph
graph = load_graph(model_file)

# Preprocessing parameters passed to read_tensor_from_image_file below
input_height = 299
input_width = 299
input_mean = 0
input_std = 255

# Operation names that predict_flower looks up under the "import/" prefix
input_layer = "Placeholder"
output_layer = "final_result"

In [None]:
def predict_flower(data):
    """Feed `data` to the loaded graph and print the top (up to five) labels with scores.

    Relies on the module-level `graph`, `input_layer`, `output_layer` and
    `label_file`. Prints one "label score" line per result, best first.
    """
    feed_op = graph.get_operation_by_name("import/" + input_layer)
    fetch_op = graph.get_operation_by_name("import/" + output_layer)

    with tf.Session(graph=graph) as sess:
        raw_scores = sess.run(fetch_op.outputs[0], {feed_op.outputs[0]: data})
    scores = np.squeeze(raw_scores)

    # argsort is ascending: keep the last five indices and reverse them
    best_first = scores.argsort()[-5:][::-1]
    names = load_labels(label_file)
    for idx in best_first:
        print(names[idx], scores[idx])

### Predict test data

Feed the test dataset to the model to get predictions.

In [None]:
# Local test image to classify
file_name = "./resources/test-images/Daisy1.jpg"

# Preprocess the image with the same parameters defined when the model was loaded
t = read_tensor_from_image_file(
    file_name,
    input_height=input_height,
    input_width=input_width,
    input_mean=input_mean,
    input_std=input_std
)

# Print the top predictions for this image
predict_flower(t)

In [None]:
# Second local test image to classify
file_name = "./resources/test-images/Rose1.jpg"

# Preprocess the image with the same parameters defined when the model was loaded
t = read_tensor_from_image_file(
    file_name,
    input_height=input_height,
    input_width=input_width,
    input_mean=input_mean,
    input_std=input_std
)

# Print the top predictions for this image
predict_flower(t)

## Deploy a model in Azure Kubernetes Services (AKS)
### Register a model

In [None]:
from azureml.core.model import Model

# These names are the ones score_flowers.py passes to Model.get_model_path
model_graph_name = "flower_photos_graph"
model_labels_name = "flower_photos_labels"

# Both artifacts are registered with the same tags
shared_tags = {"data": "flower_photos", "model": "classification"}

# Register the graph file
model_graph = Model.register(
    workspace=ws,
    model_name=model_graph_name,
    model_path=model_file,
    description="Retrained Inception V3 model with flower photos",
    tags=dict(shared_tags),
)

# Register the label list as a second model
model_labels = Model.register(
    workspace=ws,
    model_name=model_labels_name,
    model_path=label_file,
    description="Output labels of the retrained Inception V3 model with flower photos",
    tags=dict(shared_tags),
)

## Deploy as web service

Once you've tested the model and are satisfied with the results, deploy the model as a web service hosted in AKS. 

To build the correct environment for AKS, provide the following:
* A scoring script to show how to use the model
* An environment file to show what packages need to be installed
* A configuration file to build the AKS deployment
* The model you trained before

### Check AKS Cluster state

In [None]:
import time

# Wait until AKS provisioning reaches a terminal state. 'Canceled' is included
# so that a cancelled provisioning operation cannot keep this loop running forever.
aks_terminal_states = ('Succeeded', 'Failed', 'Canceled')
status = aks_target.get_status()
while status not in aks_terminal_states:
    print('current status: {} - waiting...'.format(status))
    time.sleep(10)
    status = aks_target.get_status()
# Report the outcome so a failed or cancelled cluster is visible before deploying
print('final status:', status)

### Create scoring script

Create the scoring script, called score.py, used by the web service call to show how to use the model.

You must include two required functions into the scoring script:
* The `init()` function, which typically loads the model into a global object. This function is run only once when the Docker container is started. 

* The `run(input_data)` function uses the model to predict a value based on the input data. Inputs and outputs to the run typically use JSON for serialization and de-serialization, but other formats are supported.

In [None]:
%%writefile score_flowers.py
import json
import os
import traceback
import numpy as np
import tensorflow as tf
import time

from azureml.core.model import Model

def load_graph(graph_path):
    """Import the serialized graph at `graph_path` and publish it through globals.

    Sets the module-level `graph`, `input_operation` ('import/Placeholder')
    and `output_operation` ('import/final_result') that run() uses.
    """
    global graph, input_operation, output_operation

    print("loading graph from", graph_path, time.strftime("%H:%M:%S"))
    graph = tf.Graph()
    frozen_def = tf.GraphDef()

    with open(graph_path, "rb") as graph_file:
        frozen_def.ParseFromString(graph_file.read())

    with graph.as_default():
        tf.import_graph_def(frozen_def)

    # Resolve the input and output operations once, at load time
    find_op = graph.get_operation_by_name
    input_operation = find_op('import/Placeholder')
    output_operation = find_op('import/final_result')
    print("graph loaded successfully.", time.strftime("%H:%M:%S"))

def load_labels(label_path):
    """Read the label file at `label_path` into the module-level `labels` list.

    Each line becomes one entry, with trailing whitespace stripped.
    """
    global labels

    print("loading labels from", label_path, time.strftime("%H:%M:%S"))
    # Use the file as a context manager so the handle is closed after reading
    with tf.gfile.GFile(label_path) as label_handle:
        labels = [line.rstrip() for line in label_handle.readlines()]
    print("labels loaded successfully.", time.strftime("%H:%M:%S"))

def init():
    """Load the registered graph and label models into module-level globals.

    Any exception is logged together with its traceback and then re-raised.
    """
    try:
        print ("model initializing" + time.strftime("%H:%M:%S"))
        # retrieve the path of each registered model by its name, then load it
        load_graph(Model.get_model_path('flower_photos_graph'))
        load_labels(Model.get_model_path('flower_photos_labels'))
        print ("model initialized" + time.strftime("%H:%M:%S"))
    except Exception as e:
        print (str(e) + time.strftime("%H:%M:%S"))
        print (traceback.format_exc())
        raise

def run(raw_data):
    """Score one request and return the top (up to five) predictions.

    raw_data: JSON text encoding the input array that is fed to the graph's
        input operation.

    Returns a JSON string encoding a list of [label, score] pairs, best first.
    If anything fails, the formatted traceback is returned as the response
    instead of raising.
    """
    try:
        data = np.array(json.loads(raw_data))
        print ("image array: " + str(data)[:50])

        # make prediction
        with tf.Session(graph=graph) as sess:
            results = sess.run(output_operation.outputs[0], {
                input_operation.outputs[0]: data
            })
        results = np.squeeze(results)

        # argsort is ascending: keep the last five indices and reverse them
        top_k = results.argsort()[-5:][::-1]

        # float() turns the numpy scalars into plain Python floats, which the
        # json module can serialize
        result = [[labels[i], float(results[i])] for i in top_k]
        print ("result: " + str(result))

        # Return real JSON (str() of a list is not valid JSON) so that clients
        # can parse the response with json.loads
        return json.dumps(result)
    except Exception as e:
        error = str(e)
        stacktrace = traceback.format_exc()
        print (error + time.strftime("%H:%M:%S"))
        print (stacktrace)
        return stacktrace

### Create environment file

Next, create an environment file, called myenv.yml, that specifies all of the script's package dependencies. This file is used to ensure that all of those dependencies are installed in the Docker image. This model needs `tensorflow`, `numpy` and `azureml-sdk`.

In [None]:
from azureml.core.conda_dependencies import CondaDependencies 

# Build the conda dependency specification for the scoring image
myenv = CondaDependencies()
# CPU build of TensorFlow
myenv.add_tensorflow_conda_package(core_type='cpu')
myenv.add_conda_package("numpy")
#myenv.add_pip_package("azureml-monitoring")

# Write the specification to myenv.yml inside the project folder
with open(os.path.join(project_folder, "myenv.yml"),"w") as f:
    f.write(myenv.serialize_to_string())

Review the content of the `myenv.yml` file.

In [None]:
# Print the generated environment file for review
with open(os.path.join(project_folder, "myenv.yml"),"r") as f:
    print(f.read())

### Create image configuration

Define the image configuration using:
* The scoring file (`score_flowers.py`)
* The environment file (`myenv.yml`)

In [None]:
from azureml.core.image import ContainerImage

# Configure the image: score_flowers.py as the execution script, Python runtime,
# and the conda environment file written to the project folder
image_config = ContainerImage.image_configuration(
    execution_script="score_flowers.py", 
    runtime="python", 
    conda_file=os.path.join(project_folder, "myenv.yml")
)

### Create configuration file

Create a deployment configuration file and specify the number of CPUs and gigabytes of RAM needed for your AKS container. While it depends on your model, the default of 1 core and 1 gigabyte of RAM is usually sufficient for many models. If you feel you need more later, you would have to recreate the image and redeploy the service.

In [None]:
from azureml.core.webservice import AksWebservice

# Deployment settings for the AKS web service: 1 CPU core, 1 GB of memory,
# Application Insights enabled, plus descriptive tags
aks_config = AksWebservice.deploy_configuration(
    cpu_cores=1, 
    memory_gb=1, 
    #collect_model_data=True,
    enable_app_insights=True, 
    tags={"data": "flower_photos",  "method" : "TensorFlow"}, 
    description='Predict flowers with TensorFlow'
)

### Creating and Deploying the image in AKS
Estimated time to complete: **about 5-8 minutes**

The following code goes through these steps:

1. Create the image and store it in the workspace. 
1. Send the image to the AKS cluster.
1. Start up a container in AKS using the image.
1. Get the web service HTTP endpoint.

In [None]:
%%time
from azureml.core.webservice import Webservice

# Deploy both registered models (graph and labels) to the AKS cluster as one
# web service, using the image and deployment configurations defined above
service = Webservice.deploy_from_model(
    workspace=ws,
    name='flower-photos-svc',
    deployment_config=aks_config,
    deployment_target=aks_target,
    models=[model_graph, model_labels],
    image_config=image_config
)

# Wait for the deployment (with output shown), then print the service state
service.wait_for_deployment(show_output=True)
print(service.state)

Get the scoring web service's HTTP endpoint, which accepts REST client calls. This endpoint can be shared with anyone who wants to test the web service or integrate it into an application.

In [None]:
# URL that clients POST scoring requests to
print(service.scoring_uri)

## Test deployed service

Earlier you scored two test images with the local version of the model. Now, you can test the deployed model with every image in the `./resources/test-images/` folder.  

The following code goes through these steps:
1. Send the data as a JSON array to the web service hosted in AKS. 

1. Use the SDK's `run` API to invoke the service. You can also make raw calls using any HTTP tool such as curl.

1. Print the returned predictions (the top labels with their scores) for each image. 

 Since the model accuracy is high, most of the test images should be classified correctly.

In [None]:
import os
import json

# Send every file found under ./resources/test-images/ to the deployed service
# and print the response for each one.
for dirpath, dnames, fnames in os.walk("./resources/test-images/"):
    for f in fnames:
        file_name = os.path.join(dirpath, f)

        # load image
        print("Loading image", file_name)

        data = read_tensor_from_image_file(
            file_name,
            input_height=input_height,
            input_width=input_width,
            input_mean=input_mean,
            input_std=input_std
        )
        # Serialize with the json module rather than str() so the payload is
        # guaranteed to be valid JSON for the scoring script's json.loads
        raw_data = json.dumps(data.tolist())

        # predict using the deployed model
        print("Sending image", f, "to service")
        response = service.run(input_data=raw_data)
        print("Service response:", response)
        print()

You can also send a raw HTTP request to test the web service.

In [None]:
import requests
import json

# The first key returned by the service is sent as a Bearer token
api_keys = service.get_keys()
headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + api_keys[0],
}

file_name = "./resources/test-images/Daisy1.jpg"

# Preprocess the image exactly as for the local prediction
data = read_tensor_from_image_file(
    file_name,
    input_height=input_height,
    input_width=input_width,
    input_mean=input_mean,
    input_std=input_std,
)
input_data = str(data.tolist())

scoring_url = service.scoring_uri
print("POST to url", scoring_url)
# The serialized array is the request body
resp = requests.post(scoring_url, data=input_data, headers=headers)
print("prediction:", resp.text)

## Clean up resources

To keep the resource group and workspace for other tutorials and exploration, you can delete only the AKS web service deployment using this API call:

In [None]:
# Delete the deployed web service
service.delete()
# Remove the scoring script that the %%writefile cell created in the working directory
if os.path.exists('score_flowers.py'):
    os.remove('score_flowers.py')

## Start TensorBoard

In [None]:
from azureml.contrib.tensorboard import Tensorboard

# The Tensorboard constructor takes a list of runs, so wrap the single run in a one-element list
tb = Tensorboard([run])

# If successful, start() returns a string with the URI of the instance.
tb.start()

## Stop TensorBoard
When you're done, make sure to call the stop() method of the Tensorboard object, or it will stay running even after your job completes.

In [None]:
# Shut down the TensorBoard instance started above
tb.stop()