# Deploy an image classification model to Azure

In [None]:
%matplotlib notebook
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K 
import azureml
from azureml.core import Workspace, Run

# Report the library versions in use so a reader can reproduce this run.
for _label, _version in (
    ("TensorFlow Version: ", tf.__version__),
    ("Azure ML SDK Version: ", azureml.core.VERSION),
):
    print(_label, _version)

## Initialize Workspace
Initialize a workspace object from persisted configuration.

In [None]:
# Build the workspace handle from the persisted configuration.
ws = Workspace.from_config()

# Echo the identifying details so the reader can confirm the right workspace is in use.
for _caption, _attribute in (
    ("Resource group: ", "resource_group"),
    ("Location: ", "location"),
    ("Workspace name: ", "name"),
):
    print(_caption, getattr(ws, _attribute))

## Create a project directory
Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on.

In [None]:
import os

# Folder that holds everything shipped to the remote resource (score.py is written here later).
project_folder = '../projects/deploy_fashion_mnist'

# exist_ok=True keeps this cell safe to re-run when the folder is already there.
os.makedirs(name=project_folder, exist_ok=True)

## Register a model

In [None]:
from azureml.core.model import Model

model_name = "fashion-mnist-model"
model_path = "./resources/models/keras_fashion_mnist/full_model.h5"

# Look for an existing registration under this name before creating a new one.
models = Model.list(workspace=ws, name=model_name, latest=True)
if models:
    print("Using latest Fashion MNIST model...")
    model = models[0]
else:
    print("Registering new Fashion MNIST model...")
    model = Model.register(
        workspace=ws,
        model_path=model_path,
        model_name=model_name,
        tags={"data": "fashion-mnist", "model": "classification"},
        description="Fashion MNIST image recognition",
    )

# Summarize which registration the rest of the notebook will deploy.
for _caption, _value in (
    ("Model name: ", model.name),
    ("Model id: ", model.id),
    ("Model version: ", model.version),
):
    print(_caption, _value)

## Download Fashion MNIST Dataset

In [None]:
import os
import urllib.request

data_folder = '../data/fashion_mnist'
os.makedirs(data_folder, exist_ok=True)

# Remote archive name in the Fashion-MNIST repository -> local file name used by this notebook.
_fashion_mnist_base_url = 'https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/'
_fashion_mnist_files = {
    'train-images-idx3-ubyte.gz': 'train-images.gz',
    'train-labels-idx1-ubyte.gz': 'train-labels.gz',
    't10k-images-idx3-ubyte.gz': 'test-images.gz',
    't10k-labels-idx1-ubyte.gz': 'test-labels.gz',
}

for _remote_name, _local_name in _fashion_mnist_files.items():
    _target = os.path.join(data_folder, _local_name)
    if os.path.exists(_target):
        # Already fetched by an earlier run; delete the file to force a fresh download.
        continue
    # Download under a temporary name and rename afterwards, so an interrupted
    # transfer never leaves a truncated archive under the final file name.
    _partial = _target + '.part'
    urllib.request.urlretrieve(_fashion_mnist_base_url + _remote_name, filename=_partial)
    os.replace(_partial, _target)

In [None]:
import gzip
import struct

def load_data(filename, label=False):
    """Read a gzip-compressed IDX file (the MNIST / Fashion-MNIST on-disk format).

    Parameters
    ----------
    filename : str or path-like
        Path of the ``.gz`` archive to read.
    label : bool, default False
        ``False`` parses an image file, whose header is magic number, item
        count, row count and column count. ``True`` parses a label file,
        whose header is magic number and item count only.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(n_items, n_rows * n_cols)`` for images,
        or ``(n_items, 1)`` for labels.

    Raises
    ------
    ValueError
        If the magic number does not match the requested kind of file, or if
        the payload is shorter than the header announces.
    """
    # IDX magic numbers: 0x00000801 for label files, 0x00000803 for image files.
    expected_magic = 2049 if label else 2051

    with gzip.open(filename) as gz:
        # All header fields are big-endian unsigned 32-bit integers.
        magic, n_items = struct.unpack('>II', gz.read(8))
        if magic != expected_magic:
            raise ValueError(
                "{!r}: unexpected IDX magic number {} (expected {} for a {} file)".format(
                    filename, magic, expected_magic, "label" if label else "image"
                )
            )
        if label:
            item_size = 1
        else:
            n_rows, n_cols = struct.unpack('>II', gz.read(8))
            item_size = n_rows * n_cols
        payload = gz.read(n_items * item_size)

    # Fail with a clear message instead of an opaque reshape error.
    if len(payload) != n_items * item_size:
        raise ValueError(
            "{!r}: truncated file, expected {} data bytes but found {}".format(
                filename, n_items * item_size, len(payload)
            )
        )

    return np.frombuffer(payload, dtype=np.uint8).reshape(n_items, item_size)

In [None]:
# Height and width, in pixels, that each flat image row is reshaped to below.
img_rows, img_cols = 28, 28

# Read the four archives downloaded earlier: images and labels for train and test.
x_train = load_data(os.path.join(data_folder, 'train-images.gz'), False)
x_test = load_data(os.path.join(data_folder, 'test-images.gz'), False)
y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True)
y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

# Keras backends disagree on where the channel axis goes, so pick the
# per-image shape first and then apply it to both image sets.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Convert the uint8 pixel values to float32 and scale them into [0, 1].
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
print('x_train shape (after reshape):', x_train.shape)
print('x_test shape (after reshape):', x_test.shape)

## Test model locally

### Predict test data

Feed the test dataset to the model to get predictions.

In [None]:
# Load the same model file that was registered above and score the whole test set locally.
keras_model = keras.models.load_model(model_path)
y_pred = keras_model.predict(x_test)

# Take the index of the highest score in each row as the predicted class.
y_hat = y_pred.argmax(axis=1)

###  Examine the confusion matrix

Generate a confusion matrix to see how many samples from the test set are classified correctly. Notice the mis-classified value for the incorrect predictions.

In [None]:
from sklearn.metrics import confusion_matrix

# Tabulate the true test labels against the local predictions.
conf_mx = confusion_matrix(y_true=y_test, y_pred=y_hat)
print(conf_mx)

Use `matplotlib` to display the confusion matrix as a graph. In this graph, the Y axis represents the true labels, and the X axis represents the predicted values. The color in each cell represents the error rate. The lighter the color, the higher the error rate is. For example, if many samples of class 6 are mis-classified as class 0, you will see a bright cell at row 6, column 0.

In [None]:
# Turn counts into per-row rates, then zero the diagonal so the correct
# predictions don't overpower the error cells when visualized.
row_sums = conf_mx.sum(axis=1, keepdims=True)
norm_conf_mx = conf_mx / row_sums
np.fill_diagonal(norm_conf_mx, 0)

fig, ax = plt.subplots(figsize=(8, 5))
cax = ax.matshow(norm_conf_mx, cmap=plt.cm.bone)

# One tick per class index, 0 through 9, on both axes.
ticks = np.arange(10)
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(ticks)
ax.set_yticklabels(ticks)
fig.colorbar(cax)

# Rows of the matrix are the true labels; columns are the predictions.
ax.set_ylabel('true labels', fontsize=14)
ax.set_xlabel('predicted values', fontsize=14)
#plt.savefig(os.path.join(project_folder, 'conf.png'))
plt.show()

## Provision the AKS Cluster

In the meantime, let's spin up an Azure Kubernetes Services cluster...

This is a one time setup. You can reuse this cluster for multiple deployments after it has been created. If you delete the cluster or the resource group that contains it, then you would have to recreate it.

In [None]:
from azureml.core.compute import AksCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

aks_name = 'myaks'

try:
    # Try to attach to a compute target with this name first.
    aks_target = AksCompute(workspace=ws, name=aks_name)
except ComputeTargetException:
    print('creating new.')

    # Size of the new cluster: three Standard_B4ms agents.
    prov_config = AksCompute.provisioning_configuration(
        vm_size="Standard_B4ms",
        agent_count=3,
    )

    # Start the creation. This cell does not wait for it to finish; the
    # provisioning state is polled later in the notebook.
    aks_target = ComputeTarget.create(
        workspace=ws,
        name=aks_name,
        provisioning_configuration=prov_config,
    )
else:
    print('found existing:', aks_target.name)

## Deploy as web service

Once you've tested the model and are satisfied with the results, deploy the model as a web service hosted in ACI. 

To build the correct environment for ACI, provide the following:
* A scoring script to show how to use the model
* An environment file to show what packages need to be installed
* A configuration file to build the ACI
* The model you registered earlier

### Create scoring script

Create the scoring script, called score.py, used by the web service call to show how to use the model.

You must include two required functions into the scoring script:
* The `init()` function, which typically loads the model into a global object. This function is run only once when the Docker container is started. 

* The `run(input_data)` function uses the model to predict a value based on the input data. Inputs and outputs to the run typically use JSON for serialization and de-serialization, but other formats are supported.


In [None]:
%%writefile $project_folder/score.py
import json
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras

from azureml.core.model import Model

def init():
    """Load the registered Keras model into the module-level ``model`` global."""
    global model
    # Resolve the model file location from the registered model name, then load it.
    model = keras.models.load_model(Model.get_model_path('fashion-mnist-model'))

def run(raw_data):
    """Score one request with the global ``model``.

    ``raw_data`` is a JSON document whose ``"data"`` entry holds the batch to
    score. Returns a list with the predicted class index for each row.
    """
    payload = json.loads(raw_data)
    batch = np.array(payload['data'])
    scores = model.predict(batch)
    # The winning class is the position of the highest score in each row.
    # A plain list is returned because the result must be JSON-serializable.
    return np.argmax(scores, axis=1).tolist()

### Configure inference environment

Next, register and build an environment, called mytfinferenceenv, that specifies all of the script's package dependencies. This environment is used to ensure that all of those dependencies are installed in the Docker image. This model needs TensorFlow, which is added to the environment as a pip package.

In [None]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Describe the Python packages that the scoring script needs.
conda_dep = CondaDependencies()
conda_dep.add_pip_package("tensorflow-gpu==2.4.1")

# Attach that dependency set to a named environment.
myenv = Environment(name="mytfinferenceenv")
myenv.python.conda_dependencies = conda_dep

# Register the environment in the workspace.
myenv.register(workspace=ws)

Review the content of the conda_dep variable.

In [None]:
# Show the serialized form of the dependency specification built above.
print(conda_dep.serialize_to_string())

In [None]:
from azureml.core.model import InferenceConfig
# Pair the scoring script written to the project folder with the inference environment.
_entry_script = os.path.join(project_folder, 'score.py')
inference_config = InferenceConfig(entry_script=_entry_script, environment=myenv)

### Create ACI configuration

Create a deployment configuration and specify the number of CPU cores and gigabytes of RAM needed for your ACI container. While it depends on your model, the default of 1 core and 1 gigabyte of RAM is usually sufficient for many models. If you find you need more later, you will have to recreate the image and redeploy the service.

In [None]:
from azureml.core.webservice import AciWebservice

# Resources requested for the ACI container, plus descriptive metadata.
# The tags and description name the Fashion MNIST Keras model that is actually
# deployed; the previous sklearn/MNIST values were left over from another tutorial.
aci_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags={"data": "fashion-mnist", "method": "keras"},
    description='Predict Fashion MNIST with Keras'
)

### Deploy in ACI
Estimated time to complete: **about 5-10 minutes**

The following code goes through these steps:

1. Create a Docker image for the inference container.
1. Start up a container in ACI using the image.
1. Get the web service HTTP endpoint.

In [None]:
%%time
aci_service_name = 'fashion-mnist-aci-svc'

# Deploy the registered model to ACI; overwrite=True replaces a service of the same name.
aci_service = Model.deploy(
    workspace=ws,
    name=aci_service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True,
)

# Block until the deployment finishes, streaming progress into the cell output.
aci_service.wait_for_deployment(show_output=True)

Get the scoring web service's HTTP endpoint, which accepts REST client calls. This endpoint can be shared with anyone who wants to test the web service or integrate it into an application.

In [None]:
# Show the HTTP endpoint of the ACI service deployed above.
print(aci_service.scoring_uri)

## Test deployed service

Earlier you scored all the test data with the local version of the model. Now, you can test the deployed model with a random sample of 30 images from the test data.  

The following code goes through these steps:
1. Send the data as a JSON array to the web service hosted in ACI. 

1. Use the SDK's `run` API to invoke the service. You can also make raw calls using any HTTP tool such as curl.

1. Print the returned predictions and plot them along with the input images. Red font and inverse image (white on black) is used to highlight the misclassified samples. 

 Since the model accuracy is high, you might have to run the following code a few times before you can see a misclassified sample.

In [None]:
import json

# Pick 30 random samples from the test set.
n = 30
sample_indices = np.random.permutation(x_test.shape[0])[0:n]

# Serialize the samples as the JSON payload that score.py expects: {"data": [...]}.
test_samples = json.dumps({"data": x_test[sample_indices].tolist()})
test_samples = bytes(test_samples, encoding='utf8')

# Predict using the deployed model.
result = aci_service.run(input_data=test_samples)

# Compare the actual labels with the predicted values, one small panel per sample.
plt.figure(figsize=(20, 1))

for i, s in enumerate(sample_indices):
    panel = plt.subplot(1, n, i + 1)
    # Hide the ticks explicitly. The earlier axhline('') / axvline('') calls
    # passed an empty string as a coordinate and drew stray lines over each image.
    panel.set_xticks([])
    panel.set_yticks([])

    # y_test holds one label per row, so column 0 is the true class.
    misclassified = y_test[s, 0] != result[i]

    # Misclassified samples get a red label and an inverted (white on black) image.
    font_color = 'red' if misclassified else 'black'
    clr_map = plt.cm.gray if misclassified else plt.cm.Greys

    panel.text(x=10, y=-10, s=str(result[i]), fontsize=18, color=font_color)
    panel.imshow(x_test[s].reshape(28, 28), cmap=clr_map)
plt.show()

You can also send a raw HTTP request to test the web service.

In [None]:
import requests
import json

# Score 30 randomly chosen rows of the test set with a plain HTTP POST.
n = 30
sample_indices = np.random.permutation(x_test.shape[0])[0:n]
input_data = bytes(json.dumps({"data": x_test[sample_indices].tolist()}), encoding='utf8')

headers = {'Content-Type':'application/json'}

resp = requests.post(aci_service.scoring_uri, input_data, headers=headers)

print("POST to url", aci_service.scoring_uri)
#print("input data:", input_data)
# Each row of y_test holds a single label; unwrap it for a readable printout.
labels = [row[0] for row in y_test[sample_indices]]
print("label:", labels)
print("prediction:", resp.text)

## Deploy to Azure Kubernetes Services
### Check AKS Cluster state

In [None]:
import time

# Provisioning states after which polling is pointless. 'Canceled' is included
# so that a cancelled provisioning does not keep this loop spinning forever.
_terminal_states = ('Succeeded', 'Failed', 'Canceled')

status = aks_target.get_status()
while status not in _terminal_states:
    print('current status: {} - waiting...'.format(status))
    time.sleep(10)
    status = aks_target.get_status()

# Stop here with a clear message rather than letting the deployment below fail
# against a cluster that was never provisioned.
if status != 'Succeeded':
    raise RuntimeError(
        "AKS cluster '{}' ended provisioning in state '{}'".format(aks_target.name, status)
    )
print('current status: {}'.format(status))

In [None]:
%%time
from azureml.core.webservice import Webservice, AksWebservice

aks_service_name = 'fashion-mnist-aks-svc'

# Resources requested for the AKS web service, plus descriptive metadata.
# The tags and description name the Fashion MNIST Keras model that is actually
# deployed; the previous sklearn/MNIST values were left over from another tutorial.
aks_config = AksWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags={"data": "fashion-mnist", "method": "keras"},
    description='Predict Fashion MNIST with Keras'
)

# Deploy the registered model onto the AKS cluster; overwrite=True replaces a
# service of the same name.
aks_service = Model.deploy(workspace=ws,
                           name=aks_service_name,
                           models=[model],
                           inference_config=inference_config,
                           deployment_config=aks_config,
                           deployment_target=aks_target,
                           overwrite=True)

# Block until the deployment finishes, then report the final service state.
aks_service.wait_for_deployment(show_output = True)
print(aks_service.state)

In [None]:
import requests
import json

# Score 30 randomly chosen rows of the test set against the AKS service.
n = 30
sample_indices = np.random.permutation(x_test.shape[0])[0:n]
input_data = json.dumps({"data": x_test[sample_indices].tolist()})
input_data = bytes(input_data, encoding='utf8')

# For an AKS deployment the service key must be sent in the Authorization header.
api_keys = aks_service.get_keys()
headers = {'Content-Type':'application/json',  'Authorization':('Bearer '+ api_keys[0])}

# Post to the AKS endpoint. The earlier code posted to aci_service.scoring_uri,
# so the AKS service was never actually exercised.
resp = requests.post(aks_service.scoring_uri, input_data, headers=headers)

print("POST to url", aks_service.scoring_uri)
#print(headers)
#print("input data:", input_data)
# Each row of y_test holds a single label; unwrap it for a readable printout.
labels = [row[0] for row in y_test[sample_indices]]
print("label:", labels)
print("prediction:", resp.text)

## Clean up resources

To keep the resource group and workspace for other tutorials and exploration, you can delete only the ACI and AKS web service deployments using these API calls. The AKS cluster itself is not deleted by this cell.

In [None]:
# Remove both web services created by this notebook (ACI first, then AKS).
for _service in (aci_service, aks_service):
    _service.delete()